Compare commits

...

4 Commits

Author SHA1 Message Date
Jay D Dee
c24a4bdbc2 v3.8.3.2 2018-02-24 14:36:19 -05:00
Jay D Dee
59c7848d91 v3.8.3.1 2018-02-23 15:45:32 -05:00
Jay D Dee
3c02653dbe v3.8.3 2018-02-23 12:39:15 -05:00
Jay D Dee
502ed0b1fe v3.8.2.1 2018-02-17 13:52:24 -05:00
78 changed files with 2618 additions and 1850 deletions

View File

@@ -45,7 +45,10 @@ cpuminer_SOURCES = \
algo/blake/sph_blake2b.c \
algo/blake/blake2b.c \
algo/blake/sph-blake2s.c \
algo/blake/blake2s-hash-4way.c \
algo/blake/blake2s.c \
algo/blake/blake2s-gate.c \
algo/blake/blake2s-4way.c \
algo/blake/blakecoin-gate.c \
algo/blake/mod_blakecoin.c \
algo/blake/blakecoin.c \

View File

@@ -34,11 +34,12 @@ Others may work but may require more effort.
MacOS, OSx is not supported.
3. Stratum pool. Some algos may work wallet mining using getwork.
3. Stratum pool. Some algos may work wallet mining using getwork or GBT. YMMV.
Supported Algorithms
--------------------
allium Garlicoin
anime Animecoin
argon2
axiom Shabal-256 MemoHash
@@ -107,7 +108,7 @@ Supported Algorithms
x17
xevan Bitsend
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)\n\
yescryptr8 BitZeny (ZNY)
yescryptr16 Yenten (YTN)
zr5 Ziftr

View File

@@ -134,7 +134,7 @@ cd /c/path/to/cpuminer-opt
Run build.sh to build on Windows or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
make
Start mining
@@ -159,6 +159,31 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
Change Log
----------
v3.8.3.2
Reverted gbt changes from v3.8.0 that broke getwork.
Reverted scaled hash rate for API, added HS term in addition to KHS.
Added blocks solved to console display and API.
v3.8.3.1
Fixed regression in v3.8.3 that broke several algos.
v3.8.3
More restoration of lost lyra2 hash.
8 way AVX2 and 4way AVX optimization for blakecoin, vanilla & blake2s.
8 way AVX2 for lbry.
Scaled hashrate for API output.
A couple of GBT fixes.
v3.8.2.1
Fixed low difficulty rejects with allium.
Fixed qubit AVX2.
Restored lyra2z lost hash.
Fixed build.sh
v3.8.2
Fixed and faster myr-gr.

View File

@@ -1,19 +1,18 @@
#include "blake-gate.h"
#if defined (BLAKE_4WAY)
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
blake256r14_4way_context blake_ctx;
#if defined (BLAKE_4WAY)
blake256r14_4way_context blake_4w_ctx;
void blakehash_4way(void *state, const void *input)
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake256r14_4way_context ctx;
memcpy( &ctx, &blake_ctx, sizeof ctx );
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
blake256r14_4way( &ctx, input + (64<<2), 16 );
blake256r14_4way_close( &ctx, vhash );
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
@@ -31,7 +30,6 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
if (opt_benchmark)
@@ -39,15 +37,12 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
// we need big endian data...
swab32_array( edata, pdata, 20 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
blake256r14_4way_init( &blake_ctx );
blake256r14_4way( &blake_ctx, vdata, 64 );
blake256r14_4way_init( &blake_4w_ctx );
blake256r14_4way( &blake_4w_ctx, vdata, 64 );
uint32_t *noncep = vdata + 76; // 19*4
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
@@ -55,34 +50,12 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
blakehash_4way( hash, vdata );
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
@@ -95,3 +68,77 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
#endif
#if defined(BLAKE_8WAY)
blake256r14_8way_context blake_8w_ctx;
void blakehash_8way( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
blake256r14_8way_context ctx;
memcpy( &ctx, &blake_8w_ctx, sizeof ctx );
blake256r14_8way( &ctx, input + (64<<3), 16 );
blake256r14_8way_close( &ctx, vhash );
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
state+128, state+160, state+192, state+224,
vhash, 256 );
}
int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t HTarget = ptarget[7];
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
int num_found = 0;
if (opt_benchmark)
HTarget = 0x7f;
// we need big endian data...
swab32_array( edata, pdata, 20 );
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
blake256r14_8way_init( &blake_8w_ctx );
blake256r14_8way( &blake_8w_ctx, vdata, 64 );
uint32_t *noncep = vdata + 152; // 19*8
do {
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );
be32enc( noncep +4, n+4 );
be32enc( noncep +5, n+5 );
be32enc( noncep +6, n+6 );
be32enc( noncep +7, n+7 );
pdata[19] = n;
blakehash_8way( hash, vdata );
for ( int i = 0; i < 8; i++ )
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
{
pdata[19] = n+i;
num_found++;
nonces[i] = n+i;
work_set_target_ratio( work, hash+1 );
}
n += 8;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif

View File

@@ -58,6 +58,8 @@ extern "C"{
#pragma warning (disable: 4146)
#endif
// Blake-256
static const sph_u32 IV256[8] = {
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
@@ -67,6 +69,8 @@ static const sph_u32 IV256[8] = {
#if defined (__AVX2__)
// Blake-512
static const sph_u64 IV512[8] = {
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
@@ -78,7 +82,7 @@ static const sph_u64 IV512[8] = {
#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
// Blake-256 4 & 8 way, Blake-512 4way
// Blake-256 4 & 8 way, Blake-512 4 way
static const unsigned sigma[16][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
@@ -371,6 +375,8 @@ do { \
#if SPH_COMPACT_BLAKE_32
// Blake-256 4 way
#define ROUND_S_4WAY(r) do { \
GS_4WAY(M[sigma[r][0x0]], M[sigma[r][0x1]], \
CS[sigma[r][0x0]], CS[sigma[r][0x1]], V0, V4, V8, VC); \
@@ -407,7 +413,7 @@ do { \
#if defined (__AVX2__)
// BLAKE256 8 WAY
// Blake-256 8 way
#define GS_8WAY( m0, m1, c0, c1, a, b, c, d ) \
do { \
@@ -487,6 +493,8 @@ do { \
#endif
// Blake-256 4 way
#define DECL_STATE32_4WAY \
__m128i H0, H1, H2, H3, H4, H5, H6, H7; \
__m128i S0, S1, S2, S3; \
@@ -527,6 +535,7 @@ do { \
} while (0)
#if SPH_COMPACT_BLAKE_32
// not used
#define COMPRESS32_4WAY( rounds ) do { \
__m128i M[16]; \
@@ -778,7 +787,6 @@ do { \
S3 ), H7 ); \
} while (0)
// Blake-512 4 way
#define DECL_STATE64_4WAY \
@@ -967,6 +975,8 @@ do { \
#endif
// Blake-256 4 way
static const sph_u32 salt_zero_4way_small[4] = { 0, 0, 0, 0 };
static void
@@ -988,52 +998,51 @@ blake32_4way( blake_4way_small_context *sc, const void *data, size_t len )
{
__m128i *vdata = (__m128i*)data;
__m128i *buf;
size_t ptr;
const int buf_size = 64; // number of elements, sizeof/4
DECL_STATE32_4WAY
size_t ptr;
const int buf_size = 64; // number of elements, sizeof/4
DECL_STATE32_4WAY
buf = sc->buf;
ptr = sc->ptr;
if ( len < buf_size - ptr )
{
memcpy_128( buf + (ptr>>2), vdata, len>>2 );
ptr += len;
sc->ptr = ptr;
return;
}
buf = sc->buf;
ptr = sc->ptr;
if ( len < buf_size - ptr )
{
memcpy_128( buf + (ptr>>2), vdata, len>>2 );
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE32_4WAY(sc);
while ( len > 0 )
{
size_t clen;
READ_STATE32_4WAY(sc);
while ( len > 0 )
{
size_t clen;
clen = buf_size - ptr;
if (clen > len)
clen = len;
memcpy_128( buf + (ptr>>2), vdata, clen>>2 );
ptr += clen;
vdata += (clen>>2);
len -= clen;
if ( ptr == buf_size )
{
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
T1 = SPH_T32(T1 + 1);
COMPRESS32_4WAY( sc->rounds );
ptr = 0;
}
}
WRITE_STATE32_4WAY(sc);
sc->ptr = ptr;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_128( buf + (ptr>>2), vdata, clen>>2 );
ptr += clen;
vdata += (clen>>2);
len -= clen;
if ( ptr == buf_size )
{
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
T1 = SPH_T32(T1 + 1);
COMPRESS32_4WAY( sc->rounds );
ptr = 0;
}
}
WRITE_STATE32_4WAY(sc);
sc->ptr = ptr;
}
static void
blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w32 )
{
union {
// union {
__m128i buf[16];
sph_u32 dummy;
} u;
// sph_u32 dummy;
// } u;
size_t ptr, k;
unsigned bit_len;
sph_u32 th, tl;
@@ -1041,7 +1050,7 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
u.buf[ptr>>2] = _mm_set1_epi32( 0x80 );
buf[ptr>>2] = _mm_set1_epi32( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;
@@ -1060,26 +1069,26 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
if ( ptr <= 52 )
{
memset_zero_128( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
memset_zero_128( buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm_or_si128( u.buf[52>>2],
buf[52>>2] = _mm_or_si128( buf[52>>2],
_mm_set1_epi32( 0x01000000UL ) );
*(u.buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
*(buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
*(buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, buf + (ptr>>2), 64 - ptr );
}
else
{
memset_zero_128( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
memset_zero_128( buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_4way( sc, buf + (ptr>>2), 64 - ptr );
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
memset_zero_128( u.buf, 56>>2 );
memset_zero_128( buf, 56>>2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm_set1_epi32( 0x01000000UL );
*(u.buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, u.buf, 64 );
buf[52>>2] = _mm_set1_epi32( 0x01000000UL );
*(buf+(56>>2)) = mm_bswap_32( _mm_set1_epi32( th ) );
*(buf+(60>>2)) = mm_bswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, buf, 64 );
}
out = (__m128i*)dst;
for ( k = 0; k < out_size_w32; k++ )
@@ -1114,7 +1123,6 @@ blake32_8way( blake_8way_small_context *sc, const void *data, size_t len )
size_t ptr;
const int buf_size = 64; // number of elements, sizeof/4
DECL_STATE32_8WAY
buf = sc->buf;
ptr = sc->ptr;
if ( len < buf_size - ptr )
@@ -1153,10 +1161,10 @@ static void
blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w32 )
{
union {
// union {
__m256i buf[16];
sph_u32 dummy;
} u;
// sph_u32 dummy;
// } u;
size_t ptr, k;
unsigned bit_len;
sph_u32 th, tl;
@@ -1164,7 +1172,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
u.buf[ptr>>2] = _mm256_set1_epi32( 0x80 );
buf[ptr>>2] = _mm256_set1_epi32( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;
@@ -1183,26 +1191,26 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
if ( ptr <= 52 )
{
memset_zero_256( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm256_or_si256( u.buf[52>>2],
memset_zero_256( buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
if ( out_size_w32 == 8 )
buf[52>>2] = _mm256_or_si256( buf[52>>2],
_mm256_set1_epi32( 0x01000000UL ) );
*(u.buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
*(buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
*(buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
blake32_8way( sc, buf + (ptr>>2), 64 - ptr );
}
else
{
memset_zero_256( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
memset_zero_256( buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_8way( sc, buf + (ptr>>2), 64 - ptr );
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
memset_zero_256( u.buf, 56>>2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm256_set1_epi32( 0x01000000UL );
*(u.buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
blake32_8way( sc, u.buf, 64 );
memset_zero_256( buf, 56>>2 );
if ( out_size_w32 == 8 )
buf[52>>2] = _mm256_set1_epi32( 0x01000000UL );
*(buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
*(buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
blake32_8way( sc, buf, 64 );
}
out = (__m256i*)dst;
for ( k = 0; k < out_size_w32; k++ )
@@ -1274,10 +1282,10 @@ static void
blake64_4way_close( blake_4way_big_context *sc,
unsigned ub, unsigned n, void *dst, size_t out_size_w64)
{
union {
// union {
__m256i buf[16];
sph_u64 dummy;
} u;
// sph_u64 dummy;
// } u;
size_t ptr, k;
unsigned bit_len;
uint64_t z, zz;
@@ -1288,7 +1296,7 @@ blake64_4way_close( blake_4way_big_context *sc,
bit_len = ((unsigned)ptr << 3);
z = 0x80 >> n;
zz = ((ub & -z) | z) & 0xFF;
u.buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
tl = sc->T0 + bit_len;
th = sc->T1;
if (ptr == 0 )
@@ -1307,33 +1315,33 @@ blake64_4way_close( blake_4way_big_context *sc,
}
if ( ptr <= 104 )
{
memset_zero_256( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
memset_zero_256( buf + (ptr>>3) + 1, (104-ptr) >> 3 );
if ( out_size_w64 == 8 )
u.buf[(104>>3)] = _mm256_or_si256( u.buf[(104>>3)],
buf[(104>>3)] = _mm256_or_si256( buf[(104>>3)],
_mm256_set1_epi64x( 0x0100000000000000ULL ) );
*(u.buf+(112>>3)) = mm256_bswap_64(
*(buf+(112>>3)) = mm256_bswap_64(
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_bswap_64(
*(buf+(120>>3)) = mm256_bswap_64(
_mm256_set_epi64x( tl, tl, tl, tl ) );
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
}
else
{
memset_zero_256( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
memset_zero_256( buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
memset_zero_256( u.buf, 112>>3 );
memset_zero_256( buf, 112>>3 );
if ( out_size_w64 == 8 )
u.buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
*(u.buf+(112>>3)) = mm256_bswap_64(
buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
*(buf+(112>>3)) = mm256_bswap_64(
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_bswap_64(
*(buf+(120>>3)) = mm256_bswap_64(
_mm256_set_epi64x( tl, tl, tl, tl ) );
blake64_4way( sc, u.buf, 128 );
blake64_4way( sc, buf, 128 );
}
out = (__m256i*)dst;
for ( k = 0; k < out_size_w64; k++ )
@@ -1342,7 +1350,7 @@ blake64_4way_close( blake_4way_big_context *sc,
#endif
// Blake-256 4 way & 8 way
// Blake-256 4 way
// default 14 rounds, backward copatibility
void
@@ -1364,6 +1372,9 @@ blake256_4way_close(void *cc, void *dst)
}
#if defined(__AVX2__)
// Blake-256 8way
void
blake256_8way_init(void *cc)
{

View File

@@ -35,7 +35,7 @@
*/
#ifndef __BLAKE_HASH_4WAY__
#define __BLAKE_HASH_4WAY__
#define __BLAKE_HASH_4WAY__ 1
#ifdef __AVX__
@@ -117,11 +117,11 @@ void blake256r8_8way_close(void *cc, void *dst);
// Blake-512 4 way
typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
__m256i H[8];
__m256i S[4];
size_t ptr;
sph_u64 T0, T1;
__m256i buf[16] __attribute__ ((aligned (64)));
__m256i H[8];
__m256i S[4];
size_t ptr;
sph_u64 T0, T1;
} blake_4way_big_context;
typedef blake_4way_big_context blake512_4way_context;

136
algo/blake/blake2s-4way.c Normal file
View File

@@ -0,0 +1,136 @@
#include "blake2s-gate.h"
#include "blake2s-hash-4way.h"
#include <string.h>
#include <stdint.h>
#if defined(BLAKE2S_8WAY)
static __thread blake2s_8way_state blake2s_8w_ctx;
void blake2s_8way_hash( void *output, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
blake2s_8way_state ctx;
memcpy( &ctx, &blake2s_8w_ctx, sizeof ctx );
blake2s_8way_update( &ctx, input + (64<<3), 16 );
blake2s_8way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
output+128, output+160, output+192, output+224,
vhash, 256 );
}
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t _ALIGN(64) edata[20];
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
int num_found = 0;
uint32_t *noncep = vdata + 152; // 19*8
swab32_array( edata, pdata, 20 );
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
blake2s_8way_update( &blake2s_8w_ctx, vdata, 64 );
do {
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );
be32enc( noncep +4, n+4 );
be32enc( noncep +5, n+5 );
be32enc( noncep +6, n+6 );
be32enc( noncep +7, n+7 );
pdata[19] = n;
blake2s_8way_hash( hash, vdata );
for ( int i = 0; i < 8; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 8;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return num_found;
}
#elif defined(BLAKE2S_4WAY)
static __thread blake2s_4way_state blake2s_4w_ctx;
void blake2s_4way_hash( void *output, const void *input )
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake2s_4way_state ctx;
memcpy( &ctx, &blake2s_4w_ctx, sizeof ctx );
blake2s_4way_update( &ctx, input + (64<<2), 16 );
blake2s_4way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
mm_deinterleave_4x32( output, output+32, output+64, output+96, vhash, 256 );
}
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t _ALIGN(64) edata[20];
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
int num_found = 0;
uint32_t *noncep = vdata + 76; // 19*4
swab32_array( edata, pdata, 20 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES );
blake2s_4way_update( &blake2s_4w_ctx, vdata, 64 );
do {
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );
pdata[19] = n;
blake2s_4way_hash( hash, vdata );
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif

27
algo/blake/blake2s-gate.c Normal file
View File

@@ -0,0 +1,27 @@
#include "blake2s-gate.h"
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake2s_get_max64 ()
{
return 0x7ffffLL;
}
bool register_blake2s_algo( algo_gate_t* gate )
{
#if defined(BLAKE2S_8WAY)
gate->scanhash = (void*)&scanhash_blake2s_8way;
gate->hash = (void*)&blake2s_8way_hash;
#elif defined(BLAKE2S_4WAY)
gate->scanhash = (void*)&scanhash_blake2s_4way;
gate->hash = (void*)&blake2s_4way_hash;
#else
gate->scanhash = (void*)&scanhash_blake2s;
gate->hash = (void*)&blake2s_hash;
#endif
gate->get_max64 = (void*)&blake2s_get_max64;
gate->optimizations = AVX_OPT | AVX2_OPT;
return true;
};

35
algo/blake/blake2s-gate.h Normal file
View File

@@ -0,0 +1,35 @@
#ifndef __BLAKE2S_GATE_H__
#define __BLAKE2S_GATE_H__ 1
#include <stdint.h>
#include "algo-gate-api.h"
#if defined(__AVX__)
#define BLAKE2S_4WAY
#endif
#if defined(__AVX2__)
#define BLAKE2S_8WAY
#endif
bool register_blake2s_algo( algo_gate_t* gate );
#if defined(BLAKE2S_8WAY)
void blake2s_8way_hash( void *state, const void *input );
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#elif defined (BLAKE2S_4WAY)
void blake2s_4way_hash( void *state, const void *input );
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#else
void blake2s_hash( void *state, const void *input );
int scanhash_blake2s( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
#endif

View File

@@ -0,0 +1,362 @@
/**
* BLAKE2 reference source code package - reference C implementations
*
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide. This software is distributed without any warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication along with
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include "blake2s-hash-4way.h"
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#if defined(__AVX__)
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const uint8_t blake2s_sigma[10][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
// define a constant for initial param.
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen )
{
blake2s_nway_param P[1];
P->digest_length = outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
P->leaf_length = 0;
*((uint64_t*)(P->node_offset)) = 0;
P->node_depth = 0;
P->inner_length = 0;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
memset( S, 0, sizeof( blake2s_4way_state ) );
for( int i = 0; i < 8; ++i )
S->h[i] = _mm_set1_epi32( blake2s_IV[i] );
uint32_t *p = ( uint32_t * )( P );
/* IV XOR ParamBlock */
for ( size_t i = 0; i < 8; ++i )
S->h[i] = _mm_xor_si128( S->h[i], _mm_set1_epi32( p[i] ) );
return 0;
}
int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
{
__m128i m[16];
__m128i v[16];
memcpy_128( m, block, 16 );
memcpy_128( v, S->h, 8 );
v[ 8] = _mm_set1_epi32( blake2s_IV[0] );
v[ 9] = _mm_set1_epi32( blake2s_IV[1] );
v[10] = _mm_set1_epi32( blake2s_IV[2] );
v[11] = _mm_set1_epi32( blake2s_IV[3] );
v[12] = _mm_xor_si128( _mm_set1_epi32( S->t[0] ),
_mm_set1_epi32( blake2s_IV[4] ) );
v[13] = _mm_xor_si128( _mm_set1_epi32( S->t[1] ),
_mm_set1_epi32( blake2s_IV[5] ) );
v[14] = _mm_xor_si128( _mm_set1_epi32( S->f[0] ),
_mm_set1_epi32( blake2s_IV[6] ) );
v[15] = _mm_xor_si128( _mm_set1_epi32( S->f[1] ),
_mm_set1_epi32( blake2s_IV[7] ) );
#define G4W(r,i,a,b,c,d) \
do { \
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+0] ] ); \
d = mm_rotr_32( _mm_xor_si128( d, a ), 16 ); \
c = _mm_add_epi32( c, d ); \
b = mm_rotr_32( _mm_xor_si128( b, c ), 12 ); \
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+1] ] ); \
d = mm_rotr_32( _mm_xor_si128( d, a ), 8 ); \
c = _mm_add_epi32( c, d ); \
b = mm_rotr_32( _mm_xor_si128( b, c ), 7 ); \
} while(0)
#define ROUND4W(r) \
do { \
G4W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
G4W( r, 1, v[ 1], v[ 5], v[ 9], v[13] ); \
G4W( r, 2, v[ 2], v[ 6], v[10], v[14] ); \
G4W( r, 3, v[ 3], v[ 7], v[11], v[15] ); \
G4W( r, 4, v[ 0], v[ 5], v[10], v[15] ); \
G4W( r, 5, v[ 1], v[ 6], v[11], v[12] ); \
G4W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
G4W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
} while(0)
ROUND4W( 0 );
ROUND4W( 1 );
ROUND4W( 2 );
ROUND4W( 3 );
ROUND4W( 4 );
ROUND4W( 5 );
ROUND4W( 6 );
ROUND4W( 7 );
ROUND4W( 8 );
ROUND4W( 9 );
for( size_t i = 0; i < 8; ++i )
S->h[i] = _mm_xor_si128( _mm_xor_si128( S->h[i], v[i] ), v[i + 8] );
#undef G4W
#undef ROUND4W
return 0;
}
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
uint64_t inlen )
{
__m128i *input = (__m128i*)in;
__m128i *buf = (__m128i*)S->buf;
const int bsize = BLAKE2S_BLOCKBYTES;
while( inlen > 0 )
{
size_t left = S->buflen;
if( inlen >= bsize - left )
{
memcpy_128( buf + (left>>2), input, (bsize - left) >> 2 );
S->buflen += bsize - left;
S->t[0] += BLAKE2S_BLOCKBYTES;
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
blake2s_4way_compress( S, buf );
S->buflen = 0;
input += ( bsize >> 2 );
inlen -= bsize;
}
else
{
memcpy_128( buf + ( left>>2 ), input, inlen>>2 );
S->buflen += (size_t) inlen;
input += ( inlen>>2 );
inlen -= inlen;
}
}
return 0;
}
int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen )
{
__m128i *buf = (__m128i*)S->buf;
S->t[0] += S->buflen;
S->t[1] += ( S->t[0] < S->buflen );
if ( S->last_node )
S->f[1] = ~0U;
S->f[0] = ~0U;
memset_zero_128( buf + ( S->buflen>>2 ),
( BLAKE2S_BLOCKBYTES - S->buflen ) >> 2 );
blake2s_4way_compress( S, buf );
for ( int i = 0; i < 8; ++i )
casti_m128i( out, i ) = S->h[ i ];
return 0;
}
#if defined(__AVX2__)
int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
{
__m256i m[16];
__m256i v[16];
memcpy_256( m, block, 16 );
memcpy_256( v, S->h, 8 );
v[ 8] = _mm256_set1_epi32( blake2s_IV[0] );
v[ 9] = _mm256_set1_epi32( blake2s_IV[1] );
v[10] = _mm256_set1_epi32( blake2s_IV[2] );
v[11] = _mm256_set1_epi32( blake2s_IV[3] );
v[12] = _mm256_xor_si256( _mm256_set1_epi32( S->t[0] ),
_mm256_set1_epi32( blake2s_IV[4] ) );
v[13] = _mm256_xor_si256( _mm256_set1_epi32( S->t[1] ),
_mm256_set1_epi32( blake2s_IV[5] ) );
v[14] = _mm256_xor_si256( _mm256_set1_epi32( S->f[0] ),
_mm256_set1_epi32( blake2s_IV[6] ) );
v[15] = _mm256_xor_si256( _mm256_set1_epi32( S->f[1] ),
_mm256_set1_epi32( blake2s_IV[7] ) );
#define G8W(r,i,a,b,c,d) \
do { \
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
m[ blake2s_sigma[r][2*i+0] ] ); \
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
c = _mm256_add_epi32( c, d ); \
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
m[ blake2s_sigma[r][2*i+1] ] ); \
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
c = _mm256_add_epi32( c, d ); \
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
} while(0)
#define ROUND8W(r) \
do { \
G8W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
G8W( r, 1, v[ 1], v[ 5], v[ 9], v[13] ); \
G8W( r, 2, v[ 2], v[ 6], v[10], v[14] ); \
G8W( r, 3, v[ 3], v[ 7], v[11], v[15] ); \
G8W( r, 4, v[ 0], v[ 5], v[10], v[15] ); \
G8W( r, 5, v[ 1], v[ 6], v[11], v[12] ); \
G8W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
G8W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
} while(0)
ROUND8W( 0 );
ROUND8W( 1 );
ROUND8W( 2 );
ROUND8W( 3 );
ROUND8W( 4 );
ROUND8W( 5 );
ROUND8W( 6 );
ROUND8W( 7 );
ROUND8W( 8 );
ROUND8W( 9 );
for( size_t i = 0; i < 8; ++i )
S->h[i] = _mm256_xor_si256( _mm256_xor_si256( S->h[i], v[i] ), v[i + 8] );
#undef G8W
#undef ROUND8W
return 0;
}
int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen )
{
blake2s_nway_param P[1];
P->digest_length = outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
P->leaf_length = 0;
*((uint64_t*)(P->node_offset)) = 0;
P->node_depth = 0;
P->inner_length = 0;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
memset( S, 0, sizeof( blake2s_8way_state ) );
for( int i = 0; i < 8; ++i )
S->h[i] = _mm256_set1_epi32( blake2s_IV[i] );
uint32_t *p = ( uint32_t * )( P );
/* IV XOR ParamBlock */
for ( size_t i = 0; i < 8; ++i )
S->h[i] = _mm256_xor_si256( S->h[i], _mm256_set1_epi32( p[i] ) );
return 0;
}
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
uint64_t inlen )
{
__m256i *input = (__m256i*)in;
__m256i *buf = (__m256i*)S->buf;
const int bsize = BLAKE2S_BLOCKBYTES;
while( inlen > 0 )
{
size_t left = S->buflen;
if( inlen >= bsize - left )
{
memcpy_256( buf + (left>>2), input, (bsize - left) >> 2 );
S->buflen += bsize - left;
S->t[0] += BLAKE2S_BLOCKBYTES;
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
blake2s_8way_compress( S, buf );
S->buflen = 0;
input += ( bsize >> 2 );
inlen -= bsize;
}
else
{
memcpy_256( buf + ( left>>2 ), input, inlen>>2 );
S->buflen += (size_t) inlen;
input += ( inlen>>2 );
inlen -= inlen;
}
}
return 0;
}
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen )
{
__m256i *buf = (__m256i*)S->buf;
S->t[0] += S->buflen;
S->t[1] += ( S->t[0] < S->buflen );
if ( S->last_node )
S->f[1] = ~0U;
S->f[0] = ~0U;
memset_zero_256( buf + ( S->buflen>>2 ),
( BLAKE2S_BLOCKBYTES - S->buflen ) >> 2 );
blake2s_8way_compress( S, buf );
for ( int i = 0; i < 8; ++i )
casti_m256i( out, i ) = S->h[ i ];
return 0;
}
#endif // __AVX2__
#if 0
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
blake2s_state S[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */
if( keylen > 0 )
{
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2s_init( S, outlen ) < 0 ) return -1;
}
blake2s_update( S, ( uint8_t * )in, inlen );
blake2s_final( S, out, outlen );
return 0;
}
#endif
#endif // __AVX__

View File

@@ -0,0 +1,112 @@
/**
* BLAKE2 reference source code package - reference C implementations
*
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide. This software is distributed without any warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication along with
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
//#pragma once
#ifndef __BLAKE2S_HASH_4WAY_H__
#define __BLAKE2S_HASH_4WAY_H__ 1
#if defined(__AVX__)
#include "avxdefs.h"
#include <stddef.h>
#include <stdint.h>
#if defined(_MSC_VER)
#include <inttypes.h>
#define inline __inline
#define ALIGN(x) __declspec(align(x))
#else
#define ALIGN(x) __attribute__((aligned(x)))
#endif
#if defined(__cplusplus)
extern "C" {
#endif
enum blake2s_constant
{
BLAKE2S_BLOCKBYTES = 64,
BLAKE2S_OUTBYTES = 32,
BLAKE2S_KEYBYTES = 32,
BLAKE2S_SALTBYTES = 8,
BLAKE2S_PERSONALBYTES = 8
};
#pragma pack(push, 1)
typedef struct __blake2s_nway_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint8_t node_offset[6];// 14
uint8_t node_depth; // 15
uint8_t inner_length; // 16
// uint8_t reserved[0];
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
} blake2s_nway_param;
#pragma pack(pop)
ALIGN( 64 ) typedef struct __blake2s_4way_state
{
__m128i h[8];
uint8_t buf[ BLAKE2S_BLOCKBYTES * 4 ];
uint32_t t[2];
uint32_t f[2];
size_t buflen;
uint8_t last_node;
} blake2s_4way_state ;
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen );
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
uint64_t inlen );
int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen );
#if defined(__AVX2__)
ALIGN( 64 ) typedef struct __blake2s_8way_state
{
__m256i h[8];
uint8_t buf[ BLAKE2S_BLOCKBYTES * 8 ];
uint32_t t[2];
uint32_t f[2];
size_t buflen;
uint8_t last_node;
} blake2s_8way_state ;
int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen );
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
uint64_t inlen );
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
#endif
#if 0
// Simple API
// int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
// Direct Hash Mining Helpers
#define blake2s_salt32(out, in, inlen, key32) blake2s(out, in, key32, 32, inlen, 32) /* neoscrypt */
#define blake2s_simple(out, in, inlen) blake2s(out, in, NULL, 32, inlen, 0)
#endif
#if defined(__cplusplus)
}
#endif
#endif // __AVX__
#endif

View File

@@ -1,26 +1,29 @@
#include "algo-gate-api.h"
#include "blake2s-gate.h"
#include <string.h>
#include <stdint.h>
#include "sph-blake2s.h"
static __thread blake2s_state s_midstate;
static __thread blake2s_state s_ctx;
static __thread blake2s_state blake2s_ctx;
//static __thread blake2s_state s_ctx;
#define MIDLEN 76
void blake2s_hash(void *output, const void *input)
void blake2s_hash( void *output, const void *input )
{
unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
blake2s_state blake2_ctx __attribute__ ((aligned (64)));
blake2s_init(&blake2_ctx, BLAKE2S_OUTBYTES);
blake2s_update(&blake2_ctx, input, 80);
blake2s_final(&blake2_ctx, hash, BLAKE2S_OUTBYTES);
unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
blake2s_state ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &blake2s_ctx, sizeof ctx );
blake2s_update( &ctx, input+64, 16 );
// blake2s_init(&ctx, BLAKE2S_OUTBYTES);
// blake2s_update(&ctx, input, 80);
blake2s_final( &ctx, hash, BLAKE2S_OUTBYTES );
memcpy(output, hash, 32);
}
/*
static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
{
s_ctx.buflen = MIDLEN;
@@ -28,7 +31,7 @@ static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
blake2s_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES);
}
*/
int scanhash_blake2s(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done)
{
@@ -46,13 +49,12 @@ int scanhash_blake2s(int thr_id, struct work *work,
swab32_array( endiandata, pdata, 20 );
// midstate
blake2s_init(&s_midstate, BLAKE2S_OUTBYTES);
blake2s_update(&s_midstate, (uint8_t*) endiandata, MIDLEN);
memcpy(&s_ctx, &s_midstate, sizeof(blake2s_state));
blake2s_init( &blake2s_ctx, BLAKE2S_OUTBYTES );
blake2s_update( &blake2s_ctx, (uint8_t*) endiandata, 64 );
do {
be32enc(&endiandata[19], n);
blake2s_hash_end(hash64, endiandata);
blake2s_hash( hash64, endiandata );
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
@@ -67,7 +69,7 @@ int scanhash_blake2s(int thr_id, struct work *work,
return 0;
}
/*
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake2s_get_max64 ()
{
@@ -81,4 +83,4 @@ bool register_blake2s_algo( algo_gate_t* gate )
gate->get_max64 = (void*)&blake2s_get_max64;
return true;
};
*/

View File

@@ -1,21 +1,22 @@
#include "blakecoin-gate.h"
#if defined (BLAKECOIN_4WAY)
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
blake256r8_4way_context blakecoin_ctx;
#if defined (BLAKECOIN_4WAY)
blake256r8_4way_context blakecoin_4w_ctx;
void blakecoin_4way_hash(void *state, const void *input)
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake256r8_4way_context ctx;
memcpy( &ctx, &blakecoin_ctx, sizeof ctx );
memcpy( &ctx, &blakecoin_4w_ctx, sizeof ctx );
blake256r8_4way( &ctx, input + (64<<2), 16 );
blake256r8_4way_close( &ctx, vhash );
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
@@ -31,58 +32,30 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
if (opt_benchmark)
if ( opt_benchmark )
HTarget = 0x7f;
// we need big endian data...
swab32_array( edata, pdata, 20 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
blake256r8_4way_init( &blakecoin_ctx );
blake256r8_4way( &blakecoin_ctx, vdata, 64 );
blake256r8_4way_init( &blakecoin_4w_ctx );
blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 );
uint32_t *noncep = vdata + 76; // 19*4
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );
blakecoin_4way_hash( hash, vdata );
pdata[19] = n;
blakecoin_4way_hash( hash, vdata );
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
@@ -90,15 +63,77 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
// workaround to prevent flood of hash reports when nonce range exhasuted
// and thread is spinning waiting for new work
if ( ( n >= max_nonce ) && ( *hashes_done < 10 ) )
{
*hashes_done = 0;
// sleep(1);
}
return num_found;
}
#endif
#if defined(BLAKECOIN_8WAY)
blake256r8_8way_context blakecoin_8w_ctx;
void blakecoin_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
blake256r8_8way_context ctx;
memcpy( &ctx, &blakecoin_8w_ctx, sizeof ctx );
blake256r8_8way( &ctx, input + (64<<3), 16 );
blake256r8_8way_close( &ctx, vhash );
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
state+128, state+160, state+192, state+224,
vhash, 256 );
}
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t HTarget = ptarget[7];
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
uint32_t *noncep = vdata + 152; // 19*8
int num_found = 0;
if ( opt_benchmark )
HTarget = 0x7f;
// we need big endian data...
swab32_array( edata, pdata, 20 );
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
blake256r8_8way_init( &blakecoin_8w_ctx );
blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 );
do {
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );
be32enc( noncep +4, n+4 );
be32enc( noncep +5, n+5 );
be32enc( noncep +6, n+6 );
be32enc( noncep +7, n+7 );
pdata[19] = n;
blakecoin_8way_hash( hash, vdata );
for ( int i = 0; i < 8; i++ )
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 8;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return num_found;
}

View File

@@ -8,55 +8,21 @@ int64_t blakecoin_get_max64 ()
// return 0x3fffffLL;
}
// Blakecoin 4 way hashes so fast it runs out of nonces.
// This is an attempt to solve this but the result may be
// to rehash old nonces until new work is received.
void bc4w_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *end_nonce_ptr, bool clean_job )
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
// if ( have_stratum && ( *nonceptr >= *end_nonce_ptr ) )
// algo_gate.stratum_gen_work( &stratum, g_work );
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|| ( *nonceptr >= *end_nonce_ptr )
|| ( ( work->job_id != g_work->job_id ) && clean_job ) )
/*
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| ( work->job_id != g_work->job_id ) ) )
*/
{
work_free( work );
work_copy( work, g_work );
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
if ( opt_randomize )
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
// try incrementing the xnonce to chsnge the data
// for ( int i = 0; i < work->xnonce2_size && !( ++work->xnonce2[i] ); i++ );
}
else
++(*nonceptr);
}
// vanilla uses default gen merkle root, otherwise identical to blakecoin
bool register_vanilla_algo( algo_gate_t* gate )
{
#if defined(BLAKECOIN_4WAY)
// four_way_not_tested();
#if defined(BLAKECOIN_8WAY)
gate->scanhash = (void*)&scanhash_blakecoin_8way;
gate->hash = (void*)&blakecoin_8way_hash;
#elif defined(BLAKECOIN_4WAY)
gate->scanhash = (void*)&scanhash_blakecoin_4way;
gate->hash = (void*)&blakecoin_4way_hash;
// gate->get_new_work = (void*)&bc4w_get_new_work;
// blakecoin_4way_init( &blake_4way_init_ctx );
#else
gate->scanhash = (void*)&scanhash_blakecoin;
gate->hash = (void*)&blakecoinhash;
// blakecoin_init( &blake_init_ctx );
#endif
gate->optimizations = AVX2_OPT;
gate->optimizations = AVX_OPT | AVX2_OPT;
gate->get_max64 = (void*)&blakecoin_get_max64;
return true;
}

View File

@@ -1,12 +1,21 @@
#ifndef __BLAKECOIN_GATE_H__
#define __BLAKECOIN_GATE_H__
#define __BLAKECOIN_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX2__)
#if defined(__AVX__)
#define BLAKECOIN_4WAY
#endif
#if defined(__AVX2__)
#define BLAKECOIN_8WAY
#endif
#if defined (BLAKECOIN_8WAY)
void blakecoin_8way_hash(void *state, const void *input);
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
#if defined (BLAKECOIN_4WAY)
void blakecoin_4way_hash(void *state, const void *input);

View File

@@ -38,7 +38,6 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
// copy to buffer guaranteed to be aligned.
@@ -52,7 +51,6 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
do {
found[0] = found[1] = found[2] = found[3] = false;
* noncep = n;
*(noncep+1) = n+1;
*(noncep+2) = n+2;
@@ -60,35 +58,12 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
decred_hash_4way( hash, vdata );
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
{
work_set_target_ratio( work, hash );
found[0] = true;
num_found++;
nonces[0] = n;
pdata[DECRED_NONCE_INDEX] = n;
}
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
{
work_set_target_ratio( work, hash+8 );
found[1] = true;
num_found++;
nonces[1] = n+1;
}
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
{
work_set_target_ratio( work, hash+16 );
found[2] = true;
num_found++;
nonces[2] = n+2;
}
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
{
work_set_target_ratio( work, hash+24 );
found[3] = true;
num_found++;
nonces[3] = n+3;
pdata[DECRED_NONCE_INDEX] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce)

View File

@@ -111,12 +111,8 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
// uint32_t _ALIGN(32) hash64[8];
// uint32_t _ALIGN(32) endiandata[32];
@@ -150,47 +146,19 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
{
uint32_t mask = masks[m];
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
pentablakehash_4way( hash, vdata );
// return immediately on nonce found, only one submit
if ( ( !(hash[7] & mask) ) && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( !( (hash+(i<<3))[7] & mask )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
pdata[19] = n;
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( (! ((hash+8)[7] & mask) ) && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n;
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( ( !((hash+16)[7] & mask) ) && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n;
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( ( !((hash+24)[7] & mask) ) && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n;
*hashes_done = n - first_nonce + 1;
return 1;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;

View File

@@ -64,12 +64,8 @@ int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 76; // 19*4
uint32_t *noncep1 = vdata + 77;
uint32_t *noncep2 = vdata + 78;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 76; // 19*4
/*
uint32_t *pdata = work->data;
@@ -86,42 +82,20 @@ int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
myriad_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)

View File

@@ -150,6 +150,9 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
int searchNumber = COMPARE_SIZE / opt_n_threads;
int startLoc = threadNumber * searchNumber;
if ( opt_debug )
applog( LOG_DEBUG,"Hash target= %08lx", ptarget[7] );
for(int32_t k = startLoc; k < startLoc + searchNumber && !work_restart[threadNumber].restart; k++)
{
// copy data to first l2 cache

View File

@@ -95,12 +95,8 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7];
uint32_t n = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
uint64_t htmax[] = {
0,
@@ -131,46 +127,21 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
{
uint32_t mask = masks[m];
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
jha_hash_4way( hash, vdata );
pdata[19] = n;
if ( ( !(hash[7] & mask) )
&& fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( !((hash+8)[7] & mask) )
&& fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( !((hash+16)[7] & mask) )
&& fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( !((hash+24)[7] & mask) )
&& fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -32,12 +32,8 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
// const uint32_t Htarg = ptarget[7];
uint32_t endiandata[20];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
for ( int i=0; i < 19; i++ )
be32enc( &endiandata[i], pdata[i] );
@@ -46,42 +42,20 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
keccakhash_4way( hash, vdata );
if ( ( ( hash[7] & 0xFFFFFF00 ) == 0 )
&& fulltest( hash, ptarget) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
pdata[19] = n;
}
if ( ( ( (hash+8)[7] & 0xFFFFFF00 ) == 0 )
&& fulltest( hash+8, ptarget) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
}
if ( ( ( (hash+16) [7] & 0xFFFFFF00 ) == 0 )
&& fulltest( hash+16, ptarget) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
}
if ( ( ( (hash+24)[7] & 0xFFFFFF00 ) == 0 )
&& fulltest( hash+24, ptarget) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;

View File

@@ -491,14 +491,14 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
__m256i *buffer = (__m256i*)state->buffer;
__m256i msg[2];
int i;
int blocks = (int)len / 32;
state-> rembytes = (int)len % 32;
int blocks = (int)len >> 5;
state-> rembytes = (int)len & 0x1F;
// full blocks
for ( i = 0; i < blocks; i++, vdata+=2 )
{
msg[0] = mm256_bswap_32( vdata[ i ] );
msg[1] = mm256_bswap_32( vdata[ i+1 ] );
msg[0] = mm256_bswap_32( vdata[ 0] );
msg[1] = mm256_bswap_32( vdata[ 1 ] );
rnd512_2way( state, msg );
}
@@ -533,7 +533,7 @@ int luffa_2way_close( luffa_2way_context *state, void *hashval )
finalization512_2way( state, (uint32*)hashval );
if ( state->hashbitlen > 512 )
finalization512_2way( state, (uint32*)( hashval+128 ) );
finalization512_2way( state, (uint32*)( hashval+32 ) );
return 0;
}
@@ -575,7 +575,7 @@ int luffa_2way_update_close( luffa_2way_context *state,
finalization512_2way( state, (uint32*)output );
if ( state->hashbitlen > 512 )
finalization512_2way( state, (uint32*)( output+128 ) );
finalization512_2way( state, (uint32*)( output+32 ) );
return 0;
}

View File

@@ -1,5 +1,6 @@
#include "allium-gate.h"
#include <memory.h>
#include <mm_malloc.h>
#if defined (ALLIUM_4WAY)
@@ -18,14 +19,15 @@ typedef struct {
} allium_4way_ctx_holder;
static allium_4way_ctx_holder allium_4way_ctx;
static __thread allium_4way_ctx_holder allium_4way_ctx;
void init_allium_4way_ctx()
bool init_allium_4way_ctx()
{
keccak256_4way_init( &allium_4way_ctx.keccak );
cubehashInit( &allium_4way_ctx.cube, 256, 16, 32 );
skein256_4way_init( &allium_4way_ctx.skein );
init_groestl256( &allium_4way_ctx.groestl, 32 );
return true;
}
void allium_4way_hash( void *state, const void *input )
@@ -99,12 +101,8 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 76; // 19*4
uint32_t *noncep1 = vdata + 77;
uint32_t *noncep2 = vdata + 78;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 76; // 19*4
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -115,44 +113,22 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
allium_4way_hash( hash, vdata );
pdata[19] = n;
allium_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)
&& !work_restart[thr_id].restart);

View File

@@ -5,11 +5,11 @@ int64_t get_max64_0xFFFFLL() { return 0xFFFFLL; }
bool register_allium_algo( algo_gate_t* gate )
{
#if defined (ALLIUM_4WAY)
init_allium_4way_ctx();
gate->miner_thread_init = (void*)&init_allium_4way_ctx;
gate->scanhash = (void*)&scanhash_allium_4way;
gate->hash = (void*)&allium_4way_hash;
#else
init_allium_ctx();
gate->miner_thread_init = (void*)&init_allium_ctx;
gate->scanhash = (void*)&scanhash_allium;
gate->hash = (void*)&allium_hash;
#endif

View File

@@ -16,14 +16,14 @@ bool register_allium_algo( algo_gate_t* gate );
void allium_4way_hash( void *state, const void *input );
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_allium_4way_ctx();
bool init_allium_4way_ctx();
#endif
void allium_hash( void *state, const void *input );
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_allium_ctx();
bool init_allium_ctx();
#endif

View File

@@ -12,9 +12,9 @@
#include "lyra2.h"
typedef struct {
cubehashParam cube;
sph_blake256_context blake;
sph_keccak256_context keccak;
cubehashParam cube;
sph_skein256_context skein;
#if defined (__AES__)
hashState_groestl256 groestl;
@@ -23,9 +23,9 @@ typedef struct {
#endif
} allium_ctx_holder;
static allium_ctx_holder allium_ctx;
static __thread allium_ctx_holder allium_ctx;
void init_allium_ctx()
bool init_allium_ctx()
{
sph_keccak256_init( &allium_ctx.keccak );
cubehashInit( &allium_ctx.cube, 256, 16, 32 );
@@ -35,6 +35,7 @@ void init_allium_ctx()
#else
sph_groestl256_init( &allium_ctx.groestl );
#endif
return true;
}
void allium_hash(void *state, const void *input)

View File

@@ -1,123 +0,0 @@
#include "allium-gate.h"
#include <memory.h>
#include "algo/blake/sph_blake.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl256.h"
#else
#include "algo/groestl/sph_groestl.h"
#endif
typedef struct {
cubehashParam cube;
sph_blake256_context blake;
sph_keccak256_context keccak;
sph_skein256_context skein;
#if defined (__AES__)
hashState_groestl256 groestl;
#else
sph_groestl256_context groestl;
#endif
} allium_ctx_holder;
static allium_ctx_holder allium_ctx;
static __thread sph_blake256_context allium_blake_mid;
void init_allium_ctx()
{
cubehashInit( &allium_ctx.cube, 256, 16, 32 );
sph_blake256_init( &allium_ctx.blake );
sph_keccak256_init( &allium_ctx.keccak );
sph_skein256_init( &allium_ctx.skein );
#if defined (__AES__)
init_groestl256( &allium_ctx.groestl, 32 );
#else
sph_groestl256_init( &allium_ctx.groestl );
#endif
}
void allium_blake256_midstate( const void* input )
{
memcpy( &allium_blake_mid, &allium_ctx.blake, sizeof allium_blake_mid );
sph_blake256( &allium_blake_mid, input, 64 );
}
void allium_hash( void *state, const void *input )
{
allium_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &allium_ctx, sizeof(allium_ctx) );
uint8_t hash[128] __attribute__ ((aligned (64)));
const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16
memcpy( &ctx.blake, &allium_blake_mid, sizeof allium_blake_mid );
sph_blake256( &ctx.blake, (uint8_t*)input + midlen, tail );
sph_blake256_close( &ctx.blake, hash );
sph_keccak256( &ctx.keccak, hash, 32 );
sph_keccak256_close(&ctx.keccak, hash);
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
// LYRA2REV2( allium_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 8, 8 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash, (const byte*)hash, 32 );
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
// LYRA2REV2( allium_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 8, 8 );
sph_skein256( &ctx.skein, hash, 32 );
sph_skein256_close( &ctx.skein, hash );
#if defined (__AES__)
update_and_final_groestl256( &ctx.groestl, hash, hash, 256 );
#else
sph_groestl256( &ctx.skein, hash, 32 );
sph_groestl256_close( &ctx.skein, hash );
#endif
memcpy( state, hash, 32 );
}
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t endiandata[20] __attribute__ ((aligned (64)));
uint32_t hash[8] __attribute__((aligned(64)));
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
const uint32_t Htarg = ptarget[7];
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
swab32_array( endiandata, pdata, 20 );
allium_blake256_midstate( endiandata );
do {
be32enc(&endiandata[19], nonce);
allium_hash(hash, endiandata);
if (hash[7] <= Htarg )
{
if( fulltest(hash, ptarget) )
{
pdata[19] = nonce;
work_set_target_ratio( work, hash );
*hashes_done = pdata[19] - first_nonce;
return 1;
}
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}

View File

@@ -68,13 +68,13 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
//Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// for Lyra2REv2, nCols = 4, v1 was using 8
const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
uint64_t *ptrWord = wholeMatrix;
memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
//=== Getting the password + salt + basil padded with 10*1 ==========//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
@@ -232,9 +232,9 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
//Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
//==== Getting the password + salt + basil padded with 10*1 ============//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
@@ -380,18 +380,17 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
i = (int64_t)ROW_LEN_BYTES * nRows;
uint64_t *wholeMatrix = _mm_malloc( i, 32 );
// uint64_t *wholeMatrix = _mm_malloc( i, 64 );
uint64_t *wholeMatrix = _mm_malloc( i, 64 );
if (wholeMatrix == NULL)
return -1;
//#if defined (__AVX2__)
// memset_zero_m256i( (__m256i*)wholeMatrix, i<<5 );
//#elif defined(__AVX__)
// memset_zero_m128i( (__m128i*)wholeMatrix, i<<4 );
//#else
memset(wholeMatrix, 0, i);
//#endif
#if defined(__AVX2__)
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
#elif defined(__AVX__)
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
#else
memset( wholeMatrix, 0, i );
#endif
uint64_t *ptrWord = wholeMatrix;
@@ -413,8 +412,8 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
memcpy(ptrByte, salt, saltlen);
ptrByte += saltlen;
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
- (saltlen + pwdlen) );
// memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
// - (saltlen + pwdlen) );
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
memcpy(ptrByte, &kLen, sizeof(int64_t));

View File

@@ -61,12 +61,8 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 76; // 19*4
uint32_t *noncep1 = vdata + 77;
uint32_t *noncep2 = vdata + 78;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep= vdata + 76; // 19*4
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
@@ -79,42 +75,20 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
lyra2h_4way_midstate( vdata );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
be32enc( &edata[19], n );
lyra2h_4way_hash( hash, vdata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)

View File

@@ -19,12 +19,13 @@ typedef struct {
static lyra2v2_4way_ctx_holder l2v2_4way_ctx;
void init_lyra2rev2_4way_ctx()
bool init_lyra2rev2_4way_ctx()
{
keccak256_4way_init( &l2v2_4way_ctx.keccak );
cubehashInit( &l2v2_4way_ctx.cube, 256, 16, 32 );
skein256_4way_init( &l2v2_4way_ctx.skein );
bmw256_4way_init( &l2v2_4way_ctx.bmw );
return true;
}
void lyra2rev2_4way_hash( void *state, const void *input )
@@ -92,12 +93,8 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 76; // 19*4
uint32_t *noncep1 = vdata + 77;
uint32_t *noncep2 = vdata + 78;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 76; // 19*4
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -110,42 +107,20 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
lyra2rev2_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)

View File

@@ -14,18 +14,20 @@ bool lyra2rev2_thread_init()
int i = (int64_t)ROW_LEN_BYTES * 4; // nRows;
l2v2_wholeMatrix = _mm_malloc( i, 64 );
#if defined (LYRA2REV2_4WAY)
init_lyra2rev2_4way_ctx();;
#else
init_lyra2rev2_ctx();
#endif
return l2v2_wholeMatrix;
}
bool register_lyra2rev2_algo( algo_gate_t* gate )
{
#if defined (LYRA2REV2_4WAY)
init_lyra2rev2_4way_ctx();
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
gate->hash = (void*)&lyra2rev2_4way_hash;
#else
init_lyra2rev2_ctx();
gate->scanhash = (void*)&scanhash_lyra2rev2;
gate->hash = (void*)&lyra2rev2_hash;
#endif

View File

@@ -20,7 +20,7 @@ void lyra2rev2_4way_hash( void *state, const void *input );
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_lyra2rev2_4way_ctx();
bool init_lyra2rev2_4way_ctx();
#endif
@@ -29,7 +29,7 @@ void lyra2rev2_hash( void *state, const void *input );
int scanhash_lyra2rev2( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_lyra2rev2_ctx();
bool init_lyra2rev2_ctx();
#endif

View File

@@ -21,7 +21,7 @@ typedef struct {
static lyra2v2_ctx_holder lyra2v2_ctx;
static __thread sph_blake256_context l2v2_blake_mid;
void init_lyra2rev2_ctx()
bool init_lyra2rev2_ctx()
{
cubehashInit( &lyra2v2_ctx.cube1, 256, 16, 32 );
cubehashInit( &lyra2v2_ctx.cube2, 256, 16, 32 );
@@ -29,6 +29,7 @@ void init_lyra2rev2_ctx()
sph_keccak256_init( &lyra2v2_ctx.keccak );
sph_skein256_init( &lyra2v2_ctx.skein );
sph_bmw256_init( &lyra2v2_ctx.bmw );
return true;
}
void l2v2_blake256_midstate( const void* input )

View File

@@ -61,12 +61,8 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 76; // 19*4
uint32_t *noncep1 = vdata + 77;
uint32_t *noncep2 = vdata + 78;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 76; // 19*4
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
@@ -79,42 +75,20 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
lyra2z_4way_midstate( vdata );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
lyra2z_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)
@@ -126,3 +100,115 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
#endif
#if defined(LYRA2Z_8WAY)
__thread uint64_t* lyra2z_8way_matrix;
bool lyra2z_8way_thread_init()
{
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
}
static __thread blake256_8way_context l2z_8way_blake_mid;
void lyra2z_8way_midstate( const void* input )
{
blake256_8way_init( &l2z_8way_blake_mid );
blake256_8way( &l2z_8way_blake_mid, input, 64 );
}
void lyra2z_8way_hash( void *state, const void *input )
{
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t hash4[8] __attribute__ ((aligned (64)));
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
blake256_8way_context ctx_blake __attribute__ ((aligned (64)));
memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
blake256_8way( &ctx_blake, input + (64*8), 16 );
blake256_8way_close( &ctx_blake, vhash );
mm256_deinterleave_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash4, 32, hash4, 32, hash4, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash5, 32, hash5, 32, hash5, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash6, 32, hash6, 32, hash6, 32, 8, 8, 8 );
LYRA2Z( lyra2z_8way_matrix, hash7, 32, hash7, 32, hash7, 32, 8, 8, 8 );
memcpy( state, hash0, 32 );
memcpy( state+ 32, hash1, 32 );
memcpy( state+ 64, hash2, 32 );
memcpy( state+ 96, hash3, 32 );
memcpy( state+128, hash1, 32 );
memcpy( state+160, hash2, 32 );
memcpy( state+192, hash3, 32 );
memcpy( state+224, hash1, 32 );
}
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
int num_found = 0;
uint32_t *noncep = vdata + 152; // 19*8
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
for ( int i=0; i < 19; i++ )
be32enc( &edata[i], pdata[i] );
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
lyra2z_8way_midstate( vdata );
do {
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
be32enc( noncep+4, n+4 );
be32enc( noncep+5, n+5 );
be32enc( noncep+6, n+6 );
be32enc( noncep+7, n+7 );
lyra2z_8way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 8; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 8;
} while ( (num_found == 0) && (n < max_nonce-4)
&& !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif

View File

@@ -8,7 +8,11 @@ void lyra2z_set_target( struct work* work, double job_diff )
bool register_lyra2z_algo( algo_gate_t* gate )
{
#ifdef LYRA2Z_4WAY
#if defined(LYRA2Z_8WAY)
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z_8way;
gate->hash = (void*)&lyra2z_8way_hash;
#elif defined(LYRA2Z_4WAY)
gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z_4way;
gate->hash = (void*)&lyra2z_4way_hash;

View File

@@ -1,17 +1,29 @@
#ifndef LYRA2Z_GATE_H__
#define LYRA2Z_GATE_H__
#define LYRA2Z_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX2__)
#if defined(__AVX__)
#define LYRA2Z_4WAY
#endif
#if defined(__AVX2__)
// #define LYRA2Z_8WAY
#endif
#define LYRA2Z_MATRIX_SIZE BLOCK_LEN_INT64 * 8 * 8 * 8
#if defined(LYRA2Z_4WAY)
#if defined(LYRA2Z_8WAY)
void lyra2z_8way_hash( void *state, const void *input );
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
bool lyra2z_8way_thread_init();
#elif defined(LYRA2Z_4WAY)
void lyra2z_4way_hash( void *state, const void *input );
@@ -20,7 +32,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
bool lyra2z_4way_thread_init();
#endif
#else
void lyra2z_hash( void *state, const void *input );
@@ -31,3 +43,4 @@ bool lyra2z_thread_init();
#endif
#endif

View File

@@ -79,12 +79,8 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
uint64_t htmax[] = { 0,
0xF,
@@ -117,47 +113,22 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
nist5hash_4way( hash, vdata );
pdata[19] = n;
if ( ( !(hash[7] & mask) )
&& fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( !((hash+8)[7] & mask) )
&& fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( !((hash+16)[7] & mask) )
&& fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( !((hash+24)[7] & mask) )
&& fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -145,12 +145,8 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = {
0,
@@ -181,42 +177,21 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
anime_4way_hash( hash, vdata );
pdata[19] = n;
anime_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -145,12 +145,8 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
swab32_array( endiandata, pdata, 20 );
@@ -159,42 +155,21 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
quark_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & 0xFFFFFF00 ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash );
}
if ( ( (hash+16)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash );
}
if ( ( (hash+24)[7] & 0xFFFFFF00 ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -74,10 +74,8 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 32+3; // 4*8 + 3
uint32_t *noncep1 = vdata + 32+7;
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -98,24 +96,20 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep, n );
be32enc( noncep+4, n+1 );
deep_2way_hash( hash, vdata );
pdata[19] = n;
if ( !( hash[7] & mask ) && fulltest( hash, ptarget) )
{
found[0] = true;
num_found++;
nonces[0] = n;
nonces[ num_found++ ] = n;
work_set_target_ratio( work, hash );
}
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
nonces[ num_found++ ] = n+1;
work_set_target_ratio( work, hash+8 );
}
n += 2;

View File

@@ -25,7 +25,6 @@ qubit_2way_ctx_holder qubit_2way_ctx;
void init_qubit_2way_ctx()
{
luffa_2way_init( &qubit_2way_ctx.luffa, 512 );
cubehashInit(&qubit_2way_ctx.cube,512,16,32);
sph_shavite512_init(&qubit_2way_ctx.shavite);
simd_2way_init( &qubit_2way_ctx.simd, 512 );
@@ -81,10 +80,8 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 32+3; // 4*8 + 3
uint32_t *noncep1 = vdata + 32+7;
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -94,7 +91,6 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
// big endian encode 0..18 uint32_t, 64 bits at a time
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
@@ -106,24 +102,21 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep, n );
be32enc( noncep+4, n+1 );
qubit_2way_hash( hash, vdata );
pdata[19] = n;
if ( !( hash[7] & mask ) && fulltest( hash, ptarget) )
{
found[0] = true;
num_found++;
nonces[0] = n;
nonces[ num_found++ ] = n;
work_set_target_ratio( work, hash );
}
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
pdata[19] = n+1;
nonces[ num_found++ ] = n+1;
work_set_target_ratio( work, hash+8 );
}
n += 2;

View File

@@ -1,7 +1,4 @@
#include "lbry-gate.h"
#if defined(LBRY_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -9,6 +6,141 @@
#include "algo/sha/sha2-hash-4way.h"
#include "ripemd-hash-4way.h"
#define LBRY_INPUT_SIZE 112
#define LBRY_MIDSTATE 64
#define LBRY_TAIL (LBRY_INPUT_SIZE) - (LBRY_MIDSTATE)
#if defined(LBRY_8WAY)
static __thread sha256_8way_context sha256_8w_mid;
void lbry_8way_hash( void* output, const void* input )
{
uint32_t _ALIGN(64) vhashA[16<<3];
uint32_t _ALIGN(64) vhashB[16<<3];
uint32_t _ALIGN(64) vhashC[16<<3];
uint32_t _ALIGN(32) h0[32];
uint32_t _ALIGN(32) h1[32];
uint32_t _ALIGN(32) h2[32];
uint32_t _ALIGN(32) h3[32];
uint32_t _ALIGN(32) h4[32];
uint32_t _ALIGN(32) h5[32];
uint32_t _ALIGN(32) h6[32];
uint32_t _ALIGN(32) h7[32];
sha256_8way_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_4way_context ctx_sha512;
ripemd160_8way_context ctx_ripemd;
memcpy( &ctx_sha256, &sha256_8w_mid, sizeof(ctx_sha256) );
sha256_8way( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
sha256_8way_close( &ctx_sha256, vhashA );
sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, vhashA );
// reinterleave to do sha512 4-way 64 bit twice.
mm256_deinterleave_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
mm256_interleave_4x64( vhashA, h0, h1, h2, h3, 256 );
mm256_interleave_4x64( vhashB, h4, h5, h6, h7, 256 );
sha512_4way_init( &ctx_sha512 );
sha512_4way( &ctx_sha512, vhashA, 32 );
sha512_4way_close( &ctx_sha512, vhashA );
sha512_4way_init( &ctx_sha512 );
sha512_4way( &ctx_sha512, vhashB, 32 );
sha512_4way_close( &ctx_sha512, vhashB );
// back to 8-way 32 bit
mm256_deinterleave_4x64( h0, h1, h2, h3, vhashA, 512 );
mm256_deinterleave_4x64( h4, h5, h6, h7, vhashB, 512 );
mm256_interleave_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way( &ctx_ripemd, vhashA, 32 );
ripemd160_8way_close( &ctx_ripemd, vhashB );
ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way( &ctx_ripemd, vhashA+(8<<3), 32 );
ripemd160_8way_close( &ctx_ripemd, vhashC );
sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashB, 20 );
sha256_8way( &ctx_sha256, vhashC, 20 );
sha256_8way_close( &ctx_sha256, vhashA );
sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, vhashA );
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
output+128, output+160, output+192, output+224,
vhashA, 256 );
}
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done)
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[32*8] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[27];
const uint32_t first_nonce = pdata[27];
const uint32_t Htarg = ptarget[7];
uint32_t edata[32] __attribute__ ((aligned (64)));
uint32_t *nonces = work->nonces;
int num_found = 0;
uint32_t *noncep = vdata + 216; // 27*8
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
// we need bigendian data...
swab32_array( edata, pdata, 32 );
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 1024 );
sha256_8way_init( &sha256_8w_mid );
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do
{
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
be32enc( noncep+4, n+4 );
be32enc( noncep+5, n+5 );
be32enc( noncep+6, n+6 );
be32enc( noncep+7, n+7 );
lbry_8way_hash( hash, vdata );
for ( int i = 0; i < 8; i++ )
if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
{
pdata[27] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n+=8;
} while ( ( num_found == 0 ) && ( n < max_nonce )
&& !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce;
return num_found;
}
#elif defined(LBRY_4WAY)
static __thread sha256_4way_context sha256_mid;
void lbry_4way_hash( void* output, const void* input )
@@ -21,7 +153,7 @@ void lbry_4way_hash( void* output, const void* input )
uint32_t _ALIGN(64) vhashC[16<<2];
memcpy( &ctx_sha256, &sha256_mid, sizeof(ctx_sha256) );
sha256_4way( &ctx_sha256, input+(64<<2), 48 );
sha256_4way( &ctx_sha256, input + (LBRY_MIDSTATE<<2), LBRY_TAIL );
sha256_4way_close( &ctx_sha256, vhashA );
sha256_4way_init( &ctx_sha256 );
@@ -67,12 +199,8 @@ int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7];
uint32_t edata[32] __attribute__ ((aligned (64)));
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 108; // 27*4
uint32_t *noncep1 = vdata + 109;
uint32_t *noncep2 = vdata + 110;
uint32_t *noncep3 = vdata + 111;
uint32_t *noncep = vdata + 108; // 27*4
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -83,47 +211,26 @@ int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
swab32_array( edata, pdata, 32 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 1024 );
sha256_4way_init( &sha256_mid );
sha256_4way( &sha256_mid, vdata, 64 );
sha256_4way( &sha256_mid, vdata, LBRY_MIDSTATE );
for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );
lbry_4way_hash( hash, vdata );
if ( !( hash[7] & mask ) && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = pdata[27] = n;
work_set_target_ratio( work, hash );
}
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( !( (hash+16)[7] & mask ) && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( !( (hash+24)[7] & mask ) && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[27] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n+=4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -73,7 +73,10 @@ int64_t lbry_get_max64() { return 0x1ffffLL; }
bool register_lbry_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
#if defined (LBRY_4WAY)
#if defined (LBRY_8WAY)
gate->scanhash = (void*)&scanhash_lbry_8way;
gate->hash = (void*)&lbry_8way_hash;
#elif defined (LBRY_4WAY)
gate->scanhash = (void*)&scanhash_lbry_4way;
gate->hash = (void*)&lbry_4way_hash;
#else

View File

@@ -4,8 +4,9 @@
#include "algo-gate-api.h"
#include <stdint.h>
// need sha512 2 way AVX x2 or 1 way scalar x4 to support 4way AVX.
#if defined(__AVX2__)
#define LBRY_4WAY
#define LBRY_8WAY
#endif
#define LBRY_NTIME_INDEX 25
@@ -16,15 +17,21 @@
bool register_lbry_algo( algo_gate_t* gate );
#if defined(LBRY_4WAY)
#if defined(LBRY_8WAY)
void lbry_8way_hash( void *state, const void *input );
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#elif defined(LBRY_4WAY)
void lbry_4way_hash( void *state, const void *input );
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
#else
void lbry_hash( void *state, const void *input );
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
#endif

View File

@@ -5,25 +5,6 @@
#include <stddef.h>
#include <string.h>
/*
* Round functions for RIPEMD-128 and RIPEMD-160.
*/
#define F1(x, y, z) \
_mm_xor_si128( _mm_xor_si128( x, y ), z )
#define F2(x, y, z) \
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( y, z ), x ), z )
#define F3(x, y, z) \
_mm_xor_si128( _mm_or_si128( x, mm_not( y ) ), z )
#define F4(x, y, z) \
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( x, y ), z ), y )
#define F5(x, y, z) \
_mm_xor_si128( x, _mm_or_si128( y, mm_not( z ) ) )
static const uint32_t IV[5] =
{ 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };
@@ -42,6 +23,23 @@ static const uint32_t IV[5] =
#define K24 0x7A6D76E9
#define K25 0x00000000
// RIPEMD-160 4 way
#define F1(x, y, z) \
_mm_xor_si128( _mm_xor_si128( x, y ), z )
#define F2(x, y, z) \
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( y, z ), x ), z )
#define F3(x, y, z) \
_mm_xor_si128( _mm_or_si128( x, mm_not( y ) ), z )
#define F4(x, y, z) \
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( x, y ), z ), y )
#define F5(x, y, z) \
_mm_xor_si128( x, _mm_or_si128( y, mm_not( z ) ) )
#define RR(a, b, c, d, e, f, s, r, k) \
do{ \
a = _mm_add_epi32( mm_rotl_32( _mm_add_epi32( _mm_add_epi32( \
@@ -321,3 +319,304 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
}
#endif
#if defined(__AVX2__)
// Ripemd-160 8 way
#define F8W_1(x, y, z) \
_mm256_xor_si256( _mm256_xor_si256( x, y ), z )
#define F8W_2(x, y, z) \
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( y, z ), x ), z )
#define F8W_3(x, y, z) \
_mm256_xor_si256( _mm256_or_si256( x, mm256_not( y ) ), z )
#define F8W_4(x, y, z) \
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( x, y ), z ), y )
#define F8W_5(x, y, z) \
_mm256_xor_si256( x, _mm256_or_si256( y, mm256_not( z ) ) )
#define RR_8W(a, b, c, d, e, f, s, r, k) \
do{ \
a = _mm256_add_epi32( mm256_rotl_32( _mm256_add_epi32( _mm256_add_epi32( \
_mm256_add_epi32( a, f( b ,c, d ) ), r ), \
_mm256_set1_epi32( k ) ), s ), e ); \
c = mm256_rotl_32( c, 10 );\
} while (0)
#define ROUND1_8W(a, b, c, d, e, f, s, r, k) \
RR_8W(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
#define ROUND2_8W(a, b, c, d, e, f, s, r, k) \
RR_8W(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
static void ripemd160_8way_round( ripemd160_8way_context *sc )
{
const __m256i *in = (__m256i*)sc->buf;
__m256i *h = (__m256i*)sc->val;
register __m256i A1, B1, C1, D1, E1;
register __m256i A2, B2, C2, D2, E2;
__m256i tmp;
A1 = A2 = h[0];
B1 = B2 = h[1];
C1 = C2 = h[2];
D1 = D2 = h[3];
E1 = E2 = h[4];
ROUND1_8W( A, B, C, D, E, F8W_1, 11, in[ 0], 1 );
ROUND1_8W( E, A, B, C, D, F8W_1, 14, in[ 1], 1 );
ROUND1_8W( D, E, A, B, C, F8W_1, 15, in[ 2], 1 );
ROUND1_8W( C, D, E, A, B, F8W_1, 12, in[ 3], 1 );
ROUND1_8W( B, C, D, E, A, F8W_1, 5, in[ 4], 1 );
ROUND1_8W( A, B, C, D, E, F8W_1, 8, in[ 5], 1 );
ROUND1_8W( E, A, B, C, D, F8W_1, 7, in[ 6], 1 );
ROUND1_8W( D, E, A, B, C, F8W_1, 9, in[ 7], 1 );
ROUND1_8W( C, D, E, A, B, F8W_1, 11, in[ 8], 1 );
ROUND1_8W( B, C, D, E, A, F8W_1, 13, in[ 9], 1 );
ROUND1_8W( A, B, C, D, E, F8W_1, 14, in[10], 1 );
ROUND1_8W( E, A, B, C, D, F8W_1, 15, in[11], 1 );
ROUND1_8W( D, E, A, B, C, F8W_1, 6, in[12], 1 );
ROUND1_8W( C, D, E, A, B, F8W_1, 7, in[13], 1 );
ROUND1_8W( B, C, D, E, A, F8W_1, 9, in[14], 1 );
ROUND1_8W( A, B, C, D, E, F8W_1, 8, in[15], 1 );
ROUND1_8W( E, A, B, C, D, F8W_2, 7, in[ 7], 2 );
ROUND1_8W( D, E, A, B, C, F8W_2, 6, in[ 4], 2 );
ROUND1_8W( C, D, E, A, B, F8W_2, 8, in[13], 2 );
ROUND1_8W( B, C, D, E, A, F8W_2, 13, in[ 1], 2 );
ROUND1_8W( A, B, C, D, E, F8W_2, 11, in[10], 2 );
ROUND1_8W( E, A, B, C, D, F8W_2, 9, in[ 6], 2 );
ROUND1_8W( D, E, A, B, C, F8W_2, 7, in[15], 2 );
ROUND1_8W( C, D, E, A, B, F8W_2, 15, in[ 3], 2 );
ROUND1_8W( B, C, D, E, A, F8W_2, 7, in[12], 2 );
ROUND1_8W( A, B, C, D, E, F8W_2, 12, in[ 0], 2 );
ROUND1_8W( E, A, B, C, D, F8W_2, 15, in[ 9], 2 );
ROUND1_8W( D, E, A, B, C, F8W_2, 9, in[ 5], 2 );
ROUND1_8W( C, D, E, A, B, F8W_2, 11, in[ 2], 2 );
ROUND1_8W( B, C, D, E, A, F8W_2, 7, in[14], 2 );
ROUND1_8W( A, B, C, D, E, F8W_2, 13, in[11], 2 );
ROUND1_8W( E, A, B, C, D, F8W_2, 12, in[ 8], 2 );
ROUND1_8W( D, E, A, B, C, F8W_3, 11, in[ 3], 3 );
ROUND1_8W( C, D, E, A, B, F8W_3, 13, in[10], 3 );
ROUND1_8W( B, C, D, E, A, F8W_3, 6, in[14], 3 );
ROUND1_8W( A, B, C, D, E, F8W_3, 7, in[ 4], 3 );
ROUND1_8W( E, A, B, C, D, F8W_3, 14, in[ 9], 3 );
ROUND1_8W( D, E, A, B, C, F8W_3, 9, in[15], 3 );
ROUND1_8W( C, D, E, A, B, F8W_3, 13, in[ 8], 3 );
ROUND1_8W( B, C, D, E, A, F8W_3, 15, in[ 1], 3 );
ROUND1_8W( A, B, C, D, E, F8W_3, 14, in[ 2], 3 );
ROUND1_8W( E, A, B, C, D, F8W_3, 8, in[ 7], 3 );
ROUND1_8W( D, E, A, B, C, F8W_3, 13, in[ 0], 3 );
ROUND1_8W( C, D, E, A, B, F8W_3, 6, in[ 6], 3 );
ROUND1_8W( B, C, D, E, A, F8W_3, 5, in[13], 3 );
ROUND1_8W( A, B, C, D, E, F8W_3, 12, in[11], 3 );
ROUND1_8W( E, A, B, C, D, F8W_3, 7, in[ 5], 3 );
ROUND1_8W( D, E, A, B, C, F8W_3, 5, in[12], 3 );
ROUND1_8W( C, D, E, A, B, F8W_4, 11, in[ 1], 4 );
ROUND1_8W( B, C, D, E, A, F8W_4, 12, in[ 9], 4 );
ROUND1_8W( A, B, C, D, E, F8W_4, 14, in[11], 4 );
ROUND1_8W( E, A, B, C, D, F8W_4, 15, in[10], 4 );
ROUND1_8W( D, E, A, B, C, F8W_4, 14, in[ 0], 4 );
ROUND1_8W( C, D, E, A, B, F8W_4, 15, in[ 8], 4 );
ROUND1_8W( B, C, D, E, A, F8W_4, 9, in[12], 4 );
ROUND1_8W( A, B, C, D, E, F8W_4, 8, in[ 4], 4 );
ROUND1_8W( E, A, B, C, D, F8W_4, 9, in[13], 4 );
ROUND1_8W( D, E, A, B, C, F8W_4, 14, in[ 3], 4 );
ROUND1_8W( C, D, E, A, B, F8W_4, 5, in[ 7], 4 );
ROUND1_8W( B, C, D, E, A, F8W_4, 6, in[15], 4 );
ROUND1_8W( A, B, C, D, E, F8W_4, 8, in[14], 4 );
ROUND1_8W( E, A, B, C, D, F8W_4, 6, in[ 5], 4 );
ROUND1_8W( D, E, A, B, C, F8W_4, 5, in[ 6], 4 );
ROUND1_8W( C, D, E, A, B, F8W_4, 12, in[ 2], 4 );
ROUND1_8W( B, C, D, E, A, F8W_5, 9, in[ 4], 5 );
ROUND1_8W( A, B, C, D, E, F8W_5, 15, in[ 0], 5 );
ROUND1_8W( E, A, B, C, D, F8W_5, 5, in[ 5], 5 );
ROUND1_8W( D, E, A, B, C, F8W_5, 11, in[ 9], 5 );
ROUND1_8W( C, D, E, A, B, F8W_5, 6, in[ 7], 5 );
ROUND1_8W( B, C, D, E, A, F8W_5, 8, in[12], 5 );
ROUND1_8W( A, B, C, D, E, F8W_5, 13, in[ 2], 5 );
ROUND1_8W( E, A, B, C, D, F8W_5, 12, in[10], 5 );
ROUND1_8W( D, E, A, B, C, F8W_5, 5, in[14], 5 );
ROUND1_8W( C, D, E, A, B, F8W_5, 12, in[ 1], 5 );
ROUND1_8W( B, C, D, E, A, F8W_5, 13, in[ 3], 5 );
ROUND1_8W( A, B, C, D, E, F8W_5, 14, in[ 8], 5 );
ROUND1_8W( E, A, B, C, D, F8W_5, 11, in[11], 5 );
ROUND1_8W( D, E, A, B, C, F8W_5, 8, in[ 6], 5 );
ROUND1_8W( C, D, E, A, B, F8W_5, 5, in[15], 5 );
ROUND1_8W( B, C, D, E, A, F8W_5, 6, in[13], 5 );
ROUND2_8W( A, B, C, D, E, F8W_5, 8, in[ 5], 1 );
ROUND2_8W( E, A, B, C, D, F8W_5, 9, in[14], 1 );
ROUND2_8W( D, E, A, B, C, F8W_5, 9, in[ 7], 1 );
ROUND2_8W( C, D, E, A, B, F8W_5, 11, in[ 0], 1 );
ROUND2_8W( B, C, D, E, A, F8W_5, 13, in[ 9], 1 );
ROUND2_8W( A, B, C, D, E, F8W_5, 15, in[ 2], 1 );
ROUND2_8W( E, A, B, C, D, F8W_5, 15, in[11], 1 );
ROUND2_8W( D, E, A, B, C, F8W_5, 5, in[ 4], 1 );
ROUND2_8W( C, D, E, A, B, F8W_5, 7, in[13], 1 );
ROUND2_8W( B, C, D, E, A, F8W_5, 7, in[ 6], 1 );
ROUND2_8W( A, B, C, D, E, F8W_5, 8, in[15], 1 );
ROUND2_8W( E, A, B, C, D, F8W_5, 11, in[ 8], 1 );
ROUND2_8W( D, E, A, B, C, F8W_5, 14, in[ 1], 1 );
ROUND2_8W( C, D, E, A, B, F8W_5, 14, in[10], 1 );
ROUND2_8W( B, C, D, E, A, F8W_5, 12, in[ 3], 1 );
ROUND2_8W( A, B, C, D, E, F8W_5, 6, in[12], 1 );
ROUND2_8W( E, A, B, C, D, F8W_4, 9, in[ 6], 2 );
ROUND2_8W( D, E, A, B, C, F8W_4, 13, in[11], 2 );
ROUND2_8W( C, D, E, A, B, F8W_4, 15, in[ 3], 2 );
ROUND2_8W( B, C, D, E, A, F8W_4, 7, in[ 7], 2 );
ROUND2_8W( A, B, C, D, E, F8W_4, 12, in[ 0], 2 );
ROUND2_8W( E, A, B, C, D, F8W_4, 8, in[13], 2 );
ROUND2_8W( D, E, A, B, C, F8W_4, 9, in[ 5], 2 );
ROUND2_8W( C, D, E, A, B, F8W_4, 11, in[10], 2 );
ROUND2_8W( B, C, D, E, A, F8W_4, 7, in[14], 2 );
ROUND2_8W( A, B, C, D, E, F8W_4, 7, in[15], 2 );
ROUND2_8W( E, A, B, C, D, F8W_4, 12, in[ 8], 2 );
ROUND2_8W( D, E, A, B, C, F8W_4, 7, in[12], 2 );
ROUND2_8W( C, D, E, A, B, F8W_4, 6, in[ 4], 2 );
ROUND2_8W( B, C, D, E, A, F8W_4, 15, in[ 9], 2 );
ROUND2_8W( A, B, C, D, E, F8W_4, 13, in[ 1], 2 );
ROUND2_8W( E, A, B, C, D, F8W_4, 11, in[ 2], 2 );
ROUND2_8W( D, E, A, B, C, F8W_3, 9, in[15], 3 );
ROUND2_8W( C, D, E, A, B, F8W_3, 7, in[ 5], 3 );
ROUND2_8W( B, C, D, E, A, F8W_3, 15, in[ 1], 3 );
ROUND2_8W( A, B, C, D, E, F8W_3, 11, in[ 3], 3 );
ROUND2_8W( E, A, B, C, D, F8W_3, 8, in[ 7], 3 );
ROUND2_8W( D, E, A, B, C, F8W_3, 6, in[14], 3 );
ROUND2_8W( C, D, E, A, B, F8W_3, 6, in[ 6], 3 );
ROUND2_8W( B, C, D, E, A, F8W_3, 14, in[ 9], 3 );
ROUND2_8W( A, B, C, D, E, F8W_3, 12, in[11], 3 );
ROUND2_8W( E, A, B, C, D, F8W_3, 13, in[ 8], 3 );
ROUND2_8W( D, E, A, B, C, F8W_3, 5, in[12], 3 );
ROUND2_8W( C, D, E, A, B, F8W_3, 14, in[ 2], 3 );
ROUND2_8W( B, C, D, E, A, F8W_3, 13, in[10], 3 );
ROUND2_8W( A, B, C, D, E, F8W_3, 13, in[ 0], 3 );
ROUND2_8W( E, A, B, C, D, F8W_3, 7, in[ 4], 3 );
ROUND2_8W( D, E, A, B, C, F8W_3, 5, in[13], 3 );
ROUND2_8W( C, D, E, A, B, F8W_2, 15, in[ 8], 4 );
ROUND2_8W( B, C, D, E, A, F8W_2, 5, in[ 6], 4 );
ROUND2_8W( A, B, C, D, E, F8W_2, 8, in[ 4], 4 );
ROUND2_8W( E, A, B, C, D, F8W_2, 11, in[ 1], 4 );
ROUND2_8W( D, E, A, B, C, F8W_2, 14, in[ 3], 4 );
ROUND2_8W( C, D, E, A, B, F8W_2, 14, in[11], 4 );
ROUND2_8W( B, C, D, E, A, F8W_2, 6, in[15], 4 );
ROUND2_8W( A, B, C, D, E, F8W_2, 14, in[ 0], 4 );
ROUND2_8W( E, A, B, C, D, F8W_2, 6, in[ 5], 4 );
ROUND2_8W( D, E, A, B, C, F8W_2, 9, in[12], 4 );
ROUND2_8W( C, D, E, A, B, F8W_2, 12, in[ 2], 4 );
ROUND2_8W( B, C, D, E, A, F8W_2, 9, in[13], 4 );
ROUND2_8W( A, B, C, D, E, F8W_2, 12, in[ 9], 4 );
ROUND2_8W( E, A, B, C, D, F8W_2, 5, in[ 7], 4 );
ROUND2_8W( D, E, A, B, C, F8W_2, 15, in[10], 4 );
ROUND2_8W( C, D, E, A, B, F8W_2, 8, in[14], 4 );
ROUND2_8W( B, C, D, E, A, F8W_1, 8, in[12], 5 );
ROUND2_8W( A, B, C, D, E, F8W_1, 5, in[15], 5 );
ROUND2_8W( E, A, B, C, D, F8W_1, 12, in[10], 5 );
ROUND2_8W( D, E, A, B, C, F8W_1, 9, in[ 4], 5 );
ROUND2_8W( C, D, E, A, B, F8W_1, 12, in[ 1], 5 );
ROUND2_8W( B, C, D, E, A, F8W_1, 5, in[ 5], 5 );
ROUND2_8W( A, B, C, D, E, F8W_1, 14, in[ 8], 5 );
ROUND2_8W( E, A, B, C, D, F8W_1, 6, in[ 7], 5 );
ROUND2_8W( D, E, A, B, C, F8W_1, 8, in[ 6], 5 );
ROUND2_8W( C, D, E, A, B, F8W_1, 13, in[ 2], 5 );
ROUND2_8W( B, C, D, E, A, F8W_1, 6, in[13], 5 );
ROUND2_8W( A, B, C, D, E, F8W_1, 5, in[14], 5 );
ROUND2_8W( E, A, B, C, D, F8W_1, 15, in[ 0], 5 );
ROUND2_8W( D, E, A, B, C, F8W_1, 13, in[ 3], 5 );
ROUND2_8W( C, D, E, A, B, F8W_1, 11, in[ 9], 5 );
ROUND2_8W( B, C, D, E, A, F8W_1, 11, in[11], 5 );
tmp = _mm256_add_epi32( _mm256_add_epi32( h[1], C1 ), D2 );
h[1] = _mm256_add_epi32( _mm256_add_epi32( h[2], D1 ), E2 );
h[2] = _mm256_add_epi32( _mm256_add_epi32( h[3], E1 ), A2 );
h[3] = _mm256_add_epi32( _mm256_add_epi32( h[4], A1 ), B2 );
h[4] = _mm256_add_epi32( _mm256_add_epi32( h[0], B1 ), C2 );
h[0] = tmp;
}
void ripemd160_8way_init( ripemd160_8way_context *sc )
{
sc->val[0] = _mm256_set1_epi32( IV[0] );
sc->val[1] = _mm256_set1_epi32( IV[1] );
sc->val[2] = _mm256_set1_epi32( IV[2] );
sc->val[3] = _mm256_set1_epi32( IV[3] );
sc->val[4] = _mm256_set1_epi32( IV[4] );
sc->count_high = sc->count_low = 0;
}
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len )
{
__m256i *vdata = (__m256i*)data;
size_t ptr;
const int block_size = 64;
ptr = (unsigned)sc->count_low & (block_size - 1U);
while ( len > 0 )
{
size_t clen;
uint32_t clow, clow2;
clen = block_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( sc->buf + (ptr>>2), vdata, clen>>2 );
vdata = vdata + (clen>>2);
ptr += clen;
len -= clen;
if ( ptr == block_size )
{
ripemd160_8way_round( sc );
ptr = 0;
}
clow = sc->count_low;
clow2 = clow + clen;
sc->count_low = clow2;
if ( clow2 < clow )
sc->count_high++;
}
}
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
{
unsigned ptr, u;
uint32_t low, high;
const int block_size = 64;
const int pad = block_size - 8;
ptr = (unsigned)sc->count_low & ( block_size - 1U);
sc->buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
ptr += 4;
if ( ptr > pad )
{
memset_zero_256( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
ripemd160_8way_round( sc );
memset_zero_256( sc->buf, pad>>2 );
}
else
memset_zero_256( sc->buf + (ptr>>2), (pad - ptr) >> 2 );
low = sc->count_low;
high = (sc->count_high << 3) | (low >> 29);
low = low << 3;
sc->buf[ pad>>2 ] = _mm256_set1_epi32( low );
sc->buf[ (pad>>2) + 1 ] = _mm256_set1_epi32( high );
ripemd160_8way_round( sc );
for (u = 0; u < 5; u ++)
casti_m256i( dst, u ) = sc->val[u];
}
#endif // __AVX2__

View File

@@ -19,5 +19,20 @@ void ripemd160_4way_init( ripemd160_4way_context *sc );
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len );
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
#endif
#endif
#if defined (__AVX2__)
typedef struct
{
__m256i buf[64>>2];
__m256i val[5];
uint32_t count_high, count_low;
} __attribute__ ((aligned (64))) ripemd160_8way_context;
void ripemd160_8way_init( ripemd160_8way_context *sc );
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len );
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
#endif // __AVX2__
#endif // __AVX__
#endif // RIPEMD_HASH_4WAY_H__

View File

@@ -39,7 +39,7 @@
#include <stdio.h>
// SHA256 4 way 32 bit
// SHA-256 32 bit
static const sph_u32 H256[8] = {
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
@@ -83,6 +83,8 @@ static const sph_u32 K256[64] = {
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
};
// SHA-256 4 way
#define SHA2s_MEXP( a, b, c, d ) \
_mm_add_epi32( _mm_add_epi32( _mm_add_epi32( \
SSG2_1( W[a] ), W[b] ), SSG2_0( W[c] ) ), W[d] );
@@ -291,13 +293,297 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
sc->buf[ ( pad+4 ) >> 2 ] =
mm_bswap_32( _mm_set1_epi32( low ) );
sha256_4way_round( sc->buf, sc->val );
for ( u = 0; u < 8; u ++ )
((__m128i*)dst)[u] = mm_bswap_32( sc->val[u] );
}
#if defined(__AVX2__)
// SHA512 4 way 64 bit
// SHA-256 8 way
#define CHx(X, Y, Z) \
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
#define MAJx(X, Y, Z) \
_mm256_or_si256( _mm256_and_si256( X, Y ), \
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
#define BSG2_0x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_rotr_32(x, 2), mm256_rotr_32(x, 13) ), mm256_rotr_32( x, 22) )
#define BSG2_1x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_rotr_32(x, 6), mm256_rotr_32(x, 11) ), mm256_rotr_32( x, 25) )
#define SSG2_0x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_rotr_32(x, 7), mm256_rotr_32(x, 18) ), _mm256_srli_epi32(x, 3) )
#define SSG2_1x(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_rotr_32(x, 17), mm256_rotr_32(x, 19) ), _mm256_srli_epi32(x, 10) )
#define SHA2x_MEXP( a, b, c, d ) \
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
SSG2_1x( W[a] ), W[b] ), SSG2_0x( W[c] ) ), W[d] );
#define SHA2s_8WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
do { \
register __m256i T1, T2; \
T1 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
_mm256_add_epi32( H, BSG2_1x(E) ), CHx(E, F, G) ), \
_mm256_set1_epi32( K256[( (j)+(i) )] ) ), W[i] ); \
T2 = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
D = _mm256_add_epi32( D, T1 ); \
H = _mm256_add_epi32( T1, T2 ); \
} while (0)
static void
sha256_8way_round( __m256i *in, __m256i r[8] )
{
register __m256i A, B, C, D, E, F, G, H;
__m256i W[16];
W[ 0] = mm256_bswap_32( in[ 0] );
W[ 1] = mm256_bswap_32( in[ 1] );
W[ 2] = mm256_bswap_32( in[ 2] );
W[ 3] = mm256_bswap_32( in[ 3] );
W[ 4] = mm256_bswap_32( in[ 4] );
W[ 5] = mm256_bswap_32( in[ 5] );
W[ 6] = mm256_bswap_32( in[ 6] );
W[ 7] = mm256_bswap_32( in[ 7] );
W[ 8] = mm256_bswap_32( in[ 8] );
W[ 9] = mm256_bswap_32( in[ 9] );
W[10] = mm256_bswap_32( in[10] );
W[11] = mm256_bswap_32( in[11] );
W[12] = mm256_bswap_32( in[12] );
W[13] = mm256_bswap_32( in[13] );
W[14] = mm256_bswap_32( in[14] );
W[15] = mm256_bswap_32( in[15] );
A = r[0];
B = r[1];
C = r[2];
D = r[3];
E = r[4];
F = r[5];
G = r[6];
H = r[7];
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
//printf("sha256 8 step: D= %08lx H= %08lx\n",*(uint32_t*)&D,*(uint32_t*)&H);
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 4, 0 );
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 5, 0 );
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 6, 0 );
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 7, 0 );
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 8, 0 );
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 9, 0 );
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 10, 0 );
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 11, 0 );
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 12, 0 );
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 13, 0 );
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
//printf("sha256 8 step: A= %08lx B= %08lx\n",*(uint32_t*)&A,*(uint32_t*)&B);
for ( int j = 16; j < 64; j += 16 )
{
W[ 0] = SHA2x_MEXP( 14, 9, 1, 0 );
W[ 1] = SHA2x_MEXP( 15, 10, 2, 1 );
W[ 2] = SHA2x_MEXP( 0, 11, 3, 2 );
W[ 3] = SHA2x_MEXP( 1, 12, 4, 3 );
W[ 4] = SHA2x_MEXP( 2, 13, 5, 4 );
W[ 5] = SHA2x_MEXP( 3, 14, 6, 5 );
W[ 6] = SHA2x_MEXP( 4, 15, 7, 6 );
W[ 7] = SHA2x_MEXP( 5, 0, 8, 7 );
W[ 8] = SHA2x_MEXP( 6, 1, 9, 8 );
W[ 9] = SHA2x_MEXP( 7, 2, 10, 9 );
W[10] = SHA2x_MEXP( 8, 3, 11, 10 );
W[11] = SHA2x_MEXP( 9, 4, 12, 11 );
W[12] = SHA2x_MEXP( 10, 5, 13, 12 );
W[13] = SHA2x_MEXP( 11, 6, 14, 13 );
W[14] = SHA2x_MEXP( 12, 7, 15, 14 );
W[15] = SHA2x_MEXP( 13, 8, 0, 15 );
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, j );
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, j );
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, j );
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, j );
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 4, j );
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 5, j );
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 6, j );
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 7, j );
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 8, j );
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 9, j );
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 10, j );
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 11, j );
SHA2s_8WAY_STEP( E, F, G, H, A, B, C, D, 12, j );
SHA2s_8WAY_STEP( D, E, F, G, H, A, B, C, 13, j );
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, j );
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, j );
}
r[0] = _mm256_add_epi32( r[0], A );
r[1] = _mm256_add_epi32( r[1], B );
r[2] = _mm256_add_epi32( r[2], C );
r[3] = _mm256_add_epi32( r[3], D );
r[4] = _mm256_add_epi32( r[4], E );
r[5] = _mm256_add_epi32( r[5], F );
r[6] = _mm256_add_epi32( r[6], G );
r[7] = _mm256_add_epi32( r[7], H );
}
void sha256_8way_init( sha256_8way_context *sc )
{
sc->count_high = sc->count_low = 0;
sc->val[0] = _mm256_set1_epi32( H256[0] );
sc->val[1] = _mm256_set1_epi32( H256[1] );
sc->val[2] = _mm256_set1_epi32( H256[2] );
sc->val[3] = _mm256_set1_epi32( H256[3] );
sc->val[4] = _mm256_set1_epi32( H256[4] );
sc->val[5] = _mm256_set1_epi32( H256[5] );
sc->val[6] = _mm256_set1_epi32( H256[6] );
sc->val[7] = _mm256_set1_epi32( H256[7] );
}
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
{
__m256i *vdata = (__m256i*)data;
size_t ptr;
const int buf_size = 64;
/*
printf("sha256 8 update1: len= %d\n", len);
uint32_t* d = (uint32_t*)data;
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[128],d[136],d[144],d[152]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[160],d[168],d[176],d[184]);
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[192],d[200],d[208],d[216]);
*/
ptr = (unsigned)sc->count_low & (buf_size - 1U);
while ( len > 0 )
{
size_t clen;
uint32_t clow, clow2;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( sc->buf + (ptr>>2), vdata, clen>>2 );
vdata = vdata + (clen>>2);
ptr += clen;
len -= clen;
if ( ptr == buf_size )
{
/*
printf("sha256 8 update2: compress\n");
d = (uint32_t*)sc->buf;
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
sha256_8way_round( sc->buf, sc->val );
/*
printf("sha256 8 update3\n");
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
ptr = 0;
}
clow = sc->count_low;
clow2 = SPH_T32( clow + clen );
sc->count_low = clow2;
if ( clow2 < clow )
sc->count_high++;
}
}
void sha256_8way_close( sha256_8way_context *sc, void *dst )
{
unsigned ptr, u;
uint32_t low, high;
const int buf_size = 64;
const int pad = buf_size - 8;
ptr = (unsigned)sc->count_low & (buf_size - 1U);
/*
printf("sha256 8 close1: ptr= %d\n", ptr);
uint32_t* d = (uint32_t*)sc->buf;
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
*/
sc->buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
ptr += 4;
if ( ptr > pad )
{
memset_zero_256( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
//printf("sha256 8 close2: compress\n");
//uint32_t* d = (uint32_t*)sc->buf;
//printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
sha256_8way_round( sc->buf, sc->val );
//d= (uint32_t*)sc->val;
//printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
memset_zero_256( sc->buf, pad >> 2 );
}
else
memset_zero_256( sc->buf + (ptr>>2), (pad - ptr) >> 2 );
low = sc->count_low;
high = (sc->count_high << 3) | (low >> 29);
low = low << 3;
sc->buf[ pad >> 2 ] =
mm256_bswap_32( _mm256_set1_epi32( high ) );
sc->buf[ ( pad+4 ) >> 2 ] =
mm256_bswap_32( _mm256_set1_epi32( low ) );
/*
d = (uint32_t*)sc->buf;
printf("sha256 8 close3: compress\n");
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
d= (uint32_t*)sc->val;
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
sha256_8way_round( sc->buf, sc->val );
/*
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
*/
for ( u = 0; u < 8; u ++ )
((__m256i*)dst)[u] = mm256_bswap_32( sc->val[u] );
}
// SHA-512 4 way 64 bit
static const sph_u64 H512[8] = {
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),

View File

@@ -46,7 +46,9 @@
#if defined(__AVX__)
#define SPH_SIZE_sha256 256
//#define SPH_SIZE_sha256 256
// SHA-256 4 way
typedef struct {
__m128i buf[64>>2];
@@ -60,7 +62,21 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst );
#if defined (__AVX2__)
#define SPH_SIZE_sha512 512
// SHA-256 8 way
typedef struct {
__m256i buf[64>>2];
__m256i val[8];
uint32_t count_high, count_low;
} sha256_8way_context;
void sha256_8way_init( sha256_8way_context *sc );
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len );
void sha256_8way_close( sha256_8way_context *sc, void *dst );
//#define SPH_SIZE_sha512 512
// SHA-512 4 way
typedef struct {
__m256i buf[128>>3];

View File

@@ -39,7 +39,6 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
// hash is returned deinterleaved
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
// data is 80 bytes, 20 u32 or 4 u64.
@@ -48,47 +47,23 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
skeinhash_4way( hash, vdata );
if ( hash[7] < Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
// always put nonce0 in work data for compartibility with
// non vectored algos.
pdata[19] = n;
}
if ( (hash+8)[7] < Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
}
if ( (hash+16)[7] < Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
}
if ( (hash+24)[7] < Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce)

View File

@@ -36,51 +36,29 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
// hash is returned deinterleaved
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
swab32_array( endiandata, pdata, 20 );
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
skein2hash( hash, vdata );
if ( hash[7] < Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
}
if ( (hash+8)[7] < Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
}
if ( (hash+16)[7] < Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
}
if ( (hash+24)[7] < Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce)

View File

@@ -61,12 +61,8 @@ int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
@@ -83,42 +79,19 @@ int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce,
do {
const uint32_t Htarg = ptarget[7];
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
pdata[19] = n;
whirlpool_hash_4way( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio(work, hash);
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;

View File

@@ -171,12 +171,8 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -195,42 +191,21 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
c11_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -191,12 +191,8 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
volatile uint8_t *restart = &(work_restart[thr_id].restart);
int i;
@@ -224,45 +220,24 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
timetravel_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
*hashes_done = n - first_nonce + 1;
return num_found;
}

View File

@@ -229,12 +229,8 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
volatile uint8_t *restart = &(work_restart[thr_id].restart);
int i;
@@ -262,42 +258,20 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
timetravel10_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );

View File

@@ -70,12 +70,8 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
const uint32_t Htarg = ptarget[7];
uint32_t n = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
uint64_t htmax[] = { 0,
0xF,
@@ -112,49 +108,24 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
{
uint32_t mask = masks[m];
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
tribus_hash_4way( hash, vdata );
pdata[19] = n;
if ( ( !(hash[7] & mask) )
&& fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( !( (hash+(i<<3))[7] & mask ) )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio(work, hash);
}
if ( ( !((hash+8)[7] & mask) )
&& fulltest (hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio(work, hash+8);
}
if ( ( !((hash+16)[7] & mask) )
&& fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio(work, hash+16);
}
if ( ( !((hash+24)[7] & mask) )
&& fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio(work, hash+24);
}
n += 4;
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( (num_found == 0) && ( n < max_nonce )
&& !work_restart[thr_id].restart);
break;

View File

@@ -170,12 +170,8 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -194,42 +190,21 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x11_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -243,12 +243,8 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
swab32_array( endiandata, pdata, 20 );
@@ -278,42 +274,21 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x11evo_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & hmask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & hmask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & hmask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & hmask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & hmask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -177,12 +177,8 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -201,42 +197,21 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x11gost_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -199,12 +199,8 @@ int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -223,42 +219,21 @@ int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x12_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -120,12 +120,8 @@ int scanhash_phi1612_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t _ALIGN(64) endiandata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
if ( opt_benchmark )
@@ -138,42 +134,20 @@ int scanhash_phi1612_4way( int thr_id, struct work *work, uint32_t max_nonce,
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
phi1612_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -84,12 +84,8 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
volatile uint8_t *restart = &(work_restart[thr_id].restart);
@@ -102,42 +98,20 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
skunk_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n +=4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );

View File

@@ -195,12 +195,8 @@ int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -219,42 +215,21 @@ int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x13_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -220,12 +220,8 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -247,42 +243,21 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x13sm3_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -114,12 +114,8 @@ int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
volatile uint8_t *restart = &(work_restart[thr_id].restart);
@@ -132,42 +128,20 @@ int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *edata = (uint64_t*)endiandata;
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
polytimos_4way_hash(hash, vdata);
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;

View File

@@ -89,12 +89,8 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( opt_benchmark )
@@ -108,42 +104,20 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
veltor_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );

View File

@@ -205,12 +205,8 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -229,42 +225,21 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x14_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -224,12 +224,8 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -248,42 +244,21 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x15_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -314,12 +314,8 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
volatile uint8_t *restart = &(work_restart[thr_id].restart);
for ( int k=0; k < 19; k++ )
@@ -342,41 +338,20 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x16r_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );

View File

@@ -235,12 +235,8 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
@@ -259,42 +255,21 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t mask = masks[m];
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
x17_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

View File

@@ -384,12 +384,8 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
uint32_t *noncep = vdata + 73; // 9*8 + 1
if ( opt_benchmark )
ptarget[7] = 0x0cff;
@@ -403,43 +399,21 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
xevan_4way_blake512_midstate( vdata );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
be32enc( noncep, n );
be32enc( noncep+2, n+1 );
be32enc( noncep+4, n+2 );
be32enc( noncep+6, n+3 );
xevan_4way_hash( hash, vdata );
pdata[19] = n;
if ( ( hash[7] <= Htarg ) && fulltest( hash, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( ( (hash+8)[7] <= Htarg ) && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( ( (hash+16)[7] <= Htarg ) && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( ( (hash+24)[7] <= Htarg ) && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )

84
api.c
View File

@@ -98,6 +98,7 @@ extern int opt_api_remote;
extern double global_hashrate;
extern uint32_t accepted_count;
extern uint32_t rejected_count;
extern uint32_t solved_count;
#define cpu_threads opt_n_threads
@@ -110,18 +111,19 @@ extern int cpu_fanpercent(void);
static void cpustatus(int thr_id)
{
if (thr_id >= 0 && thr_id < opt_n_threads) {
struct cpu_info *cpu = &thr_info[thr_id].cpu;
char buf[512]; *buf = '\0';
if ( thr_id >= 0 && thr_id < opt_n_threads )
{
// struct cpu_info *cpu = &thr_info[thr_id].cpu;
char buf[512]; *buf = '\0';
char units[4] = {0};
double hashrate = thr_hashrates[thr_id];
cpu->thr_id = thr_id;
cpu->khashes = thr_hashrates[thr_id] / 1000.0; //todo: stats_get_speed(thr_id, 0.0) / 1000.0;
snprintf(buf, sizeof(buf), "CPU=%d;KHS=%.2f|", thr_id, cpu->khashes);
// append to buffer
strcat(buffer, buf);
}
scale_hash_for_display ( &hashrate, units );
snprintf( buf, sizeof(buf), "CPU=%d;%sH/s=%.2f|", thr_id, units,
hashrate );
// append to buffer
strcat( buffer, buf );
}
}
/*****************************************************************************/
@@ -129,42 +131,42 @@ static void cpustatus(int thr_id)
/**
* Returns miner global infos
*/
static char *getsummary(char *params)
static char *getsummary( char *params )
{
char algo[64]; *algo = '\0';
time_t ts = time(NULL);
double uptime = difftime(ts, startup);
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
double diff = net_diff > 0. ? net_diff : stratum_diff;
char diff_str[16];
struct cpu_info cpu = { 0 };
char algo[64]; *algo = '\0';
time_t ts = time(NULL);
double uptime = difftime(ts, startup);
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
double diff = net_diff > 0. ? net_diff : stratum_diff;
char diff_str[16];
double hrate = (double)global_hashrate;
struct cpu_info cpu = { 0 };
#ifdef USE_MONITORING
cpu.has_monitoring = true;
cpu.cpu_temp = cpu_temp(0);
cpu.cpu_fan = cpu_fanpercent();
cpu.cpu_clock = cpu_clock(0);
cpu.has_monitoring = true;
cpu.cpu_temp = cpu_temp(0);
cpu.cpu_fan = cpu_fanpercent();
cpu.cpu_clock = cpu_clock(0);
#endif
get_currentalgo(algo, sizeof(algo));
get_currentalgo(algo, sizeof(algo));
// if diff is integer don't display decimals
if ( diff == trunc( diff ) )
sprintf( diff_str, "%.0f", diff);
else
sprintf( diff_str, "%.6f", diff);
// if diff is integer don't display decimals
if ( diff == trunc( diff ) )
sprintf( diff_str, "%.0f", diff);
else
sprintf( diff_str, "%.6f", diff);
*buffer = '\0';
sprintf(buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;CPUS=%d;KHS=%.2f;ACC=%d;REJ=%d;"
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
"UPTIME=%.0f;TS=%u|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, opt_n_threads, (double)global_hashrate / 1000.0,
accepted_count, rejected_count, accps, diff_str,
cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
uptime, (uint32_t) ts);
return buffer;
*buffer = '\0';
sprintf( buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;CPUS=%d;HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
"UPTIME=%.0f;TS=%u|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, opt_n_threads, hrate, hrate/1000.0,
accepted_count, rejected_count, solved_count,
accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
uptime, (uint32_t) ts);
return buffer;
}
/**

128
avxdefs.h
View File

@@ -182,6 +182,10 @@ static inline __m128i foo()
// returns p[i]
#define casti_m128i(p,i) (((__m128i*)(p))[(i)])
// p = any aligned pointer, o = scaled offset
// returns p+o
#define casto_m128i(p,i) (((__m128i*)(p))+(i))
//
// Memory functions
// n = number of __m128i, bytes/16
@@ -671,6 +675,10 @@ typedef union m256_v8 m256_v8;
// returns p[i]
#define casti_m256i(p,i) (((__m256i*)(p))[(i)])
// p = any aligned pointer, o = scaled offset
// returns p+o
#define casto_m256i(p,i) (((__m256i*)(p))+(i))
//
// Memory functions
// n = number of 256 bit (32 byte) vectors
@@ -1529,6 +1537,38 @@ static inline void mm256_interleave_8x32( void *dst, const void *src0,
s3[18], s2[18], s1[18], s0[18] );
d[19] = _mm256_set_epi32( s7[19], s6[19], s5[19], s4[19],
s3[19], s2[19], s1[19], s0[19] );
if ( bit_len <= 640 ) return;
d[20] = _mm256_set_epi32( s7[20], s6[20], s5[20], s4[20],
s3[20], s2[20], s1[20], s0[20] );
d[21] = _mm256_set_epi32( s7[21], s6[21], s5[21], s4[21],
s3[21], s2[21], s1[21], s0[21] );
d[22] = _mm256_set_epi32( s7[22], s6[22], s5[22], s4[22],
s3[22], s2[22], s1[22], s0[22] );
d[23] = _mm256_set_epi32( s7[23], s6[23], s5[23], s4[23],
s3[23], s2[23], s1[23], s0[23] );
if ( bit_len <= 768 ) return;
d[24] = _mm256_set_epi32( s7[24], s6[24], s5[24], s4[24],
s3[24], s2[24], s1[24], s0[24] );
d[25] = _mm256_set_epi32( s7[25], s6[25], s5[25], s4[25],
s3[25], s2[25], s1[25], s0[25] );
d[26] = _mm256_set_epi32( s7[26], s6[26], s5[26], s4[26],
s3[26], s2[26], s1[26], s0[26] );
d[27] = _mm256_set_epi32( s7[27], s6[27], s5[27], s4[27],
s3[27], s2[27], s1[27], s0[27] );
d[28] = _mm256_set_epi32( s7[28], s6[28], s5[28], s4[28],
s3[28], s2[28], s1[28], s0[28] );
d[29] = _mm256_set_epi32( s7[29], s6[29], s5[29], s4[29],
s3[29], s2[29], s1[29], s0[29] );
d[30] = _mm256_set_epi32( s7[30], s6[30], s5[30], s4[30],
s3[30], s2[30], s1[30], s0[30] );
d[31] = _mm256_set_epi32( s7[31], s6[31], s5[31], s4[31],
s3[31], s2[31], s1[31], s0[31] );
// bit_len == 1024
}
// probably obsolete with double pack 2x32->64, 4x64->256.
@@ -1607,31 +1647,71 @@ static inline void mm256_deinterleave_8x32( void *dst0, void *dst1, void *dst2,
// null change for overrun space, vector indexing doesn't work for
// 32 bit data
if ( bit_len <= 640 )
{
uint32_t *d = ((uint32_t*)d0) + 8;
d0[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[152], s[144], s[136], s[128] );
d = ((uint32_t*)d1) + 8;
d1[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[153], s[145], s[137], s[129] );
d = ((uint32_t*)d2) + 8;
d2[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[154], s[146], s[138], s[130]);
d = ((uint32_t*)d3) + 8;
d3[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[155], s[147], s[139], s[131] );
d = ((uint32_t*)d4) + 8;
d4[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[156], s[148], s[140], s[132] );
d = ((uint32_t*)d5) + 8;
d5[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[157], s[149], s[141], s[133] );
d = ((uint32_t*)d6) + 8;
d6[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[158], s[150], s[142], s[134] );
d = ((uint32_t*)d7) + 8;
d7[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[159], s[151], s[143], s[135] );
return;
}
uint32_t *d = ((uint32_t*)d0) + 8;
d0[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[152], s[144], s[136], s[128] );
d = ((uint32_t*)d1) + 8;
d1[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[153], s[145], s[137], s[129] );
d = ((uint32_t*)d2) + 8;
d2[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[154], s[146], s[138], s[130]);
d = ((uint32_t*)d3) + 8;
d3[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[155], s[147], s[139], s[131] );
d = ((uint32_t*)d4) + 8;
d4[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[156], s[148], s[140], s[132] );
d = ((uint32_t*)d5) + 8;
d5[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[157], s[149], s[141], s[133] );
d = ((uint32_t*)d6) + 8;
d6[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[158], s[150], s[142], s[134] );
d = ((uint32_t*)d7) + 8;
d7[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
s[159], s[151], s[143], s[135] );
d0[2] = _mm256_set_epi32( s[184], s[176], s[168], s[160],
s[152], s[144], s[136], s[128] );
d1[2] = _mm256_set_epi32( s[185], s[177], s[169], s[161],
s[153], s[145], s[137], s[129] );
d2[2] = _mm256_set_epi32( s[186], s[178], s[170], s[162],
s[154], s[146], s[138], s[130] );
d3[2] = _mm256_set_epi32( s[187], s[179], s[171], s[163],
s[155], s[147], s[139], s[131] );
d4[2] = _mm256_set_epi32( s[188], s[180], s[172], s[164],
s[156], s[148], s[140], s[132] );
d5[2] = _mm256_set_epi32( s[189], s[181], s[173], s[165],
s[157], s[149], s[141], s[133] );
d6[2] = _mm256_set_epi32( s[190], s[182], s[174], s[166],
s[158], s[150], s[142], s[134] );
d7[2] = _mm256_set_epi32( s[191], s[183], s[175], s[167],
s[159], s[151], s[143], s[135] );
if ( bit_len <= 768 ) return;
d0[3] = _mm256_set_epi32( s[248], s[240], s[232], s[224],
s[216], s[208], s[200], s[192] );
d1[3] = _mm256_set_epi32( s[249], s[241], s[233], s[225],
s[217], s[209], s[201], s[193] );
d2[3] = _mm256_set_epi32( s[250], s[242], s[234], s[226],
s[218], s[210], s[202], s[194] );
d3[3] = _mm256_set_epi32( s[251], s[243], s[235], s[227],
s[219], s[211], s[203], s[195] );
d4[3] = _mm256_set_epi32( s[252], s[244], s[236], s[228],
s[220], s[212], s[204], s[196] );
d5[3] = _mm256_set_epi32( s[253], s[245], s[237], s[229],
s[221], s[213], s[205], s[197] );
d6[3] = _mm256_set_epi32( s[254], s[246], s[238], s[230],
s[222], s[214], s[206], s[198] );
d7[3] = _mm256_set_epi32( s[255], s[247], s[239], s[231],
s[223], s[215], s[207], s[199] );
// bit_len == 1024
}
// Deinterleave 8 arrays into indivdual buffers for scalar processing

View File

@@ -18,8 +18,8 @@ rm -f config.status
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make -j 4

27
buildjdd.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
#if [ "$OS" = "Windows_NT" ]; then
# ./mingw64.sh
# exit 0
#fi
# Linux build
make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
# Ubuntu 10.04 (gcc 4.4)
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make -j 4
strip -s cpuminer

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.2.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.3.2.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.8.2'
PACKAGE_STRING='cpuminer-opt 3.8.2'
PACKAGE_VERSION='3.8.3.2'
PACKAGE_STRING='cpuminer-opt 3.8.3.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.8.2 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.8.3.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.8.2:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.8.3.2:";;
esac
cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.8.2
cpuminer-opt configure 3.8.3.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.8.2, which was
It was created by cpuminer-opt $as_me 3.8.3.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.8.2'
VERSION='3.8.3.2'
cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.8.2, which was
This file was extended by cpuminer-opt $as_me 3.8.3.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.8.2
cpuminer-opt config.status 3.8.3.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.8.2])
AC_INIT([cpuminer-opt], [3.8.3.2])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -140,6 +140,7 @@ bool opt_stratum_stats = false;
uint32_t accepted_count = 0L;
uint32_t rejected_count = 0L;
uint32_t solved_count = 0L;
double *thr_hashrates;
double *thr_hashcount;
double global_hashcount = 0;
@@ -432,6 +433,7 @@ static bool get_mininginfo(CURL *curl, struct work *work)
return true;
}
// hodl needs 4 but leave it at 3 until gbt better understood
#define BLOCK_VERSION_CURRENT 3
static bool gbt_work_decode(const json_t *val, struct work *work)
@@ -769,6 +771,8 @@ static int share_result( int result, struct work *work, const char *reason )
float rate;
char rate_s[8] = {0};
double sharediff = work ? work->sharediff : stratum.sharediff;
bool solved = result && (net_diff > 0.0 ) && ( sharediff >= net_diff );
char sol[32] = {0};
int i;
pthread_mutex_lock(&stats_lock);
@@ -778,6 +782,16 @@ static int share_result( int result, struct work *work, const char *reason )
hashrate += thr_hashrates[i];
}
result ? accepted_count++ : rejected_count++;
if ( solved )
{
solved_count++;
if ( use_colors )
sprintf( sol, CL_GRN " Solved" CL_WHT " %d", solved_count );
else
sprintf( sol, " Solved %d", solved_count );
}
pthread_mutex_unlock(&stats_lock);
global_hashcount = hashcount;
global_hashrate = hashrate;
@@ -787,9 +801,13 @@ static int share_result( int result, struct work *work, const char *reason )
: ( 100. * rejected_count / total_submits ) );
if (use_colors)
{
sres = (result ? CL_GRN "Accepted" CL_WHT : CL_RED "Rejected" CL_WHT );
}
else
{
sres = (result ? "Accepted" : "Rejected" );
}
// Contrary to rounding convention 100% means zero rejects, exactly 100%.
// Rates > 99% and < 100% (rejects>0) display 99.9%.
@@ -844,13 +862,13 @@ static int share_result( int result, struct work *work, const char *reason )
else
{
#if ((defined(_WIN64) || defined(__WINDOWS__)))
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g, %s %sH/s",
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g%s, %s %sH/s",
sres, ( result ? accepted_count : rejected_count ),
total_submits, rate_s, sharediff, hr, hr_units );
total_submits, rate_s, sharediff, sol, hr, hr_units );
#else
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g, %s %sH/s, %dC",
applog( LOG_NOTICE, "%s %lu/%lu (%s%%), diff %.3g%s, %s %sH/s, %dC",
sres, ( result ? accepted_count : rejected_count ),
total_submits, rate_s, sharediff, hr, hr_units,
total_submits, rate_s, sharediff, sol, hr, hr_units,
(uint32_t)cpu_temp(0) );
#endif
}
@@ -1549,6 +1567,7 @@ void SHA256_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
}
}
// default
void std_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / opt_diff_factor );
@@ -1558,7 +1577,7 @@ void scrypt_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (65536.0 * opt_diff_factor) );
}
// another popular choice.
void alt_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
@@ -1608,6 +1627,14 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
// the job_id check doesn't work as intended, it's a char pointer!
// For stratum the pointers can be dereferenced and the strings compared,
// benchmark not, getwork & gbt unsure.
// || ( have_straum && strcmp( work->job_id, g_work->job_id ) ) ) )
// or
// || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) )
// For now leave it as is, it seems stable.
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| ( work->job_id != g_work->job_id ) ) )
@@ -1617,7 +1644,7 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
if ( opt_randomize )
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
}
else
++(*nonceptr);
@@ -1767,6 +1794,8 @@ static void *miner_thread( void *userdata )
{
algo_gate.wait_for_diff( &stratum );
pthread_mutex_lock( &g_work_lock );
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
algo_gate.stratum_gen_work( &stratum, &g_work );
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce,
stratum.job.clean );
pthread_mutex_unlock( &g_work_lock );
@@ -1866,33 +1895,38 @@ static void *miner_thread( void *userdata )
hashes_done / (diff.tv_sec + diff.tv_usec * 1e-6);
pthread_mutex_unlock(&stats_lock);
}
// if nonce(s) submit work
if ( nonce_found && !opt_benchmark )
{
/*
int num_submitted = 0;
// look for 4way nonces
for ( int n = 0; n < 4; n++ )
if ( work.nfound[n] )
for ( int n = 0; n < nonce_found; n++ )
{
*algo_gate.get_nonceptr( work.data ) = work.nonces[n];
if ( submit_work( mythr, &work ) )
{
*algo_gate.get_nonceptr( work.data ) = work.nonces[n];
if ( !submit_work( mythr, &work ) )
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
applog( LOG_NOTICE, "Share submitted." );
num_submitted++;
applog( LOG_NOTICE, "Share submitted." );
num_submitted++;
}
else
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
}
// must be a one way algo, nonce is already in work data
if ( !num_submitted )
{
*/
if ( !submit_work( mythr, &work ) )
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
applog( LOG_NOTICE, "Share submitted." );
}
// }
// prevent stale work in solo
// we can't submit twice a block!
@@ -2348,7 +2382,7 @@ static void *stratum_thread(void *userdata )
if ( !opt_quiet )
{
if (net_diff > 0.)
applog(LOG_BLUE, "%s block %d, diff %.3f",
applog(LOG_BLUE, "%s block %d, network diff %.3f",
algo_names[opt_algo], stratum.bloc_height, net_diff);
else
applog(LOG_BLUE, "%s %s block %d", short_url,

View File

@@ -361,7 +361,6 @@ struct work {
size_t xnonce2_len;
unsigned char *xnonce2;
uint32_t nonces[8];
bool nfound[8];
};
struct stratum_job {
@@ -451,7 +450,7 @@ void applog_hash(void *hash);
void format_hashrate(double hashrate, char *output);
void print_hash_tests(void);
void scale_hash_for_display ( double* hashrate, char* units );
struct thr_info {
int id;