Compare commits

..

7 Commits

Author SHA1 Message Date
Jay D Dee
cedcf4d070 v3.12.4.5 2020-02-28 02:42:22 -05:00
Jay D Dee
81b50c3c71 v3.12.4.4 2020-02-25 14:07:32 -05:00
Jay D Dee
0e1e88f53e v3.12.4.3 2020-02-24 21:35:19 -05:00
Jay D Dee
45c77a5c81 v3.12.4.2 2020-02-23 15:31:06 -05:00
Jay D Dee
dbce7e0721 v3.12.4.1 2020-02-22 18:06:39 -05:00
Jay D Dee
6d66051de6 v3.12.4 2020-02-21 16:34:53 -05:00
Jay D Dee
b93be8816a v3.12.3.1 2020-02-18 12:05:47 -05:00
12 changed files with 928 additions and 588 deletions

View File

@@ -12,10 +12,24 @@ a false positive, they are flagged simply because they are cryptocurrency
miners. The source code is open for anyone to inspect. If you don't trust
the software, don't use it.
New thread:
https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575
Old thread:
https://bitcointalk.org/index.php?topic=1326803.0
mailto:jayddee246@gmail.com
This note is to confirm that bitcointalk users JayDDee and joblo are the
same person.
I created a new BCT user JayDDee to match my github user id.
The old thread has been locked but still contains useful information for
reading.
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
for compile instructions.

View File

@@ -65,6 +65,55 @@ If not what makes it happen or not happen?
Change Log
----------
v3.12.4.5
Issue #246: better stale share detection for getwork, and enhanced logging
of stale shares for stratum & getwork.
Issue #251: fixed incorrect share difficulty and share ratio in share
result log.
Changed submit log to include share diff and block height.
Small cosmetic changes to logs.
v3.12.4.4
Issue #246: Fixed net hashrate in getwork block log,
removed duplicate getwork block log,
other small tweaks to stats logs for getwork.
Issue #248: Fixed chronic stale shares with scrypt:1048576 (scryptn2).
v3.12.4.3
Fixed segfault in new block log for getwork.
Disabled silent discarding of stale work after the submit is logged.
v3.12.4.2
Issue #245: fixed getwork stale shares, solo mining with getwork now works.
Issue #246: implemented block and summary logs for getwork.
v3.12.4.1
Issue #245: fix scantime when mining solo with getwork.
Added debug logs for creation of stratum and longpoll threads, use -D to
enable.
v3.12.4
Issue #244: Change longpoll to ignore job id.
Lyra2rev2 AVX2 +3%, AVX512 +6%.
v3.12.3.1
Issue #241: Fixed regression that broke coinbase address in v3.11.7.
v3.12.3
Issue #238: Fixed skunk AVX2.

View File

@@ -94,12 +94,12 @@ bool lyra2rev2_thread_init()
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
#if defined (LYRA2REV2_8WAY)
#if defined (LYRA2REV2_16WAY)
l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 ); // 2 way
init_lyra2rev2_8way_ctx();;
#elif defined (LYRA2REV2_4WAY)
init_lyra2rev2_16way_ctx();;
#elif defined (LYRA2REV2_8WAY)
l2v2_wholeMatrix = _mm_malloc( size, 64 );
init_lyra2rev2_4way_ctx();;
init_lyra2rev2_8way_ctx();;
#else
l2v2_wholeMatrix = _mm_malloc( size, 64 );
init_lyra2rev2_ctx();
@@ -109,12 +109,12 @@ bool lyra2rev2_thread_init()
bool register_lyra2rev2_algo( algo_gate_t* gate )
{
#if defined (LYRA2REV2_8WAY)
#if defined (LYRA2REV2_16WAY)
gate->scanhash = (void*)&scanhash_lyra2rev2_16way;
gate->hash = (void*)&lyra2rev2_16way_hash;
#elif defined (LYRA2REV2_8WAY)
gate->scanhash = (void*)&scanhash_lyra2rev2_8way;
gate->hash = (void*)&lyra2rev2_8way_hash;
#elif defined (LYRA2REV2_4WAY)
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
gate->hash = (void*)&lyra2rev2_4way_hash;
#else
gate->scanhash = (void*)&scanhash_lyra2rev2;
gate->hash = (void*)&lyra2rev2_hash;

View File

@@ -51,30 +51,32 @@ bool init_lyra2rev3_ctx();
//////////////////////////////////
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define LYRA2REV2_8WAY 1
#define LYRA2REV2_16WAY 1
#elif defined(__AVX2__)
#define LYRA2REV2_4WAY 1
#define LYRA2REV2_8WAY 1
#endif
extern __thread uint64_t* l2v2_wholeMatrix;
bool register_lyra2rev2_algo( algo_gate_t* gate );
#if defined(LYRA2REV2_8WAY)
#if defined(LYRA2REV2_16WAY)
void lyra2rev2_16way_hash( void *state, const void *input );
int scanhash_lyra2rev2_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev2_16way_ctx();
#elif defined(LYRA2REV2_8WAY)
void lyra2rev2_8way_hash( void *state, const void *input );
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev2_8way_ctx();
#elif defined(LYRA2REV2_4WAY)
void lyra2rev2_4way_hash( void *state, const void *input );
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev2_4way_ctx();
#else
void lyra2rev2_hash( void *state, const void *input );
int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );

View File

@@ -8,12 +8,30 @@
#include "algo/cubehash/cube-hash-2way.h"
#if 0
void lyra2rev2_8way_hash( void *state, const void *input )
#if defined (LYRA2REV2_16WAY)
// Bundle of the per-stage hash contexts used by the 16-way lyra2rev2 path.
// A file-scope template instance (l2v2_16way_ctx) is prepared once and then
// copied with memcpy into a local context at the start of every
// lyra2rev2_16way_hash() call.
typedef struct {
// 16-lane blake256 state; initialised and fed the first 64 bytes of the
// block header by scanhash_lyra2rev2_16way() before hashing starts.
blake256_16way_context blake;
// 8-lane keccak256 state; the 16-way hash runs it in two passes
// (lanes 0-7, then re-init and lanes 8-15).
keccak256_8way_context keccak;
// Cubehash parameters/state, passed to cubehash_full() once per lane.
cubehashParam cube;
// 8-lane skein256 state; also used in two passes like keccak.
skein256_8way_context skein;
// 16-lane bmw256 state for the final stage.
bmw256_16way_context bmw;
} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64)));
// Shared template context; not declared __thread here.
// NOTE(review): scanhash writes the blake member of this shared instance --
// confirm that is safe when several mining threads run concurrently.
static lyra2v2_16way_ctx_holder l2v2_16way_ctx;
bool init_lyra2rev2_16way_ctx()
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
keccak256_8way_init( &l2v2_16way_ctx.keccak );
cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_16way_ctx.skein );
bmw256_16way_init( &l2v2_16way_ctx.bmw );
return true;
}
void lyra2rev2_16way_hash( void *state, const void *input )
{
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
@@ -22,35 +40,60 @@ void lyra2rev2_8way_hash( void *state, const void *input )
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
uint32_t hash8[8] __attribute__ ((aligned (64)));
uint32_t hash9[8] __attribute__ ((aligned (64)));
uint32_t hash10[8] __attribute__ ((aligned (64)));
uint32_t hash11[8] __attribute__ ((aligned (64)));
uint32_t hash12[8] __attribute__ ((aligned (64)));
uint32_t hash13[8] __attribute__ ((aligned (64)));
uint32_t hash14[8] __attribute__ ((aligned (64)));
uint32_t hash15[8] __attribute__ ((aligned (64)));
lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) );
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash );
blake256_16way_update( &ctx.blake, input + (64<<4), 16 );
blake256_16way_close( &ctx.blake, vhash );
rintrlv_8x32_8x64( vhashA, vhash, 256 );
dintrlv_16x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, vhash, 256 );
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
keccak256_8way_update( &ctx.keccak, vhash, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
keccak256_8way_init( &ctx.keccak );
keccak256_8way_update( &ctx.keccak, vhash, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
// De-interleave the second keccak pass (lanes 8..15) back into hash8..hash15.
// The last destination must be hash15: writing to hash5 would clobber the
// lane-5 result of the first pass and leave hash15 holding its pre-keccak
// value, corrupting both lanes for every later stage.
dintrlv_8x64( hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
@@ -64,61 +107,127 @@ void lyra2rev2_8way_hash( void *state, const void *input )
intrlv_2x256( vhash, hash6, hash7, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash6, hash7, vhash, 256 );
intrlv_2x256( vhash, hash8, hash9, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash8, hash9, vhash, 256 );
intrlv_2x256( vhash, hash10, hash11, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash10, hash11, vhash, 256 );
intrlv_2x256( vhash, hash12, hash13, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash12, hash13, vhash, 256 );
intrlv_2x256( vhash, hash14, hash15, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash14, hash15, vhash, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12,
hash13, hash14, hash15, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
skein256_8way_init( &ctx.skein );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
dintrlv_8x64( hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, vhash, 256 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state );
intrlv_16x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7,
hash8, hash9, hash10, hash11,
hash12, hash13, hash14, hash15, 256 );
bmw256_16way_update( &ctx.bmw, vhash, 32 );
bmw256_16way_close( &ctx.bmw, state );
}
#endif
// Scan a nonce range for lyra2rev2, hashing 16 nonces per iteration.
//
// work         work item; work->data is the block header (nonce in word 19)
//              and work->target the target. data[19] is updated on exit.
// max_nonce    upper bound of the nonce range; scanning stops 16 short of it.
// hashes_done  out: number of nonces tried (n - first_nonce).
// mythr        calling thread info; mythr->id indexes work_restart[].
//
// Always returns 0; candidate solutions are reported through
// submit_lane_solution() rather than the return value.
int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
// Start of hash word 7 in the interleaved output: 16 consecutive values,
// indexed by lane in the loop below.
uint32_t *hashd7 = &hash[7*16];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 16;
uint32_t n = first_nonce;
// Captured before the benchmark override below, so targ32 keeps the
// caller's original ptarget[7].
const uint32_t targ32 = ptarget[7];
// Vector 19 of vdata holds the nonce word for all 16 lanes.
__m512i *noncev = (__m512i*)vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0000ff;
// Presumably byte-swaps the 80-byte header and interleaves it 16 ways into
// vdata -- inferred from the helper's name, confirm against its definition.
mm512_bswap32_intrlv80_16x32( vdata, pdata );
// Lane i starts at nonce n + i.
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
// Pre-hash the first 64 header bytes once into the shared template context;
// the hash function copies that context and feeds only the remaining 16 bytes.
blake256_16way_init( &l2v2_16way_ctx.blake );
blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 );
// NOTE(review): no matching #endif for this directive is visible inside the
// function -- confirm it belongs here.
#if defined (LYRA2REV2_8WAY)
do
{
lyra2rev2_16way_hash( hash, vdata );
for ( int lane = 0; lane < 16; lane++ )
// Cheap pre-filter on hash word 7 before the full target comparison.
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_16x32( lane_hash, hash, lane, 256 );
// Benchmark mode never submits.
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
// Advance every lane's nonce by 16 for the next batch.
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
n += 16;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
// Record where scanning stopped and how many nonces were covered.
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (LYRA2REV2_8WAY)
typedef struct {
blake256_8way_context blake;
keccak256_8way_context keccak;
keccak256_4way_context keccak;
cubehashParam cube;
skein256_8way_context skein;
bmw256_8way_context bmw;
skein256_4way_context skein;
bmw256_8way_context bmw;
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
bool init_lyra2rev2_8way_ctx()
{
keccak256_8way_init( &l2v2_8way_ctx.keccak );
keccak256_4way_init( &l2v2_8way_ctx.keccak );
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_8way_ctx.skein );
skein256_4way_init( &l2v2_8way_ctx.skein );
bmw256_8way_init( &l2v2_8way_ctx.bmw );
return true;
}
@@ -126,7 +235,6 @@ bool init_lyra2rev2_8way_ctx()
void lyra2rev2_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
@@ -141,14 +249,19 @@ void lyra2rev2_8way_hash( void *state, const void *input )
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash );
rintrlv_8x32_8x64( vhashA, vhash, 256 );
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
dintrlv_8x32( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
keccak256_4way_update( &ctx.keccak, vhash, 32 );
keccak256_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
keccak256_4way_init( &ctx.keccak );
keccak256_4way_update( &ctx.keccak, vhash, 32 );
keccak256_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
@@ -158,27 +271,25 @@ void lyra2rev2_8way_hash( void *state, const void *input )
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash0, hash1, vhash, 256 );
intrlv_2x256( vhash, hash2, hash3, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash2, hash3, vhash, 256 );
intrlv_2x256( vhash, hash4, hash5, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash4, hash5, vhash, 256 );
intrlv_2x256( vhash, hash6, hash7, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
skein256_4way_update( &ctx.skein, vhash, 32 );
skein256_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
skein256_4way_init( &ctx.skein );
skein256_4way_update( &ctx.skein, vhash, 32 );
skein256_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
@@ -189,8 +300,8 @@ void lyra2rev2_8way_hash( void *state, const void *input )
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 );
bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state );
@@ -223,7 +334,6 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
do
{
lyra2rev2_8way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hashd7[lane] <= targ32 ) )
@@ -243,6 +353,9 @@ int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
return 0;
}
#endif
/*
#elif defined (LYRA2REV2_4WAY)
typedef struct {
@@ -367,3 +480,4 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
}
#endif
*/

View File

@@ -380,7 +380,7 @@ static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
#endif /* HAVE_SHA256_8WAY */
#if defined(USE_ASM) && defined(__x86_64__)
//#if defined(USE_ASM) && defined(__x86_64__)
#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
@@ -394,113 +394,6 @@ void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
#endif
#elif defined(USE_ASM) && defined(__i386__)
#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V, int N);
#elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
void scrypt_core(uint32_t *X, uint32_t *V, int N);
#if defined(__ARM_NEON__)
#undef HAVE_SHA256_4WAY
#define SCRYPT_MAX_WAYS 3
#define HAVE_SCRYPT_3WAY 1
#define scrypt_best_throughput() 3
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
#endif
#else
/*
 * XOR the 16-word block Bx into B, run the Salsa20/8 core (four double
 * rounds) over the result, and add the permuted words back into B.
 * B is updated in place; Bx is only read.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
    uint32_t s[16];

    /* B ^= Bx, keeping a working copy of the mixed block in s[]. */
    for (int w = 0; w < 16; w++) {
        B[w] ^= Bx[w];
        s[w] = B[w];
    }

#define XS8_ROTL(v, c) (((v) << (c)) | ((v) >> (32 - (c))))
/* One Salsa20 quarter-round on the four words s[a], s[b], s[c], s[d]. */
#define XS8_QR(a, b, c, d)                        \
    do {                                          \
        s[b] ^= XS8_ROTL(s[a] + s[d], 7);         \
        s[c] ^= XS8_ROTL(s[b] + s[a], 9);         \
        s[d] ^= XS8_ROTL(s[c] + s[b], 13);        \
        s[a] ^= XS8_ROTL(s[d] + s[c], 18);        \
    } while (0)

    for (int round = 0; round < 4; round++) {
        /* Column round: the four quarter-rounds touch disjoint words. */
        XS8_QR( 0,  4,  8, 12);
        XS8_QR( 5,  9, 13,  1);
        XS8_QR(10, 14,  2,  6);
        XS8_QR(15,  3,  7, 11);

        /* Row round. */
        XS8_QR( 0,  1,  2,  3);
        XS8_QR( 5,  6,  7,  4);
        XS8_QR(10, 11,  8,  9);
        XS8_QR(15, 12, 13, 14);
    }

#undef XS8_QR
#undef XS8_ROTL

    /* Feed-forward: add the permuted state onto the XOR-ed input. */
    for (int w = 0; w < 16; w++)
        B[w] += s[w];
}
/*
 * Scalar scrypt mixing core over one 32-word (128-byte) block X, using V as
 * scratch space of N entries of 32 words each.
 *
 * Pass 1 stores the current X into V[i] and then mixes X, for i = 0..N-1.
 * Pass 2 picks entry (X[16] & (N - 1)) of V, XORs it into X and mixes again,
 * N times. The mask form of the index assumes N is a power of two.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
{
    uint32_t *lo = &X[0];
    uint32_t *hi = &X[16];
    int step;

    /* Fill the scratchpad sequentially. */
    for (step = 0; step < N; ++step) {
        memcpy(&V[step * 32], X, 128);
        xor_salsa8(lo, hi);
        xor_salsa8(hi, lo);
    }

    /* Data-dependent reads back from the scratchpad. */
    for (step = 0; step < N; ++step) {
        const uint32_t base = 32 * (X[16] & (N - 1));
        uint32_t w = 0;

        while (w < 32) {
            X[w] ^= V[base + w];
            ++w;
        }
        xor_salsa8(lo, hi);
        xor_salsa8(hi, lo);
    }
}
#endif
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
@@ -511,8 +404,8 @@ unsigned char *scrypt_buffer_alloc(int N)
return (uchar*) malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
}
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
uint32_t *midstate, unsigned char *scratchpad, int N)
static bool scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
uint32_t *midstate, unsigned char *scratchpad, int N, int thr_id )
{
uint32_t tstate[8], ostate[8];
uint32_t X[32];
@@ -527,11 +420,13 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
scrypt_core(X, V, N);
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
return true;
}
#ifdef HAVE_SHA256_4WAY
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
static bool scrypt_1024_1_1_256_4way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
int thrid )
{
uint32_t _ALIGN(128) tstate[4 * 8];
uint32_t _ALIGN(128) ostate[4 * 8];
@@ -545,32 +440,43 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
for (i = 0; i < 20; i++)
for (k = 0; k < 4; k++)
W[4 * i + k] = input[k * 20 + i];
for (i = 0; i < 8; i++)
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
tstate[4 * i + k] = midstate[i];
HMAC_SHA256_80_init_4way(W, tstate, ostate);
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
for (i = 0; i < 32; i++)
HMAC_SHA256_80_init_4way(W, tstate, ostate);
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
X[k * 32 + i] = W[4 * i + k];
scrypt_core(X + 0 * 32, V, N);
scrypt_core(X + 0 * 32, V, N);
scrypt_core(X + 1 * 32, V, N);
scrypt_core(X + 2 * 32, V, N);
scrypt_core(X + 3 * 32, V, N);
for (i = 0; i < 32; i++)
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
W[4 * i + k] = X[k * 32 + i];
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
for (i = 0; i < 8; i++)
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
output[k * 8 + i] = W[4 * i + k];
return true;
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SCRYPT_3WAY
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
static bool scrypt_1024_1_1_256_3way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
int thrid )
{
uint32_t _ALIGN(64) tstate[3 * 8], ostate[3 * 8];
uint32_t _ALIGN(64) X[3 * 32];
@@ -581,23 +487,34 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
memcpy(tstate + 0, midstate, 32);
memcpy(tstate + 8, midstate, 32);
memcpy(tstate + 16, midstate, 32);
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init(input + 20, tstate + 8, ostate + 8);
HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
if ( work_restart[thrid].restart ) return false;
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
scrypt_core_3way(X, V, N);
if ( work_restart[thrid].restart ) return false;
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
scrypt_core_3way(X, V, N);
if ( work_restart[thrid].restart ) return false;
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);
return true;
}
#ifdef HAVE_SHA256_4WAY
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
static bool scrypt_1024_1_1_256_12way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
int thrid )
{
uint32_t _ALIGN(128) tstate[12 * 8];
uint32_t _ALIGN(128) ostate[12 * 8];
@@ -612,43 +529,60 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
for (i = 0; i < 20; i++)
for (k = 0; k < 4; k++)
W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
for (j = 0; j < 3; j++)
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
tstate[32 * j + 4 * i + k] = midstate[i];
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
if ( work_restart[thrid].restart ) return false;
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
for (j = 0; j < 3; j++)
if ( work_restart[thrid].restart ) return false;
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
scrypt_core_3way(X + 0 * 96, V, N);
scrypt_core_3way(X + 0 * 96, V, N);
scrypt_core_3way(X + 1 * 96, V, N);
scrypt_core_3way(X + 2 * 96, V, N);
scrypt_core_3way(X + 3 * 96, V, N);
for (j = 0; j < 3; j++)
if ( work_restart[thrid].restart ) return false;
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
for (j = 0; j < 3; j++)
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
return true;
}
#endif /* HAVE_SHA256_4WAY */
#endif /* HAVE_SCRYPT_3WAY */
#ifdef HAVE_SCRYPT_6WAY
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
static bool scrypt_1024_1_1_256_24way( const uint32_t *input,
uint32_t *output, uint32_t *midstate,
unsigned char *scratchpad, int N, int thrid )
{
uint32_t _ALIGN(128) tstate[24 * 8];
uint32_t _ALIGN(128) ostate[24 * 8];
@@ -657,41 +591,57 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
uint32_t *V;
int i, j, k;
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
V = (uint32_t *)( ( (uintptr_t)(scratchpad) + 63 ) & ~ (uintptr_t)(63) );
for (j = 0; j < 3; j++)
for (i = 0; i < 20; i++)
for (k = 0; k < 8; k++)
for ( j = 0; j < 3; j++ )
for ( i = 0; i < 20; i++ )
for ( k = 0; k < 8; k++ )
W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 8; k++)
for ( j = 0; j < 3; j++ )
for ( i = 0; i < 8; i++ )
for ( k = 0; k < 8; k++ )
tstate[8 * 8 * j + 8 * i + k] = midstate[i];
HMAC_SHA256_80_init_8way(W + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init_8way(W + 256, tstate + 64, ostate + 64);
HMAC_SHA256_80_init_8way(W + 512, tstate + 128, ostate + 128);
PBKDF2_SHA256_80_128_8way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_80_128_8way(tstate + 64, ostate + 64, W + 256, W + 256);
PBKDF2_SHA256_80_128_8way(tstate + 128, ostate + 128, W + 512, W + 512);
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 8; k++)
HMAC_SHA256_80_init_8way( W + 0, tstate + 0, ostate + 0 );
HMAC_SHA256_80_init_8way( W + 256, tstate + 64, ostate + 64 );
HMAC_SHA256_80_init_8way( W + 512, tstate + 128, ostate + 128 );
if ( work_restart[thrid].restart ) return false;
PBKDF2_SHA256_80_128_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
PBKDF2_SHA256_80_128_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
PBKDF2_SHA256_80_128_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
if ( work_restart[thrid].restart ) return false;
for ( j = 0; j < 3; j++ )
for ( i = 0; i < 32; i++ )
for ( k = 0; k < 8; k++ )
X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
scrypt_core_6way(X + 0 * 32, V, N);
scrypt_core_6way(X + 6 * 32, V, N);
scrypt_core_6way(X + 12 * 32, V, N);
scrypt_core_6way(X + 18 * 32, V, N);
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 8; k++)
scrypt_core_6way( X + 0 * 32, V, N );
scrypt_core_6way( X + 6 * 32, V, N );
scrypt_core_6way( X + 12 * 32, V, N );
scrypt_core_6way( X + 18 * 32, V, N );
if ( work_restart[thrid].restart ) return false;
for ( j = 0; j < 3; j++ )
for ( i = 0; i < 32; i++ )
for ( k = 0; k < 8; k++ )
W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
PBKDF2_SHA256_128_32_8way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_128_32_8way(tstate + 64, ostate + 64, W + 256, W + 256);
PBKDF2_SHA256_128_32_8way(tstate + 128, ostate + 128, W + 512, W + 512);
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 8; k++)
PBKDF2_SHA256_128_32_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
PBKDF2_SHA256_128_32_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
PBKDF2_SHA256_128_32_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
for ( j = 0; j < 3; j++ )
for ( i = 0; i < 8; i++ )
for ( k = 0; k < 8; k++ )
output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
return true;
}
#endif /* HAVE_SCRYPT_6WAY */
@@ -703,7 +653,6 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
uint32_t midstate[8];
uint32_t n = pdata[19] - 1;
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
int throughput = scrypt_best_throughput();
int i;
@@ -714,6 +663,8 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
throughput *= 4;
#endif
// applog(LOG_INFO,"Scrypt throughput %d",throughput);
for (i = 0; i < throughput; i++)
memcpy(data + i * 20, pdata, 80);
@@ -721,46 +672,50 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
sha256_transform(midstate, data, 0);
do {
bool rc = true;
for (i = 0; i < throughput; i++)
data[i * 20 + 19] = ++n;
#if defined(HAVE_SHA256_4WAY)
if (throughput == 4)
scrypt_1024_1_1_256_4way(data, hash, midstate,
scratchbuf, scratchbuf_size );
rc = scrypt_1024_1_1_256_4way(data, hash, midstate,
scratchbuf, scratchbuf_size, thr_id );
else
#endif
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
if (throughput == 12)
scrypt_1024_1_1_256_12way(data, hash, midstate,
scratchbuf, scratchbuf_size );
rc = scrypt_1024_1_1_256_12way(data, hash, midstate,
scratchbuf, scratchbuf_size, thr_id );
else
#endif
#if defined(HAVE_SCRYPT_6WAY)
if (throughput == 24)
scrypt_1024_1_1_256_24way(data, hash, midstate,
scratchbuf, scratchbuf_size );
rc = scrypt_1024_1_1_256_24way(data, hash, midstate,
scratchbuf, scratchbuf_size, thr_id );
else
#endif
#if defined(HAVE_SCRYPT_3WAY)
if (throughput == 3)
scrypt_1024_1_1_256_3way(data, hash, midstate,
scratchbuf, scratchbuf_size );
rc = scrypt_1024_1_1_256_3way(data, hash, midstate,
scratchbuf, scratchbuf_size, thr_id );
else
#endif
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf,
scratchbuf_size );
rc = scrypt_1024_1_1_256(data, hash, midstate, scratchbuf,
scratchbuf_size, thr_id );
for (i = 0; i < throughput; i++) {
if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) {
if ( rc )
for ( i = 0; i < throughput; i++ )
{
if ( unlikely( valid_hash( hash + i * 8, ptarget ) ) )
{
pdata[19] = data[i * 20 + 19];
submit_solution( work, hash, mythr );
test_hash_and_submit( work, hash, mythr );
// submit_lane_solution( work, hash, mythr, i );
}
}
} while ( likely( n < max_nonce && !(*restart) ) );
} while ( likely( ( n < ( max_nonce - throughput ) ) && !(*restart) ) );
*hashes_done = n - pdata[19] + 1;
*hashes_done = n - pdata[19];
pdata[19] = n;
return 0;
}
@@ -779,7 +734,6 @@ bool register_scrypt_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | AVX2_OPT;
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
gate->scanhash = (void*)&scanhash_scrypt;
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
opt_target_factor = 65536.0;
if ( !opt_param_n )

View File

@@ -73,6 +73,7 @@ bool register_yescryptr8g_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yespower_r8g;
gate->hash = (void*)&yespower_tls;
pk_buffer_size = 26;
opt_sapling = true;
opt_target_factor = 65536.0;
return true;

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.3.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.4.5.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.12.3'
PACKAGE_STRING='cpuminer-opt 3.12.3'
PACKAGE_VERSION='3.12.4.5'
PACKAGE_STRING='cpuminer-opt 3.12.4.5'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.12.3 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.12.4.5 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.12.3:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.12.4.5:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.12.3
cpuminer-opt configure 3.12.4.5
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.12.3, which was
It was created by cpuminer-opt $as_me 3.12.4.5, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.12.3'
VERSION='3.12.4.5'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.12.3, which was
This file was extended by cpuminer-opt $as_me 3.12.4.5, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.12.3
cpuminer-opt config.status 3.12.4.5
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.12.3])
AC_INIT([cpuminer-opt], [3.12.4.5])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

File diff suppressed because it is too large Load Diff

25
miner.h
View File

@@ -312,6 +312,19 @@ int varint_encode( unsigned char *p, uint64_t n );
size_t address_to_script( unsigned char *out, size_t outsz, const char *addr );
int timeval_subtract( struct timeval *result, struct timeval *x,
struct timeval *y);
// Bitcoin formula for converting difficulty to an equivalent
// number of hashes.
//
// https://en.bitcoin.it/wiki/Difficulty
//
// hash = diff * 2**32
//
// diff_to_hash = 2**32 = 0x100000000 = 4294967296 = exp32;
//
// NOTE(review): these are declared without `extern` and without an
// initializer, so every translation unit that includes this header gets a
// tentative definition of each constant. Presumably one source file provides
// the initialized definitions (not visible from here) -- confirm, and
// consider declaring them `extern` so the link does not depend on
// common-symbol merging (it would fail with -fno-common).
const double exp32; // 2**32
const double exp64; // 2**64
bool fulltest( const uint32_t *hash, const uint32_t *target );
bool valid_hash( const void*, const void* );
@@ -332,11 +345,12 @@ struct thr_info {
//struct thr_info *thr_info;
bool submit_solution( struct work *work, const void *hash,
struct thr_info *thr );
bool submit_lane_solution( struct work *work, const void *hash,
struct thr_info *thr, const int lane );
bool submit_solution( struct work *work, const void *hash,
struct thr_info *thr );
bool submit_lane_solution( struct work *work, const void *hash,
struct thr_info *thr, const int lane );
bool test_hash_and_submit( struct work*, const void*, struct thr_info* );
bool submit_work( struct thr_info *thr, const struct work *work_in );
@@ -378,6 +392,7 @@ struct work {
size_t xnonce2_len;
unsigned char *xnonce2;
bool sapling;
bool stale;
// x16rt
uint32_t merkleroothash[8];
@@ -754,6 +769,8 @@ extern uint32_t solved_block_count;
extern pthread_mutex_t applog_lock;
extern pthread_mutex_t stats_lock;
extern bool opt_sapling;
extern const int pk_buffer_size_max;
extern int pk_buffer_size;
static char const usage[] = "\
Usage: " PACKAGE_NAME " [OPTIONS]\n\

108
util.c
View File

@@ -159,8 +159,6 @@ void applog2( int prio, const char *fmt, ... )
}
void applog(int prio, const char *fmt, ...)
{
va_list ap;
@@ -921,25 +919,28 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
return true;
}
size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
size_t address_to_script( unsigned char *out, size_t outsz, const char *addr )
{
unsigned char addrbin[26];
unsigned char addrbin[ pk_buffer_size_max ];
int addrver;
size_t rv;
if (!b58dec(addrbin, sizeof(addrbin), addr))
if ( !b58dec( addrbin, outsz, addr ) )
return 0;
addrver = b58check(addrbin, sizeof(addrbin), addr);
if (addrver < 0)
addrver = b58check( addrbin, outsz, addr );
if ( addrver < 0 )
return 0;
switch (addrver) {
switch ( addrver )
{
case 5: /* Bitcoin script hash */
case 196: /* Testnet script hash */
if (outsz < (rv = 23))
if ( outsz < ( rv = 23 ) )
return rv;
out[ 0] = 0xa9; /* OP_HASH160 */
out[ 1] = 0x14; /* push 20 bytes */
memcpy(&out[2], &addrbin[1], 20);
memcpy( &out[2], &addrbin[1], 20 );
out[22] = 0x87; /* OP_EQUAL */
return rv;
default:
@@ -948,7 +949,7 @@ size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
out[ 0] = 0x76; /* OP_DUP */
out[ 1] = 0xa9; /* OP_HASH160 */
out[ 2] = 0x14; /* push 20 bytes */
memcpy(&out[3], &addrbin[1], 20);
memcpy( &out[3], &addrbin[1], 20 );
out[23] = 0x88; /* OP_EQUALVERIFY */
out[24] = 0xac; /* OP_CHECKSIG */
return rv;
@@ -1038,37 +1039,57 @@ bool fulltest( const uint32_t *hash, const uint32_t *target )
return rc;
}
/*
void diff_to_target(uint32_t *target, double diff)
{
uint64_t m;
uint64_t m;
int k;
for (k = 6; k > 0 && diff > 1.0; k--)
diff /= 4294967296.0;
m = (uint64_t)(4294901760.0 / diff);
if (m == 0 && k == 6)
memset(target, 0xff, 32);
else {
memset(target, 0, 32);
target[k] = (uint32_t)m;
target[k + 1] = (uint32_t)(m >> 32);
}
}
*/
void diff_to_target(uint32_t *target, double diff)
{
uint64_t *t = (uint64_t*)target;
uint64_t m;
int k;
const double exp64 = (double)0xffffffffffffffff + 1.;
// static const double exp64 = (double)0xffffffffffffffff + 1.;
for ( k = 3; k > 0 && diff > 1.0; k-- )
diff /= exp64;
// for (k = 6; k > 0 && diff > 1.0; k--)
// diff /= 4294967296.0;
m = (uint64_t)( 0xffff0000 / diff );
if unlikely( m == 0 && k == 3 )
memset( target, 0xff, 32 );
m = (uint64_t)( 0xffff0000 / diff );
if unlikely( m == 0 && k == 3 )
memset( t, 0xff, 32 );
else
{
memset( target, 0, 32 );
((uint64_t*)target)[k] = m;
// target[k] = (uint32_t)m;
// target[k + 1] = (uint32_t)(m >> 32);
memset( t, 0, 32 );
t[k] = m;
}
}
// Only used by stratum pools
// deprecated
// Sets a work item's target from a difficulty value: fills work->target via
// diff_to_target() and records the same difficulty in work->targetdiff.
void work_set_target(struct work* work, double diff)
{
diff_to_target( work->target, diff );
work->targetdiff = diff;
}
// Only used by longpoll pools
double target_to_diff(uint32_t* target)
{
uchar* tgt = (uchar*) target;
@@ -1545,35 +1566,44 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
ret = true;
if (!opt_extranonce)
if ( !opt_extranonce )
goto out;
// subscribe to extranonce (optional)
sprintf(s, "{\"id\": 3, \"method\": \"mining.extranonce.subscribe\", \"params\": []}");
if (!stratum_send_line(sctx, s))
if ( !stratum_send_line( sctx, s ) )
goto out;
if (!socket_full(sctx->sock, 3)) {
applog(LOG_WARNING, "stratum extranonce subscribe timed out");
goto out;
if ( !socket_full( sctx->sock, 3 ) )
{
applog( LOG_WARNING, "Extranonce disabled, subscribe timed out" );
opt_extranonce = false;
goto out;
}
if ( !opt_quiet )
applog( LOG_INFO, "Extranonce subscription enabled" );
sret = stratum_recv_line(sctx);
if (sret) {
json_t *extra = JSON_LOADS(sret, &err);
if (!extra) {
sret = stratum_recv_line( sctx );
if ( sret )
{
json_t *extra = JSON_LOADS( sret, &err );
if ( !extra )
{
applog(LOG_WARNING, "JSON decode failed(%d): %s", err.line, err.text);
} else {
if (json_integer_value(json_object_get(extra, "id")) != 3) {
}
else
{
if ( json_integer_value(json_object_get( extra, "id" ) ) != 3 )
{
// we receive a standard method if extranonce is ignored
if (!stratum_handle_method(sctx, sret))
applog(LOG_WARNING, "Stratum answer id is not correct!");
if ( !stratum_handle_method( sctx, sret ) )
applog( LOG_WARNING, "Stratum answer id is not correct!" );
}
res_val = json_object_get(extra, "result");
res_val = json_object_get( extra, "result" );
// if (opt_debug && (!res_val || json_is_false(res_val)))
// applog(LOG_DEBUG, "extranonce subscribe not supported");
json_decref(extra);
json_decref( extra );
}
free(sret);
}