Compare commits


4 Commits

Author     SHA1        Message   Date
Jay D Dee  3363d61524  v3.8.4.1  2018-03-22 14:28:03 -04:00
Jay D Dee  20fe05054c  v3.8.4    2018-03-18 12:51:03 -04:00
Jay D Dee  157508bd07  v3.8.3.3  2018-02-25 14:15:07 -05:00
Jay D Dee  c24a4bdbc2  v3.8.3.2  2018-02-24 14:36:19 -05:00
22 changed files with 1706 additions and 1321 deletions


@@ -68,6 +68,7 @@ cpuminer_SOURCES = \
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/cubehash/cube-hash-2way.c \
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/gost/sph_gost.c \
@@ -242,7 +243,7 @@ cpuminer_SOURCES = \
algo/x17/hmq1725.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-simd.c
algo/yescrypt/yescrypt-best.c
disable_flags =


@@ -28,11 +28,12 @@ performance.
ARM CPUs are not supported.
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
Centos are known to work and have all dependencies in their repositories.
Others may work but may require more effort.
Centos, are known to work and have all dependencies in their repositories.
Others may work but may require more effort. Older versions such as Centos 6
don't work due to missing features.
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
MacOS, OSx is not supported.
MacOS, OSx and Android are not supported.
3. Stratum pool. Some algos may work for wallet mining using getwork or GBT. YMMV.
@@ -110,6 +111,7 @@ Supported Algorithms
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)
yescryptr16 Yenten (YTN)
yescryptr32 WAVI
zr5 Ziftr
Errata


@@ -1,4 +1,4 @@
cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
This feature requires recent SW including GCC version 5 or higher and
openssl version 1.1 or higher. It may also require using "-march=znver1"
compile flag.
@@ -90,7 +90,8 @@ Additional optional compile flags, add the following to CFLAGS to activate:
SPH may give slightly better performance on algos that use sha256 when using
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
better than SPH.
better than SPH. This option is ignored when 4-way is used, even for CPUs
with SHA.
Start mining.
@@ -159,6 +160,34 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------
v3.8.4.1
Fixed sha256t low difficulty rejects.
Fixed compile error on CPUs with AVX512.
v3.8.4
Added yescryptr32 algo for WAVI coin.
Added URL to API data.
Improved detection of __int128 support (Linux only).
Compile support for CPUs without SSSE3 (no binary support).
v3.8.3.3
Integrated getblocktemplate with algo_gate.
Added support for hodl gbt (untested).
Reworked some recent quick fixes.
v3.8.3.2
Reverted gbt changes from v3.8.0 that broke getwork.
Reverted scaled hash rate for API, added HS term in addition to KHS.
Added blocks solved to console display and API.
v3.8.3.1
Fixed regression in v3.8.3 that broke several algos.
v3.8.3
More restoration of lost lyra2 hash.


@@ -119,9 +119,11 @@ void init_algo_gate( algo_gate_t* gate )
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
gate->set_target = (void*)&std_set_target;
gate->work_decode = (void*)&std_le_work_decode;
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
gate->build_block_header = (void*)&std_build_block_header;
gate->build_extraheader = (void*)&std_build_extraheader;
gate->set_work_data_endian = (void*)&do_nothing;
gate->calc_network_diff = (void*)&std_calc_network_diff;
@@ -225,6 +227,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
default:
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
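The hunk above shows cpuminer-opt's algo_gate dispatch pattern: init_algo_gate() fills a table of function pointers with standard defaults (std_* or do_nothing), and each register_*_algo() case overrides only the entries its algorithm needs -- here the new malloc_txs_request and build_block_header slots, plus the yescryptr32 registration. A minimal sketch of the idea (a hypothetical miniature, not the real gate):

// Illustrative only: the default-then-override function table pattern.
#include <stdio.h>

typedef struct
{
   void (*set_target)( double d );
   void (*build_header)( void );
} mini_gate_t;

static void std_set_target( double d ) { printf( "std target %f\n", d ); }
static void std_build_header( void )   { printf( "std header\n" ); }
static void hodl_build_header( void )  { printf( "hodl header\n" ); }

static void init_mini_gate( mini_gate_t *g )
{
   g->set_target   = std_set_target;      // defaults for everything
   g->build_header = std_build_header;
}

static void register_mini_hodl( mini_gate_t *g )
{
   g->build_header = hodl_build_header;   // override only what differs
}

int main()
{
   mini_gate_t gate;
   init_mini_gate( &gate );
   register_mini_hodl( &gate );
   gate.set_target( 1.0 );                // runs the std default
   gate.build_header();                   // runs the hodl override
   return 0;
}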


@@ -127,7 +127,10 @@ void ( *set_target) ( struct work*, double );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
uint32_t*, uint32_t, uint32_t );
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
char* ( *malloc_txs_request ) ( struct work* );
void ( *set_work_data_endian ) ( struct work* );
double ( *calc_network_diff ) ( struct work* );
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
@@ -228,11 +231,17 @@ void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
void jr2_build_stratum_request ( char *req, struct work *work );
char* std_malloc_txs_request( struct work *work );
// Default is do_nothing (assumed LE)
void set_work_data_big_endian( struct work *work );
double std_calc_network_diff( struct work *work );
void std_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits );
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );


@@ -0,0 +1,205 @@
#if defined(__AVX2__)
#include <stdbool.h>
#include <unistd.h>
#include <memory.h>
#include "cube-hash-2way.h"
// 2x128
static void transform_2way( cube_2way_context *sp )
{
int r;
const int rounds = sp->rounds;
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
x0 = _mm256_load_si256( (__m256i*)sp->h );
x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
x2 = _mm256_load_si256( (__m256i*)sp->h + 2 );
x3 = _mm256_load_si256( (__m256i*)sp->h + 3 );
x4 = _mm256_load_si256( (__m256i*)sp->h + 4 );
x5 = _mm256_load_si256( (__m256i*)sp->h + 5 );
x6 = _mm256_load_si256( (__m256i*)sp->h + 6 );
x7 = _mm256_load_si256( (__m256i*)sp->h + 7 );
for ( r = 0; r < rounds; ++r )
{
x4 = _mm256_add_epi32( x0, x4 );
x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 );
y0 = x2;
y1 = x3;
y2 = x0;
y3 = x1;
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
_mm256_srli_epi32( y0, 25 ) );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
_mm256_srli_epi32( y1, 25 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
_mm256_srli_epi32( y2, 25 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
_mm256_srli_epi32( y3, 25 ) );
x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 );
x3 = _mm256_xor_si256( x3, x7 );
x4 = mm256_swap128_64( x4 );
x5 = mm256_swap128_64( x5 );
x6 = mm256_swap128_64( x6 );
x7 = mm256_swap128_64( x7 );
x4 = _mm256_add_epi32( x0, x4 );
x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 );
y0 = x1;
y1 = x0;
y2 = x3;
y3 = x2;
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
_mm256_srli_epi32( y0, 21 ) );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
_mm256_srli_epi32( y1, 21 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
_mm256_srli_epi32( y2, 21 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
_mm256_srli_epi32( y3, 21 ) );
x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 );
x3 = _mm256_xor_si256( x3, x7 );
x4 = mm256_swap64_32( x4 );
x5 = mm256_swap64_32( x5 );
x6 = mm256_swap64_32( x6 );
x7 = mm256_swap64_32( x7 );
}
_mm256_store_si256( (__m256i*)sp->h, x0 );
_mm256_store_si256( (__m256i*)sp->h + 1, x1 );
_mm256_store_si256( (__m256i*)sp->h + 2, x2 );
_mm256_store_si256( (__m256i*)sp->h + 3, x3 );
_mm256_store_si256( (__m256i*)sp->h + 4, x4 );
_mm256_store_si256( (__m256i*)sp->h + 5, x5 );
_mm256_store_si256( (__m256i*)sp->h + 6, x6 );
_mm256_store_si256( (__m256i*)sp->h + 7, x7 );
}
cube_2way_context cube_2way_ctx_cache __attribute__ ((aligned (64)));
int cube_2way_reinit( cube_2way_context *sp )
{
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
return 0;
}
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
int blockbytes )
{
int i;
// all sizes are in units of __m128i
cube_2way_ctx_cache.hashlen = hashbitlen/128;
cube_2way_ctx_cache.blocksize = blockbytes/16;
cube_2way_ctx_cache.rounds = rounds;
cube_2way_ctx_cache.pos = 0;
for ( i = 0; i < 8; ++i )
cube_2way_ctx_cache.h[i] = m256_zero;
cube_2way_ctx_cache.h[0] = _mm256_set_epi32(
0, rounds, blockbytes, hashbitlen / 8,
0, rounds, blockbytes, hashbitlen / 8 );
for ( i = 0; i < 10; ++i )
transform_2way( &cube_2way_ctx_cache );
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
return 0;
}
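// The ten transforms above bake the (hashbitlen, rounds, blockbytes)
// parameters into the cached IV exactly once; cube_2way_reinit() can then
// restore a fresh context with a single memcpy instead of repeating them.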
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size )
{
const int len = size / 16;
const __m256i *in = (__m256i*)data;
int i;
// It is assumed data is aligned to 256 bits and is a multiple of 128 bits.
// Current usage: data is either 64 or 80 bytes per lane.
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_2way( sp );
sp->pos = 0;
}
}
return 0;
}
int cube_2way_close( cube_2way_context *sp, void *output )
{
__m256i *hash = (__m256i*)output;
int i;
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
1,0,0,0 ) );
for ( i = 0; i < 10; ++i )
transform_2way( sp );
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->h[i];
return 0;
}
int cube_2way_update_close( cube_2way_context *sp, void *output,
const void *data, size_t size )
{
const int len = size / 16;
const __m256i *in = (__m256i*)data;
__m256i *hash = (__m256i*)output;
int i;
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_2way( sp );
sp->pos = 0;
}
}
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
1,0,0,0 ) );
for ( i = 0; i < 10; ++i )
transform_2way( sp );
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->h[i];
return 0;
}
#endif


@@ -0,0 +1,36 @@
#ifndef CUBE_HASH_2WAY_H__
#define CUBE_HASH_2WAY_H__
#if defined(__AVX2__)
#include <stdint.h>
#include "avxdefs.h"
// 2x128: two 128-bit (SSE2-width) lanes computed in parallel with AVX2
struct _cube_2way_context
{
int hashlen; // __m128i
int rounds;
int blocksize; // __m128i
int pos; // number of __m128i read into x from current block
__m256i h[8] __attribute__ ((aligned (64)));
};
typedef struct _cube_2way_context cube_2way_context;
int cube_2way_init( cube_2way_context* sp, int hashbitlen, int rounds,
int blockbytes );
// reinitialize context with same parameters, much faster.
int cube_2way_reinit( cube_2way_context *sp );
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size );
int cube_2way_close( cube_2way_context *sp, void *output );
int cube_2way_update_close( cube_2way_context *sp, void *output,
const void *data, size_t size );
#endif
#endif
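For orientation, a usage sketch of this API. Everything below is an assumption spelled out, not repo code: the (512, 16, 32) init parameters are the usual CubeHash512 settings, the size argument counts bytes per lane, and the caller is responsible for interleaving the two messages 128 bits at a time, matching the 2x128 layout transform_2way() operates on.

// Minimal sketch: hash two independent 64-byte inputs in parallel.
#include <stdint.h>
#include <string.h>
#include "cube-hash-2way.h"

void cube512_2way_example( const void *lane0, const void *lane1,
                           void *hash0, void *hash1 )
{
   cube_2way_context ctx __attribute__ ((aligned (64)));
   uint8_t in [128] __attribute__ ((aligned (64)));  // 2 x 64 byte messages
   uint8_t out[128] __attribute__ ((aligned (64)));  // 2 x 64 byte digests
   int i;

   // interleave: 16 bytes of lane0, then 16 bytes of lane1, repeated
   for ( i = 0; i < 4; i++ )
   {
      memcpy( in + 32*i,      (const uint8_t*)lane0 + 16*i, 16 );
      memcpy( in + 32*i + 16, (const uint8_t*)lane1 + 16*i, 16 );
   }

   cube_2way_init( &ctx, 512, 16, 32 );          // assumed CubeHash512 params
   cube_2way_update_close( &ctx, out, in, 64 );  // 64 = bytes per lane

   // de-interleave the two 512-bit results
   for ( i = 0; i < 4; i++ )
   {
      memcpy( (uint8_t*)hash0 + 16*i, out + 32*i,      16 );
      memcpy( (uint8_t*)hash1 + 16*i, out + 32*i + 16, 16 );
   }
}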


@@ -42,15 +42,82 @@ void hodl_le_build_stratum_request( char* req, struct work* work,
free( xnonce2str );
}
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
char* hodl_malloc_txs_request( struct work *work )
{
uchar merkle_root[64] = { 0 };
size_t t;
char* req;
json_t *val;
char data_str[2 * sizeof(work->data) + 1];
int i;
algo_gate.gen_merkle_root( merkle_root, sctx );
for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
be32enc( work->data + i, work->data[i] );
bin2hex( data_str, (unsigned char *)work->data, 88 );
if ( work->workid )
{
char *params;
val = json_object();
json_object_set_new( val, "workid", json_string( work->workid ) );
params = json_dumps( val, 0 );
json_decref( val );
req = malloc( 128 + 2*88 + strlen( work->txs ) + strlen( params ) );
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n",
data_str, work->txs, params);
free( params );
}
else
{
req = malloc( 128 + 2*88 + strlen(work->txs));
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n",
data_str, work->txs);
}
return req;
}
void hodl_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_tree,
uint32_t ntime, uint32_t nbits )
{
int i;
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = version;
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[ 1+i ] = le32dec( prevhash + i );
else
for (i = 0; i < 8; i++)
g_work->data[ 8-i ] = le32dec( prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[ 9+i ] = be32dec( merkle_tree + i );
g_work->data[ algo_gate.ntime_index ] = ntime;
g_work->data[ algo_gate.nbits_index ] = nbits;
g_work->data[22] = 0x80000000;
g_work->data[31] = 0x00000280;
}
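// For reference, the g_work->data[] word layout filled in above (ntime and
// nbits land wherever the gate's ntime_index/nbits_index point, 17 and 18
// in the usual bitcoin-style layout):
//
//    [0]        version
//    [1..8]     previous block hash (LE words for stratum, reversed otherwise)
//    [9..16]    merkle root (BE words)
//    [ntime_index]  ntime
//    [nbits_index]  nbits
//    [22]       0x80000000  padding marker (the std layout uses [20])
//    [31]       0x00000280  padded length field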
// hodl_build_extraheader is redundant; hodl can use std_build_extraheader,
// which calls hodl_build_block_header through the gate.
#if 0
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
{
uchar merkle_tree[64] = { 0 };
size_t t;
// int i;
algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
/*
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
@@ -63,7 +130,9 @@ void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
g_work->data[22] = 0x80000000;
g_work->data[31] = 0x00000280;
*/
}
#endif
// called only by thread 0, saves a backup of g_work
void hodl_get_new_work( struct work* work, struct work* g_work)
@@ -73,6 +142,22 @@ void hodl_get_new_work( struct work* work, struct work* g_work)
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
}
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
{
json_t *val;
char *req = NULL;
if ( have_gbt )
{
req = malloc( strlen( gbt_lp_req ) + strlen( lp_id ) + 1 );
sprintf( req, gbt_lp_req, lp_id );
}
val = json_rpc_call( curl, lp_url, rpc_userpass,
req ? req : getwork_req, err, JSON_RPC_LONGPOLL );
free( req );
return val;
}
// called by every thread, copies the backup to each thread's work.
void hodl_resync_threads( struct work* work )
{
@@ -108,17 +193,26 @@ bool register_hodl_algo( algo_gate_t* gate )
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
#endif
// if ( TOTAL_CHUNKS % opt_n_threads )
// {
// applog(LOG_ERR,"Thread count must be power of 2.");
// return false;
// }
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;
gate->get_new_work = (void*)&hodl_get_new_work;
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
gate->set_target = (void*)&hodl_set_target;
gate->build_stratum_request = (void*)&hodl_le_build_stratum_request;
gate->build_extraheader = (void*)&hodl_build_extraheader;
gate->malloc_txs_request = (void*)&hodl_malloc_txs_request;
gate->build_block_header = (void*)&hodl_build_block_header;
// gate->build_extraheader = (void*)&hodl_build_extraheader;
gate->resync_threads = (void*)&hodl_resync_threads;
gate->do_this_thread = (void*)&hodl_do_this_thread;
gate->work_cmp_size = 76;
hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
allow_getwork = false;
return ( hodl_scratchbuf != NULL );
}


@@ -10,8 +10,13 @@
#ifndef NO_AES_NI
void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, void *MidHash)
void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
void *MidHash )
{
const int Chunk = TOTAL_CHUNKS / ThreadCount;
const uint32_t StartChunk = ThreadID * Chunk;
const uint32_t EndChunk = StartChunk + Chunk;
#ifdef __AVX__
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
uint64_t* desination[ SHA512_PARALLEL_N ];
@@ -22,9 +27,7 @@ void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, voi
memcpy( TempBufs[i], MidHash, 32 );
}
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); i+= SHA512_PARALLEL_N )
for ( uint32_t i = StartChunk; i < EndChunk; i += SHA512_PARALLEL_N )
{
for ( int j = 0; j < SHA512_PARALLEL_N; ++j )
{
@@ -41,9 +44,7 @@ void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, voi
uint32_t TempBuf[8];
memcpy( TempBuf, MidHash, 32 );
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); ++i )
for ( uint32_t i = StartChunk; i < EndChunk; ++i )
{
TempBuf[0] = i;
SHA512( ( uint8_t *)TempBuf, 32,

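The hunk above only hoists the per-thread chunk arithmetic into named constants, which makes the partitioning easy to check. A worked instance with hypothetical numbers:

// TOTAL_CHUNKS = 1024, ThreadCount = 4  =>  Chunk = 256
//   thread 0: chunks [  0, 256)     thread 1: chunks [256,  512)
//   thread 2: chunks [512, 768)     thread 3: chunks [768, 1024)
// The AVX path walks its range SHA512_PARALLEL_N chunks at a time, the
// scalar path one at a time. If ThreadCount does not divide TOTAL_CHUNKS
// the trailing chunks are never generated, which is presumably what the
// commented-out thread-count check in register_hodl_algo guards against.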

@@ -55,23 +55,23 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, updates all args
#define G_4X64(a,b,c,d) \
a = _mm256_add_epi64( a, b ); \
d = mm256_rotr_64( _mm256_xor_si256( d, a), 32 ); \
d = mm256_ror_64( _mm256_xor_si256( d, a), 32 ); \
c = _mm256_add_epi64( c, d ); \
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 24 ); \
b = mm256_ror_64( _mm256_xor_si256( b, c ), 24 ); \
a = _mm256_add_epi64( a, b ); \
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
c = _mm256_add_epi64( c, d ); \
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );
b = mm256_ror_64( _mm256_xor_si256( b, c ), 63 );
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotr256_1x64( s1); \
s1 = mm256_ror256_1x64( s1); \
s2 = mm256_swap_128( s2 ); \
s3 = mm256_rotl256_1x64( s3 ); \
s3 = mm256_rol256_1x64( s3 ); \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotl256_1x64( s1 ); \
s1 = mm256_rol256_1x64( s1 ); \
s2 = mm256_swap_128( s2 ); \
s3 = mm256_rotr256_1x64( s3 );
s3 = mm256_ror256_1x64( s3 );
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
@@ -94,25 +94,25 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, all args updated
#define G_2X64(a,b,c,d) \
a = _mm_add_epi64( a, b ); \
d = mm_rotr_64( _mm_xor_si128( d, a), 32 ); \
d = mm_ror_64( _mm_xor_si128( d, a), 32 ); \
c = _mm_add_epi64( c, d ); \
b = mm_rotr_64( _mm_xor_si128( b, c ), 24 ); \
b = mm_ror_64( _mm_xor_si128( b, c ), 24 ); \
a = _mm_add_epi64( a, b ); \
d = mm_rotr_64( _mm_xor_si128( d, a ), 16 ); \
d = mm_ror_64( _mm_xor_si128( d, a ), 16 ); \
c = _mm_add_epi64( c, d ); \
b = mm_rotr_64( _mm_xor_si128( b, c ), 63 );
b = mm_ror_64( _mm_xor_si128( b, c ), 63 );
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm_rotl256_1x64( s2, s3 ); \
mm_ror256_1x64( s2, s3 ); \
mm_swap_128( s4, s5 ); \
mm_rotr256_1x64( s6, s7 ); \
mm_rol256_1x64( s6, s7 ); \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm_rotr256_1x64( s2, s3 ); \
mm_rol256_1x64( s2, s3 ); \
mm_swap_128( s4, s5 ); \
mm_rotl256_1x64( s6, s7 );
mm_ror256_1x64( s6, s7 );
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \

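The G_4X64 and G_2X64 macros above are four- and two-lane vectorizations of Blake2b's G mixing function; this hunk only renames the rotate helpers (rotr/rotl to ror/rol). For reference, the scalar operation each lane performs, with the same 32/24/16/63 rotation constants:

#include <stdint.h>

static inline uint64_t ror64( uint64_t w, unsigned c )
{ return ( w >> c ) | ( w << ( 64 - c ) ); }

// Scalar Blake2b G: what G_4X64 applies to four lanes at once (AVX2) and
// G_2X64 to two lanes (SSE2/AVX).
static inline void blake2b_g( uint64_t *a, uint64_t *b,
                              uint64_t *c, uint64_t *d )
{
   *a += *b;  *d = ror64( *d ^ *a, 32 );
   *c += *d;  *b = ror64( *b ^ *c, 24 );
   *a += *b;  *d = ror64( *d ^ *a, 16 );
   *c += *d;  *b = ror64( *b ^ *c, 63 );
}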

@@ -40,6 +40,35 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
free(xnonce2str);
}
// Don't use lbry_build_block_header, it can't handle claim; do it inline
// in lbry_build_extraheader. The side effect is no GBT support for lbry.
void lbry_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits )
{
int i;
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = version;
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = le32dec( prevhash + i );
else
for (i = 0; i < 8; i++)
g_work->data[ 8-i ] = le32dec( prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( merkle_root + i );
// for ( int i = 0; i < 8; i++ )
// g_work->data[17 + i] = claim[i];
g_work->data[ LBRY_NTIME_INDEX ] = ntime;
g_work->data[ LBRY_NBITS_INDEX ] = nbits;
g_work->data[28] = 0x80000000;
}
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
unsigned char merkle_root[64] = { 0 };
@@ -50,14 +79,23 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
// (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_root,
// le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
for ( int i = 0; i < 8; i++ )
g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
g_work->data[28] = 0x80000000;
@@ -86,6 +124,7 @@ bool register_lbry_algo( algo_gate_t* gate )
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
gate->get_max64 = (void*)&lbry_get_max64;
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
// gate->build_block_header = (void*)&build_block_header;
gate->build_extraheader = (void*)&lbry_build_extraheader;
gate->set_target = (void*)&lbry_set_target;
gate->ntime_index = LBRY_NTIME_INDEX;


@@ -155,7 +155,7 @@ bool register_sha256t_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
gate->set_target = (void*)&sha256t_set_target;
// gate->set_target = (void*)&sha256t_set_target;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
}


@@ -52,21 +52,6 @@ extern "C"{
#define C32 SPH_C32
/*
* As of round 2 of the SHA-3 competition, the published reference
* implementation and test vectors are wrong, because they use
* big-endian AES tables while the internal decoding uses little-endian.
* The code below follows the specification. To turn it into a code
* which follows the reference implementation (the one called "BugFix"
* on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
* the code below (from the '#define AES_BIG_ENDIAN...' to the definition
* of the AES_ROUND_NOKEY macro) and replace it with the version which
* is commented out afterwards.
*/
#define AES_BIG_ENDIAN 0
#include "algo/sha/aes_helper.c"
static const sph_u32 IV512[] = {
C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),
C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),
@@ -74,210 +59,19 @@ static const sph_u32 IV512[] = {
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
};
// Return hi 128 bits with elements shifted one lane with vacated lane filled
// with data rotated from lo.
// Partially rotate elements in two 128 bit vectors as one 256 bit vector
// and return the rotated high 128 bits.
// Similar to mm_rotr256_1x32 but only a partial rotation as lo is not
// completed. It's faster than a full rotation.
#if defined(__SSSE3__)
static inline __m128i mm_rotr256hi_1x32( __m128i hi, __m128i lo, int n )
{ return _mm_or_si128( _mm_srli_si128( hi, n<<2 ),
_mm_slli_si128( lo, 16 - (n<<2) ) );
}
#define mm_rotr256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 )
#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
sph_u32 t0 = (x0); \
sph_u32 t1 = (x1); \
sph_u32 t2 = (x2); \
sph_u32 t3 = (x3); \
AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
} while (0)
#else // SSE2
#define mm_rotr256hi_1x32( hi, lo ) \
_mm_or_si128( _mm_srli_si128( hi, 4 ), \
_mm_slli_si128( lo, 12 ) )
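// Aside, not part of this file: the SSSE3 and SSE2 definitions above are
// equivalent. Viewing (lo:hi) as one 256-bit value, both return the low
// 128 bits of (lo:hi) >> 32, i.e. (hi >> 32 bits) | (lo << 96 bits).
// A standalone check (compile with -mssse3):
#include <stdio.h>
#include <tmmintrin.h>

int main()
{
   __m128i hi = _mm_set_epi32( 8, 7, 6, 5 );  // words, low to high: 5 6 7 8
   __m128i lo = _mm_set_epi32( 4, 3, 2, 1 );  // words, low to high: 1 2 3 4
   __m128i a = _mm_alignr_epi8( lo, hi, 4 );              // SSSE3 version
   __m128i b = _mm_or_si128( _mm_srli_si128( hi, 4 ),
                             _mm_slli_si128( lo, 12 ) );  // SSE2 version
   int r[4];
   _mm_storeu_si128( (__m128i*)r, a );
   printf( "%d %d %d %d\n", r[0], r[1], r[2], r[3] );     // 6 7 8 1
   _mm_storeu_si128( (__m128i*)r, b );
   printf( "%d %d %d %d\n", r[0], r[1], r[2], r[3] );     // 6 7 8 1
   return 0;
}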
#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
sph_u32 kt; \
AES_ROUND_NOKEY(k1, k2, k3, k0); \
kt = (k0); \
(k0) = (k1); \
(k1) = (k2); \
(k2) = (k3); \
(k3) = kt; \
} while (0)
#if SPH_SMALL_FOOTPRINT_SHAVITE
/*
* This function assumes that "msg" is aligned for 32-bit access.
*/
static void
c512(sph_shavite_big_context *sc, const void *msg)
{
sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
sph_u32 rk[448];
size_t u;
int r, s;
#if SPH_LITTLE_ENDIAN
memcpy(rk, msg, 128);
#else
for (u = 0; u < 32; u += 4) {
rk[u + 0] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 0);
rk[u + 1] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 4);
rk[u + 2] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 8);
rk[u + 3] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 12);
}
#endif
u = 32;
for (;;) {
for (s = 0; s < 4; s ++) {
sph_u32 x0, x1, x2, x3;
x0 = rk[u - 31];
x1 = rk[u - 30];
x2 = rk[u - 29];
x3 = rk[u - 32];
AES_ROUND_NOKEY(x0, x1, x2, x3);
rk[u + 0] = x0 ^ rk[u - 4];
rk[u + 1] = x1 ^ rk[u - 3];
rk[u + 2] = x2 ^ rk[u - 2];
rk[u + 3] = x3 ^ rk[u - 1];
if (u == 32) {
rk[ 32] ^= sc->count0;
rk[ 33] ^= sc->count1;
rk[ 34] ^= sc->count2;
rk[ 35] ^= SPH_T32(~sc->count3);
} else if (u == 440) {
rk[440] ^= sc->count1;
rk[441] ^= sc->count0;
rk[442] ^= sc->count3;
rk[443] ^= SPH_T32(~sc->count2);
}
u += 4;
x0 = rk[u - 31];
x1 = rk[u - 30];
x2 = rk[u - 29];
x3 = rk[u - 32];
AES_ROUND_NOKEY(x0, x1, x2, x3);
rk[u + 0] = x0 ^ rk[u - 4];
rk[u + 1] = x1 ^ rk[u - 3];
rk[u + 2] = x2 ^ rk[u - 2];
rk[u + 3] = x3 ^ rk[u - 1];
if (u == 164) {
rk[164] ^= sc->count3;
rk[165] ^= sc->count2;
rk[166] ^= sc->count1;
rk[167] ^= SPH_T32(~sc->count0);
} else if (u == 316) {
rk[316] ^= sc->count2;
rk[317] ^= sc->count3;
rk[318] ^= sc->count0;
rk[319] ^= SPH_T32(~sc->count1);
}
u += 4;
}
if (u == 448)
break;
for (s = 0; s < 8; s ++) {
rk[u + 0] = rk[u - 32] ^ rk[u - 7];
rk[u + 1] = rk[u - 31] ^ rk[u - 6];
rk[u + 2] = rk[u - 30] ^ rk[u - 5];
rk[u + 3] = rk[u - 29] ^ rk[u - 4];
u += 4;
}
}
p0 = sc->h[0x0];
p1 = sc->h[0x1];
p2 = sc->h[0x2];
p3 = sc->h[0x3];
p4 = sc->h[0x4];
p5 = sc->h[0x5];
p6 = sc->h[0x6];
p7 = sc->h[0x7];
p8 = sc->h[0x8];
p9 = sc->h[0x9];
pA = sc->h[0xA];
pB = sc->h[0xB];
pC = sc->h[0xC];
pD = sc->h[0xD];
pE = sc->h[0xE];
pF = sc->h[0xF];
u = 0;
for (r = 0; r < 14; r ++) {
#define C512_ELT(l0, l1, l2, l3, r0, r1, r2, r3) do { \
sph_u32 x0, x1, x2, x3; \
x0 = r0 ^ rk[u ++]; \
x1 = r1 ^ rk[u ++]; \
x2 = r2 ^ rk[u ++]; \
x3 = r3 ^ rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
l0 ^= x0; \
l1 ^= x1; \
l2 ^= x2; \
l3 ^= x3; \
} while (0)
#define WROT(a, b, c, d) do { \
sph_u32 t = d; \
d = c; \
c = b; \
b = a; \
a = t; \
} while (0)
C512_ELT(p0, p1, p2, p3, p4, p5, p6, p7);
C512_ELT(p8, p9, pA, pB, pC, pD, pE, pF);
WROT(p0, p4, p8, pC);
WROT(p1, p5, p9, pD);
WROT(p2, p6, pA, pE);
WROT(p3, p7, pB, pF);
#undef C512_ELT
#undef WROT
}
sc->h[0x0] ^= p0;
sc->h[0x1] ^= p1;
sc->h[0x2] ^= p2;
sc->h[0x3] ^= p3;
sc->h[0x4] ^= p4;
sc->h[0x5] ^= p5;
sc->h[0x6] ^= p6;
sc->h[0x7] ^= p7;
sc->h[0x8] ^= p8;
sc->h[0x9] ^= p9;
sc->h[0xA] ^= pA;
sc->h[0xB] ^= pB;
sc->h[0xC] ^= pC;
sc->h[0xD] ^= pD;
sc->h[0xE] ^= pE;
sc->h[0xF] ^= pF;
}
#else
static void
c512( sph_shavite_big_context *sc, const void *msg )
@@ -331,7 +125,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
for ( r = 0; r < 3; r ++ )
{
// round 1, 5, 9
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
if ( r == 0 )
@@ -340,7 +134,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
x = _mm_xor_si128( p0, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
if ( r == 1 )
@@ -349,33 +143,33 @@ c512( sph_shavite_big_context *sc, const void *msg )
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p3 = _mm_xor_si128( p3, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p2, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, k11 );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
if ( r == 2 )
@@ -388,80 +182,80 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 2, 6, 10
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
x = _mm_xor_si128( p3, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p2 = _mm_xor_si128( p2, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
x = _mm_xor_si128( p1, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );
// round 3, 7, 11
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
x = _mm_xor_si128( p2, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p1 = _mm_xor_si128( p1, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p0, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, k11 );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
x = _mm_xor_si128( x, k13 );
@@ -470,36 +264,36 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 4, 8, 12
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
x = _mm_xor_si128( p1, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
x = _mm_xor_si128( p3, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
@@ -508,44 +302,44 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 13
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
x = _mm_xor_si128( p0, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p3 = _mm_xor_si128( p3, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p2, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, _mm_xor_si128( k11, _mm_set_epi32(
~sc->count2, sc->count3, sc->count0, sc->count1 ) ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
x = _mm_xor_si128( x, k13 );
@@ -558,7 +352,6 @@ c512( sph_shavite_big_context *sc, const void *msg )
h[3] = _mm_xor_si128( h[3], p1 );
}
#endif
static void
shavite_big_aesni_init( sph_shavite_big_context *sc, const sph_u32 *iv )


@@ -1363,10 +1363,11 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
{
HMAC_SHA256_CTX ctx;
HMAC_SHA256_Init(&ctx, buf, buflen);
if ( client_key_hack ) // GlobalBoost-Y buggy yescrypt
if ( yescrypt_client_key )
HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
yescrypt_client_key_len );
else
HMAC_SHA256_Update( &ctx, salt, saltlen );
else // Proper yescrypt
HMAC_SHA256_Update(&ctx, "Client Key", 10);
HMAC_SHA256_Final(sha256, &ctx);
}
/* Compute StoredKey */


@@ -25,7 +25,7 @@
#include "compat.h"
#include "yescrypt.h"
#include "sha256_Y.h"
#include "algo-gate-api.h"
#define BYTES2CHARS(bytes) \
@@ -366,7 +366,8 @@ static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
uint64_t YESCRYPT_N;
uint32_t YESCRYPT_R;
uint32_t YESCRYPT_P;
bool client_key_hack;
char *yescrypt_client_key = NULL;
int yescrypt_client_key_len = 0;
/* main hash 80 bytes input */
void yescrypt_hash( const char *input, char *output, uint32_t len )
@@ -436,7 +437,8 @@ bool register_yescrypt_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
client_key_hack = true;
yescrypt_client_key = NULL;
yescrypt_client_key_len = 0;
YESCRYPT_N = 2048;
YESCRYPT_R = 8;
YESCRYPT_P = 1;
@@ -447,7 +449,8 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
client_key_hack = false;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 2048;
YESCRYPT_R = 8;
YESCRYPT_P = 1;
@@ -458,10 +461,23 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
client_key_hack = false;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 16;
YESCRYPT_P = 1;
return true;
}
bool register_yescryptr32_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
yescrypt_client_key = "WaviBanana";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 32;
YESCRYPT_P = 1;
return true;
}
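Read together with the yescrypt_kdf hunk further up, this change replaces the client_key_hack boolean with an explicit per-variant HMAC personalization key: a NULL yescrypt_client_key feeds the salt to HMAC_SHA256 (the buggy GlobalBoost-Y behaviour), any other value feeds the named string. The registered variants line up as:

algo          client key       N     R   coin
yescrypt      (NULL -> salt)   2048   8  Globalboost-Y (BSTY)
yescryptr8    "Client Key"     2048   8  BitZeny (ZNY)
yescryptr16   "Client Key"     4096  16  Yenten (YTN)
yescryptr32   "WaviBanana"     4096  32  WAVI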


@@ -108,7 +108,8 @@ typedef enum {
__YESCRYPT_INIT_SHARED = 0x30000
} yescrypt_flags_t;
extern bool client_key_hack; // true for GlobalBoost-Y
extern char *yescrypt_client_key;
extern int yescrypt_client_key_len;
#define YESCRYPT_KNOWN_FLAGS \

api.c

@@ -98,6 +98,7 @@ extern int opt_api_remote;
extern double global_hashrate;
extern uint32_t accepted_count;
extern uint32_t rejected_count;
extern uint32_t solved_count;
#define cpu_threads opt_n_threads
@@ -138,8 +139,7 @@ static char *getsummary( char *params )
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
double diff = net_diff > 0. ? net_diff : stratum_diff;
char diff_str[16];
double hashrate = (double)global_hashrate;
char units[4] = {0};
double hrate = (double)global_hashrate;
struct cpu_info cpu = { 0 };
#ifdef USE_MONITORING
cpu.has_monitoring = true;
@@ -157,16 +157,15 @@ static char *getsummary( char *params )
sprintf( diff_str, "%.6f", diff);
*buffer = '\0';
scale_hash_for_display ( &hashrate, units );
sprintf( buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;CPUS=%d;%sH/s=%.2f;ACC=%d;REJ=%d;"
"ALGO=%s;CPUS=%d;URL=%s;"
"HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
"UPTIME=%.0f;TS=%u|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, opt_n_threads, units, hashrate,
accepted_count, rejected_count, accps, diff_str,
cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
algo, opt_n_threads, short_url, hrate, hrate/1000.0,
accepted_count, rejected_count, solved_count,
accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
uptime, (uint32_t) ts);
return buffer;
}
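The summary reply therefore gains the pool URL, a raw H/s figure alongside KH/s, and the solved-block count. For illustration only, with every value made up (including the API version string):

NAME=cpuminer-opt;VER=3.8.4.1;API=1.0;ALGO=yescryptr32;CPUS=4;URL=pool.example.com:3333;HS=1250.00;KHS=1.25;ACC=25;REJ=1;SOL=0;ACCMN=1.500;DIFF=0.001520;TEMP=0.0;FAN=0;FREQ=0;UPTIME=1000;TS=1521741000|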

avxdefs.h

File diff suppressed because it is too large.

configure

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.4.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.4.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.8.4'
PACKAGE_STRING='cpuminer-opt 3.8.4'
PACKAGE_VERSION='3.8.4.1'
PACKAGE_STRING='cpuminer-opt 3.8.4.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.8.4 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.8.4.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.8.4:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.8.4.1:";;
esac
cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.8.4
cpuminer-opt configure 3.8.4.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.8.4, which was
It was created by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.8.4'
VERSION='3.8.4.1'
cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.8.4, which was
This file was extended by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.8.4
cpuminer-opt config.status 3.8.4.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"


@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.8.3.1])
AC_INIT([cpuminer-opt], [3.8.4.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM


@@ -103,7 +103,7 @@ enum algos opt_algo = ALGO_NULL;
int opt_scrypt_n = 0;
int opt_pluck_n = 128;
int opt_n_threads = 0;
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
__int128_t opt_affinity = -1LL;
#else
int64_t opt_affinity = -1LL;
@@ -177,7 +177,7 @@ static struct work g_work = {{ 0 }};
time_t g_work_time = 0;
static pthread_mutex_t g_work_lock;
static bool submit_old = false;
static char* lp_id;
char* lp_id;
static void workio_cmd_free(struct workio_cmd *wc);
@@ -200,7 +200,7 @@ static inline void drop_policy(void)
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
#endif
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
static void affine_to_cpu_mask( int id, unsigned __int128 mask )
#else
static void affine_to_cpu_mask( int id, unsigned long long mask )
@@ -213,7 +213,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
for ( uint8_t i = 0; i < ncpus; i++ )
{
// cpu mask
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
if( ( mask & ( (unsigned __int128)1ULL << i ) ) ) CPU_SET( i, &set );
#else
if( (ncpus > 64) || ( mask & (1ULL << i) ) ) CPU_SET( i, &set );
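The point of the 128-bit type: the affinity mask carries one bit per logical CPU, so a 64-bit integer tops out at 64 CPUs (the fallback path above simply pins to every CPU when ncpus > 64). A sketch, assuming a GCC >= 4.8 build where unsigned __int128 is available:

// One-hot affinity mask for logical CPU 77; (1ULL << 77) would overflow
// a 64-bit mask, the 128-bit type covers machines with up to 128 CPUs.
unsigned __int128 mask = (unsigned __int128)1 << 77;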
@@ -433,6 +433,8 @@ static bool get_mininginfo(CURL *curl, struct work *work)
return true;
}
// hodl needs 4 but leave it at 3 until gbt better understood
//#define BLOCK_VERSION_CURRENT 3
#define BLOCK_VERSION_CURRENT 4
static bool gbt_work_decode( const json_t *val, struct work *work )
@@ -454,25 +456,24 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
bool rc = false;
tmp = json_object_get( val, "mutable" );
if (tmp && json_is_array(tmp)) {
if ( tmp && json_is_array( tmp ) )
{
n = (int) json_array_size( tmp );
for (i = 0; i < n; i++) {
for ( i = 0; i < n; i++ )
{
const char *s = json_string_value( json_array_get( tmp, i ) );
if ( !s )
continue;
if (!strcmp(s, "coinbase/append"))
coinbase_append = true;
else if (!strcmp(s, "submit/coinbase"))
submit_coinbase = true;
else if (!strcmp(s, "version/force"))
version_force = true;
else if (!strcmp(s, "version/reduce"))
version_reduce = true;
if ( !strcmp( s, "coinbase/append" ) ) coinbase_append = true;
else if ( !strcmp( s, "submit/coinbase" ) ) submit_coinbase = true;
else if ( !strcmp( s, "version/force" ) ) version_force = true;
else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true;
}
}
tmp = json_object_get( val, "height" );
if (!tmp || !json_is_integer(tmp)) {
if ( !tmp || !json_is_integer( tmp ) )
{
applog( LOG_ERR, "JSON invalid height" );
goto out;
}
@@ -480,20 +481,26 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
applog( LOG_BLUE, "Current block is %d", work->height );
tmp = json_object_get(val, "version");
if (!tmp || !json_is_integer(tmp)) {
if ( !tmp || !json_is_integer( tmp ) )
{
applog( LOG_ERR, "JSON invalid version" );
goto out;
}
version = (uint32_t) json_integer_value( tmp );
if ((version & 0xffU) > BLOCK_VERSION_CURRENT) {
if (version_reduce) {
if ( (version & 0xffU) > BLOCK_VERSION_CURRENT )
{
if ( version_reduce )
{
version = ( version & ~0xffU ) | BLOCK_VERSION_CURRENT;
} else if (have_gbt && allow_getwork && !version_force) {
applog(LOG_DEBUG, "Switching to getwork, gbt version %d",
version);
}
else if ( have_gbt && allow_getwork && !version_force )
{
applog( LOG_DEBUG, "Switching to getwork, gbt version %d", version );
have_gbt = false;
goto out;
} else if (!version_force) {
}
else if ( !version_force )
{
applog(LOG_ERR, "Unrecognized block version: %u", version);
goto out;
}
@@ -507,29 +514,34 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
}
tmp = json_object_get( val, "curtime" );
if (!tmp || !json_is_integer(tmp)) {
if ( !tmp || !json_is_integer( tmp ) )
{
applog( LOG_ERR, "JSON invalid curtime" );
goto out;
}
curtime = (uint32_t) json_integer_value(tmp);
if (unlikely(!jobj_binary(val, "bits", &bits, sizeof(bits)))) {
if ( unlikely( !jobj_binary( val, "bits", &bits, sizeof(bits) ) ) )
{
applog(LOG_ERR, "JSON invalid bits");
goto out;
}
/* find count and size of transactions */
txa = json_object_get(val, "transactions" );
if (!txa || !json_is_array(txa)) {
if ( !txa || !json_is_array( txa ) )
{
applog( LOG_ERR, "JSON invalid transactions" );
goto out;
}
tx_count = (int) json_array_size( txa );
tx_size = 0;
for (i = 0; i < tx_count; i++) {
for ( i = 0; i < tx_count; i++ )
{
const json_t *tx = json_array_get( txa, i );
const char *tx_hex = json_string_value( json_object_get( tx, "data" ) );
if (!tx_hex) {
if ( !tx_hex )
{
applog( LOG_ERR, "JSON invalid transactions" );
goto out;
}
@@ -538,30 +550,39 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
/* build coinbase transaction */
tmp = json_object_get( val, "coinbasetxn" );
if (tmp) {
if ( tmp )
{
const char *cbtx_hex = json_string_value( json_object_get( tmp, "data" ));
cbtx_size = cbtx_hex ? (int) strlen( cbtx_hex ) / 2 : 0;
cbtx = (uchar*) malloc( cbtx_size + 100 );
if (cbtx_size < 60 || !hex2bin(cbtx, cbtx_hex, cbtx_size)) {
if ( cbtx_size < 60 || !hex2bin( cbtx, cbtx_hex, cbtx_size ) )
{
applog( LOG_ERR, "JSON invalid coinbasetxn" );
goto out;
}
} else {
}
else
{
int64_t cbvalue;
if (!pk_script_size) {
if (allow_getwork) {
if ( !pk_script_size )
{
if ( allow_getwork )
{
applog( LOG_INFO, "No payout address provided, switching to getwork");
have_gbt = false;
} else
}
else
applog( LOG_ERR, "No payout address provided" );
goto out;
}
tmp = json_object_get( val, "coinbasevalue" );
if (!tmp || !json_is_number(tmp)) {
if ( !tmp || !json_is_number( tmp ) )
{
applog( LOG_ERR, "JSON invalid coinbasevalue" );
goto out;
}
cbvalue = (int64_t) (json_is_integer(tmp) ? json_integer_value(tmp) : json_number_value(tmp));
cbvalue = (int64_t) ( json_is_integer( tmp ) ? json_integer_value( tmp )
: json_number_value( tmp ) );
cbtx = (uchar*) malloc(256);
le32enc( (uint32_t *)cbtx, 1 ); /* version */
cbtx[4] = 1; /* in-counter */
@@ -590,13 +611,18 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
{
unsigned char xsig[100];
int xsig_len = 0;
if (*coinbase_sig) {
if ( *coinbase_sig )
{
n = (int) strlen( coinbase_sig );
if (cbtx[41] + xsig_len + n <= 100) {
if ( cbtx[41] + xsig_len + n <= 100 )
{
memcpy( xsig+xsig_len, coinbase_sig, n );
xsig_len += n;
} else {
applog(LOG_WARNING, "Signature does not fit in coinbase, skipping");
}
else
{
applog( LOG_WARNING,
"Signature does not fit in coinbase, skipping" );
}
}
tmp = json_object_get( val, "coinbaseaux" );
@@ -608,11 +634,13 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
unsigned char buf[100];
const char *s = json_string_value( json_object_iter_value( iter ) );
n = s ? (int) ( strlen(s) / 2 ) : 0;
if (!s || n > 100 || !hex2bin(buf, s, n)) {
if ( !s || n > 100 || !hex2bin( buf, s, n ) )
{
applog(LOG_ERR, "JSON invalid coinbaseaux");
break;
}
if (cbtx[41] + xsig_len + n <= 100) {
if ( cbtx[41] + xsig_len + n <= 100 )
{
memcpy( xsig+xsig_len, buf, n );
xsig_len += n;
}
@@ -674,16 +702,9 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
}
/* assemble block header */
work->data[0] = swab32(version);
for (i = 0; i < 8; i++)
work->data[8 - i] = le32dec(prevhash + i);
for (i = 0; i < 8; i++)
work->data[9 + i] = be32dec((uint32_t *)merkle_tree[0] + i);
work->data[17] = swab32(curtime);
work->data[18] = le32dec(&bits);
memset(work->data + 19, 0x00, 52);
work->data[20] = 0x80000000;
work->data[31] = 0x00000280;
algo_gate.build_block_header( work, swab32( version ),
(uint32_t*) prevhash, (uint32_t*) merkle_tree,
swab32( curtime ), le32dec( &bits ) );
if ( unlikely( !jobj_binary(val, "target", target, sizeof(target)) ) )
{
@@ -696,7 +717,8 @@ static bool gbt_work_decode(const json_t *val, struct work *work)
tmp = json_object_get( val, "workid" );
if ( tmp )
{
if (!json_is_string(tmp)) {
if ( !json_is_string( tmp ) )
{
applog( LOG_ERR, "JSON invalid workid" );
goto out;
}
@@ -715,7 +737,8 @@ out:
{
char *lp_uri;
tmp = json_object_get( val, "longpolluri" );
lp_uri = json_is_string(tmp) ? strdup(json_string_value(tmp)) : rpc_url;
lp_uri = json_is_string( tmp ) ? strdup( json_string_value( tmp ) )
: rpc_url;
have_longpoll = true;
tq_push(thr_info[longpoll_thr_id].q, lp_uri);
}
@@ -781,7 +804,7 @@ static int share_result( int result, struct work *work, const char *reason )
hashrate += thr_hashrates[i];
}
result ? accepted_count++ : rejected_count++;
/*
if ( solved )
{
solved_count++;
@@ -790,7 +813,7 @@ static int share_result( int result, struct work *work, const char *reason )
else
sprintf( sol, " Solved %d", solved_count );
}
*/
pthread_mutex_unlock(&stats_lock);
global_hashcount = hashcount;
global_hashrate = hashrate;
@@ -1049,45 +1072,12 @@ bool jr2_submit_getwork_result( CURL *curl, struct work *work )
return true;
}
static bool submit_upstream_work( CURL *curl, struct work *work )
char* std_malloc_txs_request( struct work *work )
{
json_t *val, *res;
char req[JSON_BUF_LEN];
int i;
/* pass if the previous hash is not the current previous hash */
if ( !submit_old && memcmp( &work->data[1], &g_work.data[1], 32 ) )
{
if (opt_debug)
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
return true;
}
if ( !have_stratum && allow_mininginfo )
{
struct work wheight;
get_mininginfo( curl, &wheight );
if ( work->height && work->height <= net_blocks )
{
if (opt_debug)
applog(LOG_WARNING, "block %u was already solved", work->height);
return true;
}
}
if ( have_stratum )
{
stratum.sharediff = work->sharediff;
algo_gate.build_stratum_request( req, work, &stratum );
if ( unlikely( !stratum_send_line( &stratum, req ) ) )
{
applog(LOG_ERR, "submit_upstream_work stratum_send_line failed");
return false;
}
return true;
}
else if (work->txs)
{
char data_str[2 * sizeof(work->data) + 1];
char *req;
json_t *val;
char data_str[2 * sizeof(work->data) + 1];
int i;
for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
be32enc( work->data + i, work->data[i] );
@@ -1099,7 +1089,8 @@ static bool submit_upstream_work( CURL *curl, struct work *work )
json_object_set_new( val, "workid", json_string( work->workid ) );
params = json_dumps( val, 0 );
json_decref( val );
req = (char*) malloc(128 + 2 * 80 + strlen(work->txs) + strlen(params));
req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs )
+ strlen( params ) );
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":4}\r\n",
data_str, work->txs, params );
@@ -1112,9 +1103,52 @@ static bool submit_upstream_work( CURL *curl, struct work *work )
"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":4}\r\n",
data_str, work->txs);
}
return req;
}
static bool submit_upstream_work( CURL *curl, struct work *work )
{
/* pass if the previous hash is not the current previous hash */
if ( !submit_old && memcmp( &work->data[1], &g_work.data[1], 32 ) )
{
if (opt_debug)
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
return true;
}
if ( !have_stratum && allow_mininginfo )
{
struct work wheight;
get_mininginfo( curl, &wheight );
if ( work->height && work->height <= net_blocks )
{
if (opt_debug)
applog(LOG_WARNING, "block %u was already solved", work->height);
return true;
}
}
if ( have_stratum )
{
char req[JSON_BUF_LEN];
stratum.sharediff = work->sharediff;
algo_gate.build_stratum_request( req, work, &stratum );
if ( unlikely( !stratum_send_line( &stratum, req ) ) )
{
applog(LOG_ERR, "submit_upstream_work stratum_send_line failed");
return false;
}
return true;
}
else if ( work->txs )
{
char *req = NULL;
json_t *val, *res;
req = algo_gate.malloc_txs_request( work );
val = json_rpc_call( curl, rpc_url, rpc_userpass, req, NULL, 0 );
free( req );
if ( unlikely( !val ) )
{
applog( LOG_ERR, "submit_upstream_work json_rpc_call failed" );
@@ -1148,7 +1182,7 @@ static bool submit_upstream_work( CURL *curl, struct work *work )
return algo_gate.submit_getwork_result( curl, work );
}
static const char *getwork_req =
const char *getwork_req =
"{\"method\": \"getwork\", \"params\": [], \"id\":0}\r\n";
#define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]"
@@ -1156,7 +1190,7 @@ static const char *getwork_req =
static const char *gbt_req =
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
GBT_CAPABILITIES "}], \"id\":0}\r\n";
static const char *gbt_lp_req =
const char *gbt_lp_req =
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
GBT_CAPABILITIES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n";
@@ -1179,8 +1213,8 @@ start:
else
{
val = json_rpc_call( curl, rpc_url, rpc_userpass,
have_gbt ? gbt_req : getwork_req,
&err, have_gbt ? JSON_RPC_QUIET_404 : 0);
have_gbt ? gbt_req : getwork_req, &err,
have_gbt ? JSON_RPC_QUIET_404 : 0);
}
gettimeofday( &tv_end, NULL );
@@ -1626,16 +1660,18 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
// This logic depends on expression short circuiting to prevent tripping
// over NULL job_id pointers when benchmarking.
if ( ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& clean_job )
|| ( *nonceptr >= *end_nonce_ptr )
|| ( !opt_benchmark && strcmp( work->job_id, g_work->job_id ) ) )
{
if ( *nonceptr >= *end_nonce_ptr )
algo_gate.stratum_gen_work( &stratum, g_work );
// the job_id check doesn't work as intended, it's a char pointer!
// For stratum the pointers can be dereferenced and the strings compared,
// benchmark not, getwork & gbt unsure.
// || ( have_stratum && strcmp( work->job_id, g_work->job_id ) ) )
// or
// || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) )
// For now leave it as is, it seems stable.
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| ( work->job_id != g_work->job_id ) ) )
{
work_free( work );
work_copy( work, g_work );
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
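The comment block above points at a real C footgun: work->job_id is a char*, so the != test compares pointer addresses, not job strings. A standalone illustration:

#include <stdio.h>
#include <string.h>

int main()
{
   const char *a = "job-1";
   char b[8];
   strcpy( b, "job-1" );
   printf( "%d\n", a != b );          // 1: distinct addresses look "changed"
   printf( "%d\n", strcmp( a, b ) );  // 0: the contents are actually equal
   return 0;
}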
@@ -1756,7 +1792,7 @@ static void *miner_thread( void *userdata )
if (opt_debug)
applog( LOG_DEBUG, "Binding thread %d to cpu %d (mask %x)",
thr_id, thr_id % num_cpus, ( 1ULL << (thr_id % num_cpus) ) );
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
affine_to_cpu_mask( thr_id,
(unsigned __int128)1LL << (thr_id % num_cpus) );
#else
@@ -1791,6 +1827,8 @@ static void *miner_thread( void *userdata )
{
algo_gate.wait_for_diff( &stratum );
pthread_mutex_lock( &g_work_lock );
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
algo_gate.stratum_gen_work( &stratum, &g_work );
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce,
stratum.job.clean );
pthread_mutex_unlock( &g_work_lock );
@@ -1876,7 +1914,7 @@ static void *miner_thread( void *userdata )
gettimeofday( (struct timeval *) &tv_start, NULL );
// Scan for nonce
nonce_found = (bool) algo_gate.scanhash( thr_id, &work, max_nonce,
nonce_found = algo_gate.scanhash( thr_id, &work, max_nonce,
&hashes_done );
// record scanhash elapsed time
@@ -1891,37 +1929,31 @@ static void *miner_thread( void *userdata )
pthread_mutex_unlock( &stats_lock );
}
// if nonce(s) submit work
// if nonce(s) found submit work
if ( nonce_found && !opt_benchmark )
{
/*
int num_submitted = 0;
{ // 4 way with multiple nonces, copy individually to work and submit.
if ( nonce_found > 1 )
for ( int n = 0; n < nonce_found; n++ )
{
*algo_gate.get_nonceptr( work.data ) = work.nonces[n];
if ( submit_work( mythr, &work ) )
{
applog( LOG_NOTICE, "Share submitted." );
num_submitted++;
}
else
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
}
// must be a one way algo, nonce is already in work data
if ( !num_submitted )
{
*/
else
{ // only 1 nonce, in work ready to submit.
if ( !submit_work( mythr, &work ) )
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
applog( LOG_NOTICE, "Share submitted." );
// }
}
// prevent stale work in solo
// we can't submit twice a block!
@@ -2006,18 +2038,14 @@ json_t *std_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
{
json_t *val;
char *req = NULL;
// if (have_gbt)
// {
if (have_gbt)
{
req = (char*) malloc( strlen(gbt_lp_req) + strlen(lp_id) + 1 );
sprintf( req, gbt_lp_req, lp_id );
// }
//TODO this code makes no sense, this first call should be removed.
// also remove conditional expression in second call, no getwork.
// val = json_rpc_call( curl, rpc_url, rpc_userpass, getwork_req, err,
// JSON_RPC_LONGPOLL );
// val = json_rpc_call( curl, lp_url, rpc_userpass, req ? req : getwork_req,
// err, JSON_RPC_LONGPOLL);
val = json_rpc_call( curl, lp_url, rpc_userpass, req,
}
val = json_rpc_call( curl, rpc_url, rpc_userpass, getwork_req, err,
JSON_RPC_LONGPOLL );
val = json_rpc_call( curl, lp_url, rpc_userpass, req ? req : getwork_req,
err, JSON_RPC_LONGPOLL);
free(req);
return val;
@@ -2235,27 +2263,45 @@ out:
return ret;
}
void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// used by stratum and gbt
void std_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_tree,
uint32_t ntime, uint32_t nbits )
{
uchar merkle_root[64] = { 0 };
size_t t;
int i;
algo_gate.gen_merkle_root( merkle_root, sctx );
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = version;
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[ 1+i ] = le32dec( prevhash + i );
else
for (i = 0; i < 8; i++)
g_work->data[ 8-i ] = le32dec( prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[ 9+i ] = be32dec( merkle_tree + i );
g_work->data[ algo_gate.ntime_index ] = ntime;
g_work->data[ algo_gate.nbits_index ] = nbits;
g_work->data[20] = 0x80000000;
g_work->data[31] = 0x00000280;
}
void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
uchar merkle_tree[64] = { 0 };
size_t t;
algo_gate.gen_merkle_root( merkle_tree, sctx );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
g_work->data[ algo_gate.ntime_index ] = le32dec(sctx->job.ntime);
g_work->data[ algo_gate.nbits_index ] = le32dec(sctx->job.nbits);
g_work->data[20] = 0x80000000;
g_work->data[31] = 0x00000280;
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
}
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )


@@ -424,7 +424,7 @@ extern size_t rpc2_bloblen;
extern uint32_t rpc2_target;
extern char *rpc2_job_id;
extern char *rpc_user;
extern char *short_url;
json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags);
bool rpc2_login(CURL *curl);
@@ -553,6 +553,7 @@ enum algos {
ALGO_YESCRYPT,
ALGO_YESCRYPTR8,
ALGO_YESCRYPTR16,
ALGO_YESCRYPTR32,
ALGO_ZR5,
ALGO_COUNT
};
@@ -629,6 +630,7 @@ static const char* const algo_names[] = {
"yescrypt",
"yescryptr8",
"yescryptr16",
"yescryptr32",
"zr5",
"\0"
};
@@ -648,6 +650,10 @@ extern int opt_timeout;
extern bool want_longpoll;
extern bool have_longpoll;
extern bool have_gbt;
extern char* lp_id;
extern char *rpc_userpass;
extern const char *gbt_lp_req;
extern const char *getwork_req;
extern bool allow_getwork;
extern bool want_stratum;
extern bool have_stratum;
@@ -760,6 +766,7 @@ Options:\n\
yescrypt Globalboost-Y (BSTY)\n\
yescryptr8 BitZeny (ZNY)\n\
yescryptr16 Yenten (YTN)\n\
yescryptr32 WAVI\n\
zr5 Ziftr\n\
-o, --url=URL URL of mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\