mirror of https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00

Compare commits: 5 commits

- 3363d61524
- 20fe05054c
- 157508bd07
- c24a4bdbc2
- 59c7848d91
Makefile.am

@@ -68,6 +68,7 @@ cpuminer_SOURCES = \
  algo/cryptonight/cryptonight.c\
  algo/cubehash/sph_cubehash.c \
  algo/cubehash/sse2/cubehash_sse2.c\
+ algo/cubehash/cube-hash-2way.c \
  algo/echo/sph_echo.c \
  algo/echo/aes_ni/hash.c\
  algo/gost/sph_gost.c \
@@ -242,7 +243,7 @@ cpuminer_SOURCES = \
  algo/x17/hmq1725.c \
  algo/yescrypt/yescrypt.c \
  algo/yescrypt/sha256_Y.c \
- algo/yescrypt/yescrypt-simd.c
+ algo/yescrypt/yescrypt-best.c

disable_flags =
README.md (18 lines changed)
@@ -28,11 +28,12 @@ performance.
ARM CPUs are not supported.

2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
-Centos are known to work and have all dependencies in their repositories.
-Others may work but may require more effort.
+Centos, are known to work and have all dependencies in their repositories.
+Others may work but may require more effort. Older versions such as Centos 6
+don't work due to missing features.
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.

-MacOS, OSx is not supported.
+MacOS, OSx and Android are not supported.

3. Stratum pool. Some algos may work wallet mining using getwork or GBT. YMMV.

@@ -110,6 +111,7 @@ Supported Algorithms
 yescrypt      Globalboost-Y (BSTY)
 yescryptr8    BitZeny (ZNY)
 yescryptr16   Yenten (YTN)
+yescryptr32   WAVI
 zr5           Ziftr

Errata
@@ -142,11 +144,11 @@ Donations

cpuminer-opt has no fees of any kind but donations are accepted.

-BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
-ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
-LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
-BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
-BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
+BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
+ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
+LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
+BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
+BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ

Happy mining!
RELEASE_NOTES

@@ -1,4 +1,4 @@
-cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
+puminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
This feature requires recent SW including GCC version 5 or higher and
openssl version 1.1 or higher. It may also require using "-march=znver1"
compile flag.
@@ -90,7 +90,8 @@ Additional optional compile flags, add the following to CFLAGS to activate:

SPH may give slightly better performance on algos that use sha256 when using
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
-better than SPH.
+better than SPH. This option is ignored when 4-way is used, even for CPUs
+with SHA.

Start mining.
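A quick way to confirm at build time that the toolchain really enabled the SHA extensions mentioned above is sketched below. This is an illustrative snippet, not part of the source tree; it relies on GCC defining __SHA__ when SHA instructions are enabled (for example via -msha or -march=znver1).

    /* Hedged sketch, not from cpuminer-opt: fail fast when HW SHA is
       expected but the compile flag is missing. */
    #if !defined(__SHA__)
    #warning "SHA extensions not enabled; add -march=znver1 (or -msha) to CFLAGS"
    #endif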
@@ -159,6 +160,34 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
Change Log
----------

+v3.8.4.1
+
+Fixed sha256t low difficulty rejects.
+Fixed compile error on CPUs with AVX512.
+
+v3.8.4
+
+Added yescryptr32 algo for WAVI coin.
+Added URL to API data.
+Improved detection of __int128 support (linux only)
+Compile support for CPUs without SSSE3 (no binary support)
+
+v3.8.3.3
+
+Integrated getblocktemplate with algo_gate.
+Added support for hodl gbt (untested).
+Reworked some recent quick fixes.
+
+v3.8.3.2
+
+Reverted gbt changes from v3.8.0 that broke getwork.
+Reverted scaled hash rate for API, added HS term in addition to KHS.
+Added blocks solved to console display and API.
+
+v3.8.3.1
+
+Fixed regression in v3.8.3 that broke several algos.
+
v3.8.3

More restoration of lost lyra2 hash.
algo-gate-api.c

@@ -119,9 +119,11 @@ void init_algo_gate( algo_gate_t* gate )
  gate->gen_merkle_root        = (void*)&sha256d_gen_merkle_root;
  gate->stratum_gen_work       = (void*)&std_stratum_gen_work;
  gate->build_stratum_request  = (void*)&std_le_build_stratum_request;
+ gate->malloc_txs_request     = (void*)&std_malloc_txs_request;
  gate->set_target             = (void*)&std_set_target;
  gate->work_decode            = (void*)&std_le_work_decode;
  gate->submit_getwork_result  = (void*)&std_le_submit_getwork_result;
+ gate->build_block_header     = (void*)&std_build_block_header;
  gate->build_extraheader      = (void*)&std_build_extraheader;
  gate->set_work_data_endian   = (void*)&do_nothing;
  gate->calc_network_diff      = (void*)&std_calc_network_diff;
@@ -225,6 +227,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
    case ALGO_YESCRYPT:    register_yescrypt_algo    ( gate ); break;
    case ALGO_YESCRYPTR8:  register_yescryptr8_algo  ( gate ); break;
    case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
+   case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
    case ALGO_ZR5:         register_zr5_algo         ( gate ); break;
    default:
      applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
algo-gate-api.h

@@ -127,7 +127,10 @@ void ( *set_target) ( struct work*, double );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
void ( *gen_merkle_root )       ( char*, struct stratum_ctx* );
void ( *build_extraheader )     ( struct work*, struct stratum_ctx* );
+void ( *build_block_header )    ( struct work*, uint32_t, uint32_t*,
+                                  uint32_t*, uint32_t, uint32_t );
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
+char* ( *malloc_txs_request )   ( struct work* );
void ( *set_work_data_endian )  ( struct work* );
double ( *calc_network_diff )   ( struct work* );
bool ( *ready_to_mine )         ( struct work*, struct stratum_ctx*, int );
@@ -228,11 +231,17 @@ void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
void jr2_build_stratum_request   ( char *req, struct work *work );

+char* std_malloc_txs_request( struct work *work );
+
// Default is do_nothing (assumed LE)
void set_work_data_big_endian( struct work *work );

double std_calc_network_diff( struct work *work );

+void std_build_block_header( struct work* g_work, uint32_t version,
+                             uint32_t *prevhash, uint32_t *merkle_root,
+                             uint32_t ntime, uint32_t nbits );
+
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );

json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
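The two hunks above follow the algo_gate dispatch pattern: a struct of function pointers is seeded with std_* defaults at init time and each algorithm overrides only what it needs at registration. A minimal, self-contained sketch of that style follows; the names here (mini_gate_t and friends) are illustrative, not cpuminer-opt identifiers.

    #include <stdio.h>

    typedef struct {
        const char* (*txs_request)(void);   // analogous to malloc_txs_request
    } mini_gate_t;

    static const char* std_txs_request(void)  { return "std request"; }
    static const char* hodl_txs_request(void) { return "hodl request"; }

    // like init_algo_gate(): install defaults
    static void init_gate( mini_gate_t *g ) { g->txs_request = std_txs_request; }

    int main(void)
    {
        mini_gate_t gate;
        init_gate( &gate );                  // std_* defaults
        gate.txs_request = hodl_txs_request; // per-algo override at registration
        printf( "%s\n", gate.txs_request() );// miner core calls through the gate
        return 0;
    }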
@@ -53,6 +53,7 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -126,7 +127,7 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 8; i++ )
   if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
   {
      found[i] = true;
      pdata[19] = n+i;
      num_found++;
      nonces[i] = n+i;
      work_set_target_ratio( work, hash+1 );
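All of the scanhash hunks in this changeset share one indexing convention worth spelling out: the N-way hash output is stored lane after lane, eight 32-bit words per lane, so lane i begins at hash + (i << 3) and word 7 is the most significant word that is pre-screened against ptarget[7]. A hedged, self-contained sketch of that loop (the real code additionally calls fulltest() for an exact comparison):

    #include <stdint.h>

    // Four lanes of 8 words each, back to back in memory.
    static int scan_lanes( const uint32_t hash[4 * 8], const uint32_t *ptarget,
                           uint32_t n, uint32_t *nonce_out )
    {
        const uint32_t Htarg = ptarget[7];           // high target word
        for ( int i = 0; i < 4; i++ )
        {
            const uint32_t *lane = hash + (i << 3);  // lane i starts at word 8*i
            if ( lane[7] <= Htarg )                  // quick pre-screen on word 7
            {
                *nonce_out = n + i;                  // nonce that filled lane i
                return 1;
            }
        }
        return 0;
    }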
@@ -59,6 +59,7 @@ int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 8; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -119,6 +120,7 @@ int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -53,6 +53,7 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -124,6 +125,7 @@ int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 8; i++ )
   if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -61,6 +61,7 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[DECRED_NONCE_INDEX] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
algo/cubehash/cube-hash-2way.c (new file, 205 lines)
@@ -0,0 +1,205 @@
#if defined(__AVX2__)

#include <stdbool.h>
#include <unistd.h>
#include <memory.h>
#include "cube-hash-2way.h"

// 2x128

static void transform_2way( cube_2way_context *sp )
{
   int r;
   const int rounds = sp->rounds;

   __m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;

   x0 = _mm256_load_si256( (__m256i*)sp->h );
   x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
   x2 = _mm256_load_si256( (__m256i*)sp->h + 2 );
   x3 = _mm256_load_si256( (__m256i*)sp->h + 3 );
   x4 = _mm256_load_si256( (__m256i*)sp->h + 4 );
   x5 = _mm256_load_si256( (__m256i*)sp->h + 5 );
   x6 = _mm256_load_si256( (__m256i*)sp->h + 6 );
   x7 = _mm256_load_si256( (__m256i*)sp->h + 7 );

   for ( r = 0; r < rounds; ++r )
   {
      x4 = _mm256_add_epi32( x0, x4 );
      x5 = _mm256_add_epi32( x1, x5 );
      x6 = _mm256_add_epi32( x2, x6 );
      x7 = _mm256_add_epi32( x3, x7 );
      y0 = x2;
      y1 = x3;
      y2 = x0;
      y3 = x1;
      x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
                             _mm256_srli_epi32( y0, 25 ) );
      x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
                             _mm256_srli_epi32( y1, 25 ) );
      x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
                             _mm256_srli_epi32( y2, 25 ) );
      x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
                             _mm256_srli_epi32( y3, 25 ) );
      x0 = _mm256_xor_si256( x0, x4 );
      x1 = _mm256_xor_si256( x1, x5 );
      x2 = _mm256_xor_si256( x2, x6 );
      x3 = _mm256_xor_si256( x3, x7 );
      x4 = mm256_swap128_64( x4 );
      x5 = mm256_swap128_64( x5 );
      x6 = mm256_swap128_64( x6 );
      x7 = mm256_swap128_64( x7 );
      x4 = _mm256_add_epi32( x0, x4 );
      x5 = _mm256_add_epi32( x1, x5 );
      x6 = _mm256_add_epi32( x2, x6 );
      x7 = _mm256_add_epi32( x3, x7 );
      y0 = x1;
      y1 = x0;
      y2 = x3;
      y3 = x2;
      x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
                             _mm256_srli_epi32( y0, 21 ) );
      x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
                             _mm256_srli_epi32( y1, 21 ) );
      x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
                             _mm256_srli_epi32( y2, 21 ) );
      x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
                             _mm256_srli_epi32( y3, 21 ) );
      x0 = _mm256_xor_si256( x0, x4 );
      x1 = _mm256_xor_si256( x1, x5 );
      x2 = _mm256_xor_si256( x2, x6 );
      x3 = _mm256_xor_si256( x3, x7 );
      x4 = mm256_swap64_32( x4 );
      x5 = mm256_swap64_32( x5 );
      x6 = mm256_swap64_32( x6 );
      x7 = mm256_swap64_32( x7 );
   }

   _mm256_store_si256( (__m256i*)sp->h,     x0 );
   _mm256_store_si256( (__m256i*)sp->h + 1, x1 );
   _mm256_store_si256( (__m256i*)sp->h + 2, x2 );
   _mm256_store_si256( (__m256i*)sp->h + 3, x3 );
   _mm256_store_si256( (__m256i*)sp->h + 4, x4 );
   _mm256_store_si256( (__m256i*)sp->h + 5, x5 );
   _mm256_store_si256( (__m256i*)sp->h + 6, x6 );
   _mm256_store_si256( (__m256i*)sp->h + 7, x7 );
}

cube_2way_context cube_2way_ctx_cache __attribute__ ((aligned (64)));

int cube_2way_reinit( cube_2way_context *sp )
{
   memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
   return 0;
}

int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
                    int blockbytes )
{
   int i;

   // all sizes of __m128i
   cube_2way_ctx_cache.hashlen   = hashbitlen/128;
   cube_2way_ctx_cache.blocksize = blockbytes/16;
   cube_2way_ctx_cache.rounds    = rounds;
   cube_2way_ctx_cache.pos       = 0;

   for ( i = 0; i < 8; ++i )
      cube_2way_ctx_cache.h[i] = m256_zero;

   cube_2way_ctx_cache.h[0] = _mm256_set_epi32(
                                  0, rounds, blockbytes, hashbitlen / 8,
                                  0, rounds, blockbytes, hashbitlen / 8 );

   for ( i = 0; i < 10; ++i )
      transform_2way( &cube_2way_ctx_cache );

   memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
   return 0;
}

int cube_2way_update( cube_2way_context *sp, const void *data, size_t size )
{
   const int len = size / 16;
   const __m256i *in = (__m256i*)data;
   int i;

   // It is assumed data is aligned to 256 bits and is a multiple of 128 bits.
   // Current usage: data is either 64 or 80 bytes.

   for ( i = 0; i < len; i++ )
   {
      sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
      sp->pos++;
      if ( sp->pos == sp->blocksize )
      {
         transform_2way( sp );
         sp->pos = 0;
      }
   }
   return 0;
}

int cube_2way_close( cube_2way_context *sp, void *output )
{
   __m256i *hash = (__m256i*)output;
   int i;

   // pos is zero for 64 byte data, 1 for 80 byte data.
   sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
             _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
                              0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
   transform_2way( sp );

   sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
                                                            1,0,0,0 ) );
   // final rounds must permute the working context, not the init cache
   for ( i = 0; i < 10; ++i )
      transform_2way( sp );

   for ( i = 0; i < sp->hashlen; i++ )
      hash[i] = sp->h[i];
   return 0;
}

int cube_2way_update_close( cube_2way_context *sp, void *output,
                            const void *data, size_t size )
{
   const int len = size / 16;
   const __m256i *in = (__m256i*)data;
   __m256i *hash = (__m256i*)output;
   int i;

   for ( i = 0; i < len; i++ )
   {
      sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
      sp->pos++;
      if ( sp->pos == sp->blocksize )
      {
         transform_2way( sp );
         sp->pos = 0;
      }
   }

   // pos is zero for 64 byte data, 1 for 80 byte data.
   sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
             _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
                              0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
   transform_2way( sp );

   sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
                                                            1,0,0,0 ) );
   // final rounds must permute the working context, not the init cache
   for ( i = 0; i < 10; ++i )
      transform_2way( sp );

   for ( i = 0; i < sp->hashlen; i++ )
      hash[i] = sp->h[i];
   return 0;
}

#endif
algo/cubehash/cube-hash-2way.h (new file, 36 lines)
@@ -0,0 +1,36 @@
#ifndef CUBE_HASH_2WAY_H__
#define CUBE_HASH_2WAY_H__

#if defined(__AVX2__)

#include <stdint.h>
#include "avxdefs.h"

// 2x128, 2 way parallel SSE2

struct _cube_2way_context
{
   int hashlen;    // __m128i
   int rounds;
   int blocksize;  // __m128i
   int pos;        // number of __m128i read into x from current block
   __m256i h[8] __attribute__ ((aligned (64)));
};

typedef struct _cube_2way_context cube_2way_context;

int cube_2way_init( cube_2way_context* sp, int hashbitlen, int rounds,
                    int blockbytes );
// reinitialize context with same parameters, much faster.
int cube_2way_reinit( cube_2way_context *sp );

int cube_2way_update( cube_2way_context *sp, const void *data, size_t size );

int cube_2way_close( cube_2way_context *sp, void *output );

int cube_2way_update_close( cube_2way_context *sp, void *output,
                            const void *data, size_t size );

#endif
#endif
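A hedged usage sketch for this API follows. The CubeHash512 parameters (16 rounds, 32-byte blocks) and the 80-byte per-lane input match how cpuminer-opt typically drives cubehash on block headers, but this exact wrapper function is illustrative, not from the source tree.

    #include "cube-hash-2way.h"

    // Hash two interleaved 80-byte inputs to two 64-byte digests.
    void cube512_2way_80( void *out64x2, const void *in80x2 )
    {
        cube_2way_context ctx __attribute__ ((aligned (64)));
        // 512-bit hash, 16 rounds, 32-byte blocks: CubeHash512 parameters.
        cube_2way_init( &ctx, 512, 16, 32 );
        // input must be 32-byte aligned and a multiple of 16 bytes per lane;
        // 80 bytes per lane means 160 bytes of 2x128-interleaved data.
        cube_2way_update_close( &ctx, out64x2, in80x2, 80 );
    }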
@@ -93,6 +93,7 @@ int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
algo/hodl/hodl-gate.c

@@ -42,15 +42,82 @@ void hodl_le_build_stratum_request( char* req, struct work* work,
   free( xnonce2str );
}

-void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
+char* hodl_malloc_txs_request( struct work *work )
{
   char* req;
   json_t *val;
   char data_str[2 * sizeof(work->data) + 1];
   int i;

   for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
      be32enc( work->data + i, work->data[i] );

   bin2hex( data_str, (unsigned char *)work->data, 88 );
   if ( work->workid )
   {
      char *params;
      val = json_object();
      json_object_set_new( val, "workid", json_string( work->workid ) );
      params = json_dumps( val, 0 );
      json_decref( val );
      req = malloc( 128 + 2*88 + strlen( work->txs ) + strlen( params ) );
      sprintf( req,
       "{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n",
       data_str, work->txs, params);
      free( params );
   }
   else
   {
      req = malloc( 128 + 2*88 + strlen(work->txs));
      sprintf( req,
       "{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n",
       data_str, work->txs);
   }
   return req;
}

void hodl_build_block_header( struct work* g_work, uint32_t version,
                              uint32_t *prevhash, uint32_t *merkle_tree,
                              uint32_t ntime, uint32_t nbits )
{
   uchar merkle_root[64] = { 0 };
   size_t t;
   int i;

   algo_gate.gen_merkle_root( merkle_root, sctx );
   memset( g_work->data, 0, sizeof(g_work->data) );
   g_work->data[0] = version;

   if ( have_stratum )
      for ( i = 0; i < 8; i++ )
         g_work->data[ 1+i ] = le32dec( prevhash + i );
   else
      for (i = 0; i < 8; i++)
         g_work->data[ 8-i ] = le32dec( prevhash + i );

   for ( i = 0; i < 8; i++ )
      g_work->data[ 9+i ] = be32dec( merkle_tree + i );

   g_work->data[ algo_gate.ntime_index ] = ntime;
   g_work->data[ algo_gate.nbits_index ] = nbits;
   g_work->data[22] = 0x80000000;
   g_work->data[31] = 0x00000280;
}

// hodl build_extra_header is redundant, hodl can use std_build_extra_header
// and call hodl_build_block_header.
#if 0
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
{
   uchar merkle_tree[64] = { 0 };
   size_t t;
// int i;

   algo_gate.gen_merkle_root( merkle_tree, sctx );
   // Increment extranonce2
   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );

   algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
           (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
           le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
/*
   // Assemble block header
   memset( g_work->data, 0, sizeof(g_work->data) );
   g_work->data[0] = le32dec( sctx->job.version );
@@ -63,7 +130,9 @@ void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
   g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
   g_work->data[22] = 0x80000000;
   g_work->data[31] = 0x00000280;
*/
}
#endif

// called only by thread 0, saves a backup of g_work
void hodl_get_new_work( struct work* work, struct work* g_work)
@@ -73,6 +142,22 @@ void hodl_get_new_work( struct work* work, struct work* g_work)
   hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
}

json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
{
   json_t *val;
   char *req = NULL;

   if ( have_gbt )
   {
      req = malloc( strlen( gbt_lp_req ) + strlen( lp_id ) + 1 );
      sprintf( req, gbt_lp_req, lp_id );
   }
   val = json_rpc_call( curl, lp_url, rpc_userpass,
                        req ? req : getwork_req, err, JSON_RPC_LONGPOLL );
   free( req );
   return val;
}

// called by every thread, copies the backup to each thread's work.
void hodl_resync_threads( struct work* work )
{
@@ -108,17 +193,26 @@ bool register_hodl_algo( algo_gate_t* gate )
   applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
   return false;
#endif
// if ( TOTAL_CHUNKS % opt_n_threads )
// {
//    applog(LOG_ERR,"Thread count must be power of 2.");
//    return false;
// }
   pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
-  gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
+  gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
   gate->scanhash              = (void*)&hodl_scanhash;
   gate->get_new_work          = (void*)&hodl_get_new_work;
   gate->longpoll_rpc_call     = (void*)&hodl_longpoll_rpc_call;
   gate->set_target            = (void*)&hodl_set_target;
   gate->build_stratum_request = (void*)&hodl_le_build_stratum_request;
-  gate->build_extraheader     = (void*)&hodl_build_extraheader;
+  gate->malloc_txs_request    = (void*)&hodl_malloc_txs_request;
+  gate->build_block_header    = (void*)&hodl_build_block_header;
+// gate->build_extraheader     = (void*)&hodl_build_extraheader;
   gate->resync_threads        = (void*)&hodl_resync_threads;
   gate->do_this_thread        = (void*)&hodl_do_this_thread;
   gate->work_cmp_size = 76;
   hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
   allow_getwork = false;
   return ( hodl_scratchbuf != NULL );
}
@@ -10,23 +10,26 @@

#ifndef NO_AES_NI

-void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, void *MidHash)
+void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
+                          void *MidHash )
{
-#ifdef __AVX__
-   uint64_t* TempBufs[SHA512_PARALLEL_N] ;
-   uint64_t* desination[SHA512_PARALLEL_N];
+   const int Chunk = TOTAL_CHUNKS / ThreadCount;
+   const uint32_t StartChunk = ThreadID * Chunk;
+   const uint32_t EndChunk = StartChunk + Chunk;

-   for ( int i=0; i<SHA512_PARALLEL_N; ++i )
+#ifdef __AVX__
+   uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
+   uint64_t* desination[ SHA512_PARALLEL_N ];
+
+   for ( int i=0; i < SHA512_PARALLEL_N; ++i )
   {
-      TempBufs[i] = (uint64_t*)malloc(32);
-      memcpy(TempBufs[i], MidHash, 32);
+      TempBufs[i] = (uint64_t*)malloc( 32 );
+      memcpy( TempBufs[i], MidHash, 32 );
   }

-   uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
-   for ( uint32_t i = StartChunk;
-         i < StartChunk + (TOTAL_CHUNKS / ThreadCount); i+= SHA512_PARALLEL_N )
+   for ( uint32_t i = StartChunk; i < EndChunk; i += SHA512_PARALLEL_N )
   {
-      for ( int j=0; j<SHA512_PARALLEL_N; ++j )
+      for ( int j = 0; j < SHA512_PARALLEL_N; ++j )
      {
         ( (uint32_t*)TempBufs[j] )[0] = i + j;
         desination[j] = (uint64_t*)( (uint8_t *)Garbage + ( (i+j)
@@ -35,15 +38,13 @@ void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, voi
      sha512Compute32b_parallel( TempBufs, desination );
   }

-   for ( int i=0; i<SHA512_PARALLEL_N; ++i )
+   for ( int i = 0; i < SHA512_PARALLEL_N; ++i )
      free( TempBufs[i] );
#else
   uint32_t TempBuf[8];
   memcpy( TempBuf, MidHash, 32 );

-   uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
-   for ( uint32_t i = StartChunk;
-         i < StartChunk + (TOTAL_CHUNKS / ThreadCount); ++i )
+   for ( uint32_t i = StartChunk; i < EndChunk; ++i )
   {
      TempBuf[0] = i;
      SHA512( ( uint8_t *)TempBuf, 32,
@@ -139,6 +139,7 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
   if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
      && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -53,6 +53,7 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
   if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
      && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -124,6 +124,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -86,6 +86,7 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -118,6 +118,7 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -86,6 +86,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -197,6 +198,7 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 8; i++ )
   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
(File diff suppressed because it is too large.)
@@ -55,23 +55,23 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, updates all args
#define G_4X64(a,b,c,d) \
   a = _mm256_add_epi64( a, b ); \
-  d = mm256_rotr_64( _mm256_xor_si256( d, a), 32 ); \
+  d = mm256_ror_64( _mm256_xor_si256( d, a), 32 ); \
   c = _mm256_add_epi64( c, d ); \
-  b = mm256_rotr_64( _mm256_xor_si256( b, c ), 24 ); \
+  b = mm256_ror_64( _mm256_xor_si256( b, c ), 24 ); \
   a = _mm256_add_epi64( a, b ); \
-  d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
+  d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
   c = _mm256_add_epi64( c, d ); \
-  b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );
+  b = mm256_ror_64( _mm256_xor_si256( b, c ), 63 );

#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   G_4X64( s0, s1, s2, s3 ); \
-  s1 = mm256_rotr256_1x64( s1); \
+  s1 = mm256_ror256_1x64( s1); \
   s2 = mm256_swap_128( s2 ); \
-  s3 = mm256_rotl256_1x64( s3 ); \
+  s3 = mm256_rol256_1x64( s3 ); \
   G_4X64( s0, s1, s2, s3 ); \
-  s1 = mm256_rotl256_1x64( s1 ); \
+  s1 = mm256_rol256_1x64( s1 ); \
   s2 = mm256_swap_128( s2 ); \
-  s3 = mm256_rotr256_1x64( s3 );
+  s3 = mm256_ror256_1x64( s3 );

#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
@@ -94,25 +94,25 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, all args updated
#define G_2X64(a,b,c,d) \
   a = _mm_add_epi64( a, b ); \
-  d = mm_rotr_64( _mm_xor_si128( d, a), 32 ); \
+  d = mm_ror_64( _mm_xor_si128( d, a), 32 ); \
   c = _mm_add_epi64( c, d ); \
-  b = mm_rotr_64( _mm_xor_si128( b, c ), 24 ); \
+  b = mm_ror_64( _mm_xor_si128( b, c ), 24 ); \
   a = _mm_add_epi64( a, b ); \
-  d = mm_rotr_64( _mm_xor_si128( d, a ), 16 ); \
+  d = mm_ror_64( _mm_xor_si128( d, a ), 16 ); \
   c = _mm_add_epi64( c, d ); \
-  b = mm_rotr_64( _mm_xor_si128( b, c ), 63 );
+  b = mm_ror_64( _mm_xor_si128( b, c ), 63 );

#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-  mm_rotl256_1x64( s2, s3 ); \
+  mm_ror256_1x64( s2, s3 ); \
   mm_swap_128( s4, s5 ); \
-  mm_rotr256_1x64( s6, s7 ); \
+  mm_rol256_1x64( s6, s7 ); \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-  mm_rotr256_1x64( s2, s3 ); \
+  mm_rol256_1x64( s2, s3 ); \
   mm_swap_128( s4, s5 ); \
-  mm_rotl256_1x64( s6, s7 );
+  mm_ror256_1x64( s6, s7 );

#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
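The renames above are purely cosmetic (rotr becomes ror, rotl becomes rol); the semantics assumed throughout is a per-lane 64-bit rotate, matching the scalar rotr64 in the hunk context. For clarity, a reference expansion of the renamed helper (illustrative, not the avxdefs.h definition):

    #include <immintrin.h>

    // Rotate each 64-bit lane of x right by c bits (0 < c < 64),
    // the behaviour assumed for mm256_ror_64 in the macros above.
    static inline __m256i mm256_ror_64_ref( const __m256i x, const int c )
    {
        return _mm256_or_si256( _mm256_srli_epi64( x, c ),
                                _mm256_slli_epi64( x, 64 - c ) );
    }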
@@ -1,196 +0,0 @@
/**
 * Header file for Blake2b's internal permutation in the form of a sponge.
 * This code is based on the original Blake2b's implementation provided by
 * Samuel Neves (https://blake2.net/)
 *
 * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
 *
 * This software is hereby placed in the public domain.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef SPONGE_H_
#define SPONGE_H_

#include <stdint.h>
#include "avxdefs.h"

#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif


/*Blake2b IV Array*/
static const uint64_t blake2b_IV[8] =
{
   0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
   0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
   0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
   0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};

/*Blake2b's rotation*/
static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
   return ( w >> c ) | ( w << ( 64 - c ) );
}

#if defined __AVX2__
// only available with avx2

// process 4 columns in parallel
// returns void, updates all args
#define G_4X64(a,b,c,d) \
   a = _mm256_add_epi64( a, b ); \
   d = mm256_rotr_64( _mm256_xor_si256( d, a), 32 ); \
   c = _mm256_add_epi64( c, d ); \
   b = mm256_rotr_64( _mm256_xor_si256( b, c ), 24 ); \
   a = _mm256_add_epi64( a, b ); \
   d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
   c = _mm256_add_epi64( c, d ); \
   b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );

#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   G_4X64( s0, s1, s2, s3 ); \
   s1 = mm256_rotr256_1x64( s1); \
   s2 = mm256_swap_128( s2 ); \
   s3 = mm256_rotl256_1x64( s3 ); \
   G_4X64( s0, s1, s2, s3 ); \
   s1 = mm256_rotl256_1x64( s1 ); \
   s2 = mm256_swap_128( s2 ); \
   s3 = mm256_rotr256_1x64( s3 );

#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \

#else
// only available with avx

// process 2 columns in parallel
// returns void, all args updated
#define G_2X64(a,b,c,d) \
   a = _mm_add_epi64( a, b ); \
   d = mm_rotr_64( _mm_xor_si128( d, a), 32 ); \
   c = _mm_add_epi64( c, d ); \
   b = mm_rotr_64( _mm_xor_si128( b, c ), 24 ); \
   a = _mm_add_epi64( a, b ); \
   d = mm_rotr_64( _mm_xor_si128( d, a ), 16 ); \
   c = _mm_add_epi64( c, d ); \
   b = mm_rotr_64( _mm_xor_si128( b, c ), 63 );

#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
   mm_rotl256_1x64( s2, s3 ); \
   mm_swap_128( s4, s5 ); \
   mm_rotr256_1x64( s6, s7 ); \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
   mm_rotr256_1x64( s2, s3 ); \
   mm_swap_128( s4, s5 ); \
   mm_rotl256_1x64( s6, s7 );

#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \


#endif // AVX2

// Scalar
//Blake2b's G function
#define G(r,i,a,b,c,d) \
  do { \
   a = a + b; \
   d = rotr64(d ^ a, 32); \
   c = c + d; \
   b = rotr64(b ^ c, 24); \
   a = a + b; \
   d = rotr64(d ^ a, 16); \
   c = c + d; \
   b = rotr64(b ^ c, 63); \
  } while(0)


/*One Round of the Blake2b's compression function*/
#define ROUND_LYRA(r) \
   G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
   G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
   G(r,2,v[ 2],v[ 6],v[10],v[14]); \
   G(r,3,v[ 3],v[ 7],v[11],v[15]); \
   G(r,4,v[ 0],v[ 5],v[10],v[15]); \
   G(r,5,v[ 1],v[ 6],v[11],v[12]); \
   G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
   G(r,7,v[ 3],v[ 4],v[ 9],v[14]);


//---- Housekeeping
void initState( uint64_t state[/*16*/] );

//---- Squeezes
void squeeze( uint64_t *state, unsigned char *out, unsigned int len );
void reducedSqueezeRow0( uint64_t* state, uint64_t* row, uint64_t nCols );

//---- Absorbs
void absorbBlock( uint64_t *state, const uint64_t *in );
void absorbBlockBlake2Safe( uint64_t *state, const uint64_t *in );

//---- Duplexes
void reducedDuplexRow1( uint64_t *state, const uint64_t *rowIn,
                        uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRowSetup( uint64_t *state, const uint64_t *rowIn,
                        uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols );
void reducedDuplexRow( uint64_t *state, const uint64_t *rowIn,
                       uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols );

//---- Misc
//void printArray(unsigned char *array, unsigned int size, char *name);

////////////////////////////////////////////////////////////////////////////////////////////////


////TESTS////
//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
/////////////


#endif /* SPONGE_H_ */
@@ -126,6 +126,7 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
   if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
      && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -189,6 +189,7 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
   if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
      && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -167,6 +167,7 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
   if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
      && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[19] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -115,6 +115,7 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
   }
   if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget) )
   {
      pdata[19] = n+1;
      nonces[ num_found++ ] = n+1;
      work_set_target_ratio( work, hash+8 );
   }
@@ -125,6 +125,7 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 8; i++ )
   if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[27] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -227,6 +228,7 @@ int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
   for ( int i = 0; i < 4; i++ )
   if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
   {
      pdata[27] = n+i;
      nonces[ num_found++ ] = n+i;
      work_set_target_ratio( work, hash+(i<<3) );
   }
@@ -40,6 +40,35 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
         rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
   free(xnonce2str);
}

// don't use lbry_build_block_header, it can't handle claim, do it inline
// in lbry_build_extraheader. The side effect is no gbt support for lbry.
void lbry_build_block_header( struct work* g_work, uint32_t version,
                              uint32_t *prevhash, uint32_t *merkle_root,
                              uint32_t ntime, uint32_t nbits )
{
   int i;
   memset( g_work->data, 0, sizeof(g_work->data) );
   g_work->data[0] = version;

   if ( have_stratum )
      for ( i = 0; i < 8; i++ )
         g_work->data[1 + i] = le32dec( prevhash + i );
   else
      for (i = 0; i < 8; i++)
         g_work->data[ 8-i ] = le32dec( prevhash + i );

   for ( i = 0; i < 8; i++ )
      g_work->data[9 + i] = be32dec( merkle_root + i );

// for ( int i = 0; i < 8; i++ )
//    g_work->data[17 + i] = claim[i];

   g_work->data[ LBRY_NTIME_INDEX ] = ntime;
   g_work->data[ LBRY_NBITS_INDEX ] = nbits;
   g_work->data[28] = 0x80000000;
}

void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
   unsigned char merkle_root[64] = { 0 };
@@ -50,14 +79,23 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
   // Increment extranonce2
   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
   // Assemble block header

// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
//          (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_root,
//          le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );

   memset( g_work->data, 0, sizeof(g_work->data) );
   g_work->data[0] = le32dec( sctx->job.version );

   for ( i = 0; i < 8; i++ )
      g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );

   for ( i = 0; i < 8; i++ )
      g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );

   for ( int i = 0; i < 8; i++ )
      g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];

   g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
   g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
   g_work->data[28] = 0x80000000;
@@ -86,6 +124,7 @@ bool register_lbry_algo( algo_gate_t* gate )
   gate->calc_network_diff     = (void*)&lbry_calc_network_diff;
   gate->get_max64             = (void*)&lbry_get_max64;
   gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
// gate->build_block_header    = (void*)&build_block_header;
   gate->build_extraheader     = (void*)&lbry_build_extraheader;
   gate->set_target            = (void*)&lbry_set_target;
   gate->ntime_index           = LBRY_NTIME_INDEX;
@@ -155,7 +155,7 @@ bool register_sha256t_algo( algo_gate_t* gate )
   gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
   gate->scanhash   = (void*)&scanhash_sha256t;
   gate->hash       = (void*)&sha256t_hash;
-  gate->set_target = (void*)&sha256t_set_target;
+//  gate->set_target = (void*)&sha256t_set_target;
   gate->get_max64  = (void*)&get_max64_0x3ffff;
   return true;
}
@@ -52,21 +52,6 @@ extern "C"{

#define C32   SPH_C32

-/*
- * As of round 2 of the SHA-3 competition, the published reference
- * implementation and test vectors are wrong, because they use
- * big-endian AES tables while the internal decoding uses little-endian.
- * The code below follows the specification. To turn it into a code
- * which follows the reference implementation (the one called "BugFix"
- * on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
- * the code below (from the '#define AES_BIG_ENDIAN...' to the definition
- * of the AES_ROUND_NOKEY macro) and replace it with the version which
- * is commented out afterwards.
- */
-
#define AES_BIG_ENDIAN   0
#include "algo/sha/aes_helper.c"

static const sph_u32 IV512[] = {
   C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),
   C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),
@@ -74,210 +59,19 @@ static const sph_u32 IV512[] = {
   C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
};

-// Return hi 128 bits with elements shifted one lane with vacated lane filled
-// with data rotated from lo.
+// Partially rotate elements in two 128 bit vectors as one 256 bit vector
+// and return the rotated high 128 bits.
+// Similar to mm_rotr256_1x32 but only a partial rotation as lo is not
+// completed. It's faster than a full rotation.
#if defined(__SSSE3__)

-static inline __m128i mm_rotr256hi_1x32( __m128i hi, __m128i lo, int n )
-{ return _mm_or_si128( _mm_srli_si128( hi, n<<2 ),
-                       _mm_slli_si128( lo, 16 - (n<<2) ) );
-}
+#define mm_rotr256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 )

-#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
-      sph_u32 t0 = (x0); \
-      sph_u32 t1 = (x1); \
-      sph_u32 t2 = (x2); \
-      sph_u32 t3 = (x3); \
-      AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
-   } while (0)
+#else // SSE2

-#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
-      sph_u32 kt; \
-      AES_ROUND_NOKEY(k1, k2, k3, k0); \
-      kt = (k0); \
-      (k0) = (k1); \
-      (k1) = (k2); \
-      (k2) = (k3); \
-      (k3) = kt; \
-   } while (0)
+#define mm_rotr256hi_1x32( hi, lo ) \
+   _mm_or_si128( _mm_srli_si128( hi, 4 ), \
+                 _mm_slli_si128( lo, 12 ) )
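The SSSE3/SSE2 split above is two spellings of the same partial lane rotation: palignr collapses the shift-and-or construction into one instruction. A self-contained sketch showing the equivalence (helper names here are illustrative):

    #include <immintrin.h>

    // SSE2 form: high half shifted right one 32-bit lane, vacated lane
    // filled from the low half.
    static inline __m128i rotr256hi_1x32_sse2( __m128i hi, __m128i lo )
    {
        return _mm_or_si128( _mm_srli_si128( hi, 4 ),
                             _mm_slli_si128( lo, 12 ) );
    }

    #ifdef __SSSE3__
    // SSSE3 form: bytes 4..19 of the concatenation lo:hi, i.e. the same
    // result in a single palignr.
    static inline __m128i rotr256hi_1x32_ssse3( __m128i hi, __m128i lo )
    {
        return _mm_alignr_epi8( lo, hi, 4 );
    }
    #endif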
#if SPH_SMALL_FOOTPRINT_SHAVITE
|
||||
|
||||
/*
|
||||
* This function assumes that "msg" is aligned for 32-bit access.
|
||||
*/
|
||||
static void
|
||||
c512(sph_shavite_big_context *sc, const void *msg)
|
||||
{
|
||||
sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
|
||||
sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
|
||||
sph_u32 rk[448];
|
||||
size_t u;
|
||||
int r, s;
|
||||
|
||||
#if SPH_LITTLE_ENDIAN
|
||||
memcpy(rk, msg, 128);
|
||||
#else
|
||||
for (u = 0; u < 32; u += 4) {
|
||||
rk[u + 0] = sph_dec32le_aligned(
|
||||
(const unsigned char *)msg + (u << 2) + 0);
|
||||
rk[u + 1] = sph_dec32le_aligned(
|
||||
(const unsigned char *)msg + (u << 2) + 4);
|
||||
rk[u + 2] = sph_dec32le_aligned(
|
||||
(const unsigned char *)msg + (u << 2) + 8);
|
||||
rk[u + 3] = sph_dec32le_aligned(
|
||||
(const unsigned char *)msg + (u << 2) + 12);
|
||||
}
|
||||
#endif
|
||||
u = 32;
|
||||
for (;;) {
|
||||
for (s = 0; s < 4; s ++) {
|
||||
sph_u32 x0, x1, x2, x3;
|
||||
|
||||
x0 = rk[u - 31];
|
||||
x1 = rk[u - 30];
|
||||
x2 = rk[u - 29];
|
||||
x3 = rk[u - 32];
|
||||
AES_ROUND_NOKEY(x0, x1, x2, x3);
|
||||
rk[u + 0] = x0 ^ rk[u - 4];
|
||||
rk[u + 1] = x1 ^ rk[u - 3];
|
||||
rk[u + 2] = x2 ^ rk[u - 2];
|
||||
rk[u + 3] = x3 ^ rk[u - 1];
|
||||
if (u == 32) {
|
||||
rk[ 32] ^= sc->count0;
|
||||
rk[ 33] ^= sc->count1;
|
||||
rk[ 34] ^= sc->count2;
|
||||
rk[ 35] ^= SPH_T32(~sc->count3);
|
||||
} else if (u == 440) {
|
||||
rk[440] ^= sc->count1;
|
||||
rk[441] ^= sc->count0;
|
||||
rk[442] ^= sc->count3;
|
||||
rk[443] ^= SPH_T32(~sc->count2);
|
||||
}
|
||||
u += 4;
|
||||
|
||||
x0 = rk[u - 31];
|
||||
x1 = rk[u - 30];
|
||||
x2 = rk[u - 29];
|
||||
x3 = rk[u - 32];
|
||||
AES_ROUND_NOKEY(x0, x1, x2, x3);
|
||||
rk[u + 0] = x0 ^ rk[u - 4];
|
||||
rk[u + 1] = x1 ^ rk[u - 3];
|
||||
rk[u + 2] = x2 ^ rk[u - 2];
|
||||
rk[u + 3] = x3 ^ rk[u - 1];
|
||||
if (u == 164) {
|
||||
rk[164] ^= sc->count3;
|
||||
rk[165] ^= sc->count2;
|
||||
rk[166] ^= sc->count1;
|
||||
rk[167] ^= SPH_T32(~sc->count0);
|
||||
} else if (u == 316) {
|
||||
rk[316] ^= sc->count2;
|
||||
rk[317] ^= sc->count3;
|
||||
rk[318] ^= sc->count0;
|
||||
rk[319] ^= SPH_T32(~sc->count1);
|
||||
}
|
||||
u += 4;
|
||||
}
|
||||
if (u == 448)
|
||||
break;
|
||||
for (s = 0; s < 8; s ++) {
|
||||
rk[u + 0] = rk[u - 32] ^ rk[u - 7];
|
||||
rk[u + 1] = rk[u - 31] ^ rk[u - 6];
|
||||
rk[u + 2] = rk[u - 30] ^ rk[u - 5];
|
||||
rk[u + 3] = rk[u - 29] ^ rk[u - 4];
|
||||
u += 4;
|
||||
}
|
||||
}
|
||||
|
||||
p0 = sc->h[0x0];
|
||||
p1 = sc->h[0x1];
|
||||
p2 = sc->h[0x2];
|
||||
p3 = sc->h[0x3];
|
||||
p4 = sc->h[0x4];
|
||||
p5 = sc->h[0x5];
|
||||
p6 = sc->h[0x6];
|
||||
p7 = sc->h[0x7];
|
||||
p8 = sc->h[0x8];
|
||||
p9 = sc->h[0x9];
|
||||
pA = sc->h[0xA];
|
||||
pB = sc->h[0xB];
|
||||
pC = sc->h[0xC];
|
||||
pD = sc->h[0xD];
|
||||
pE = sc->h[0xE];
|
||||
pF = sc->h[0xF];
|
||||
u = 0;
|
||||
for (r = 0; r < 14; r ++) {
|
||||
#define C512_ELT(l0, l1, l2, l3, r0, r1, r2, r3) do { \
|
||||
sph_u32 x0, x1, x2, x3; \
|
||||
x0 = r0 ^ rk[u ++]; \
|
||||
x1 = r1 ^ rk[u ++]; \
|
||||
x2 = r2 ^ rk[u ++]; \
|
||||
x3 = r3 ^ rk[u ++]; \
|
||||
AES_ROUND_NOKEY(x0, x1, x2, x3); \
|
||||
x0 ^= rk[u ++]; \
|
||||
x1 ^= rk[u ++]; \
|
||||
x2 ^= rk[u ++]; \
|
||||
x3 ^= rk[u ++]; \
|
||||
AES_ROUND_NOKEY(x0, x1, x2, x3); \
|
||||
x0 ^= rk[u ++]; \
|
||||
x1 ^= rk[u ++]; \
|
||||
x2 ^= rk[u ++]; \
|
||||
x3 ^= rk[u ++]; \
|
||||
AES_ROUND_NOKEY(x0, x1, x2, x3); \
|
||||
x0 ^= rk[u ++]; \
|
||||
x1 ^= rk[u ++]; \
|
||||
x2 ^= rk[u ++]; \
|
||||
x3 ^= rk[u ++]; \
|
||||
AES_ROUND_NOKEY(x0, x1, x2, x3); \
|
||||
l0 ^= x0; \
|
||||
l1 ^= x1; \
|
||||
l2 ^= x2; \
|
||||
l3 ^= x3; \
|
||||
} while (0)
|
||||
|
||||
#define WROT(a, b, c, d) do { \
|
||||
sph_u32 t = d; \
|
||||
d = c; \
|
||||
c = b; \
|
||||
b = a; \
|
||||
a = t; \
|
||||
} while (0)
|
||||
|
||||
C512_ELT(p0, p1, p2, p3, p4, p5, p6, p7);
|
||||
C512_ELT(p8, p9, pA, pB, pC, pD, pE, pF);
|
||||
|
||||
WROT(p0, p4, p8, pC);
|
||||
WROT(p1, p5, p9, pD);
|
||||
WROT(p2, p6, pA, pE);
|
||||
WROT(p3, p7, pB, pF);
|
||||
|
||||
#undef C512_ELT
|
||||
#undef WROT
|
||||
}
|
||||
sc->h[0x0] ^= p0;
|
||||
sc->h[0x1] ^= p1;
|
||||
sc->h[0x2] ^= p2;
|
||||
sc->h[0x3] ^= p3;
|
||||
sc->h[0x4] ^= p4;
|
||||
sc->h[0x5] ^= p5;
|
||||
sc->h[0x6] ^= p6;
|
||||
sc->h[0x7] ^= p7;
|
||||
sc->h[0x8] ^= p8;
|
||||
sc->h[0x9] ^= p9;
|
||||
sc->h[0xA] ^= pA;
|
||||
sc->h[0xB] ^= pB;
|
||||
sc->h[0xC] ^= pC;
|
||||
sc->h[0xD] ^= pD;
|
||||
sc->h[0xE] ^= pE;
|
||||
sc->h[0xF] ^= pF;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void
|
||||
c512( sph_shavite_big_context *sc, const void *msg )
|
||||
@@ -331,7 +125,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
for ( r = 0; r < 3; r ++ )
|
||||
{
|
||||
// round 1, 5, 9
|
||||
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
|
||||
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
|
||||
k00 = _mm_xor_si128( k00, k13 );
|
||||
|
||||
if ( r == 0 )
|
||||
@@ -340,7 +134,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
|
||||
x = _mm_xor_si128( p0, k00 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
|
||||
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
|
||||
k01 = _mm_xor_si128( k01, k00 );
|
||||
|
||||
if ( r == 1 )
|
||||
@@ -349,33 +143,33 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
|
||||
x = _mm_xor_si128( x, k01 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
|
||||
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
|
||||
k02 = _mm_xor_si128( k02, k01 );
|
||||
|
||||
x = _mm_xor_si128( x, k02 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
|
||||
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
|
||||
k03 = _mm_xor_si128( k03, k02 );
|
||||
|
||||
x = _mm_xor_si128( x, k03 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
p3 = _mm_xor_si128( p3, x );
|
||||
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
|
||||
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
|
||||
k10 = _mm_xor_si128( k10, k03 );
|
||||
|
||||
x = _mm_xor_si128( p2, k10 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
|
||||
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
|
||||
k11 = _mm_xor_si128( k11, k10 );
|
||||
|
||||
x = _mm_xor_si128( x, k11 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
|
||||
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
|
||||
k12 = _mm_xor_si128( k12, k11 );
|
||||
|
||||
x = _mm_xor_si128( x, k12 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
|
||||
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
|
||||
k13 = _mm_xor_si128( k13, k12 );
|
||||
|
||||
if ( r == 2 )
|
||||
@@ -388,80 +182,80 @@ c512( sph_shavite_big_context *sc, const void *msg )

// round 2, 6, 10

k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
x = _mm_xor_si128( p3, k00 );
x = _mm_aesenc_si128( x, m128_zero );

k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );

k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );

k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );

p2 = _mm_xor_si128( p2, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
x = _mm_xor_si128( p1, k10 );
x = _mm_aesenc_si128( x, m128_zero );

k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );

k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );

k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );

// round 3, 7, 11

k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );

x = _mm_xor_si128( p2, k00 );
x = _mm_aesenc_si128( x, m128_zero );

k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );

x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );

x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );

x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p1 = _mm_xor_si128( p1, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );

x = _mm_xor_si128( p0, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );

x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, k11 );

x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );

x = _mm_xor_si128( x, k13 );
@@ -470,36 +264,36 @@ c512( sph_shavite_big_context *sc, const void *msg )

// round 4, 8, 12

k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );

x = _mm_xor_si128( p1, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );

x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );

x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );

x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );

x = _mm_xor_si128( p3, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );

x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );

x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );

x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
@@ -508,44 +302,44 @@ c512( sph_shavite_big_context *sc, const void *msg )

// round 13

k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );

x = _mm_xor_si128( p0, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );

x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );

x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );

x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p3 = _mm_xor_si128( p3, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );

x = _mm_xor_si128( p2, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );

x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, _mm_xor_si128( k11, _mm_set_epi32(
                     ~sc->count2, sc->count3, sc->count0, sc->count1 ) ) );

x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );

x = _mm_xor_si128( x, k13 );
@@ -558,7 +352,6 @@ c512( sph_shavite_big_context *sc, const void *msg )
h[3] = _mm_xor_si128( h[3], p1 );
}

#endif

static void
shavite_big_aesni_init( sph_shavite_big_context *sc, const sph_u32 *iv )
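
Two helper changes run through all of the c512() hunks above: the rotate macro is renamed from mm_rotr_1x32 to mm_ror_1x32, and mm_rotr256hi_1x32 drops its third argument, since the rotate count in this code is always one 32-bit lane. As a rough illustration only (the real definitions live in the repo's avxdefs.h and may differ), a one-lane right rotate of a 128-bit vector can be a single SSE2 shuffle:

    #include <immintrin.h>

    // Hypothetical stand-in for the renamed helper: the control byte 0x39
    // selects lanes 1,2,3,0, i.e. rotates the four 32-bit lanes of the
    // vector right by one position.
    #define mm_ror_1x32( v )  _mm_shuffle_epi32( v, 0x39 )
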
@@ -61,6 +61,7 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}
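
The skein hunk above and every scanhash hunk that follows share the same shape. The 4-way code hashes four nonces at once and stores the four digests side by side, eight uint32_t words each, so hash+(i<<3) addresses lane i; the line each hunk adds is the work_set_target_ratio() call. A condensed sketch of the shared pattern, using the names from the diff:

    // Shared result-check pattern from the 4-way scanhash functions.
    for ( int i = 0; i < 4; i++ )
    {
       uint32_t *lane = hash + (i<<3);  // digest for nonce n+i, 8 words wide
       if ( lane[7] <= Htarg && fulltest( lane, ptarget ) )
       {
          pdata[19] = n + i;                    // report the winning nonce
          nonces[ num_found++ ] = n + i;
          work_set_target_ratio( work, lane );  // the line this commit adds
       }
    }
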
@@ -56,6 +56,7 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -203,6 +203,7 @@ int scanhash_c11_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -231,8 +231,9 @@ int scanhash_timetravel_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
   work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
@@ -269,6 +269,7 @@ int scanhash_timetravel10_4way( int thr_id, struct work *work,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -121,6 +121,7 @@ int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint
if ( ( !( (hash+(i<<3))[7] & mask ) )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -202,6 +202,7 @@ int scanhash_x11_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -286,6 +286,7 @@ int scanhash_x11evo_4way( int thr_id, struct work* work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & hmask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -209,6 +209,7 @@ int scanhash_x11gost_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}
@@ -231,6 +231,7 @@ int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -145,6 +145,7 @@ int scanhash_phi1612_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -109,6 +109,7 @@ int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -227,6 +227,7 @@ int scanhash_x13_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -255,6 +255,7 @@ int scanhash_x13sm3_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -139,6 +139,7 @@ int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}
@@ -115,6 +115,7 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -237,6 +237,7 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -256,6 +256,7 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -349,6 +349,7 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -267,6 +267,7 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
     && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}

@@ -411,6 +411,7 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
   pdata[19] = n+i;
   nonces[ num_found++ ] = n+i;
   work_set_target_ratio( work, hash+(i<<3) );
}
@@ -1363,10 +1363,11 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
{
HMAC_SHA256_CTX ctx;
HMAC_SHA256_Init(&ctx, buf, buflen);
if ( client_key_hack ) // GlobalBoost-Y buggy yescrypt
   HMAC_SHA256_Update(&ctx, salt, saltlen);
else // Proper yescrypt
   HMAC_SHA256_Update(&ctx, "Client Key", 10);
if ( yescrypt_client_key )
   HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
                       yescrypt_client_key_len );
else
   HMAC_SHA256_Update( &ctx, salt, saltlen );
HMAC_SHA256_Final(sha256, &ctx);
}
/* Compute StoredKey */
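
The hunk above replaces the hard-coded client_key_hack boolean with a configurable client key: the first if/else is the removed GlobalBoost-Y special case, the second is the new key-driven logic. A minimal sketch of the new selection step, wrapped in a hypothetical helper (client_key_hmac is not in the source) and using the same bundled HMAC-SHA256 API the diff calls:

    #include <stddef.h>
    #include <stdint.h>
    #include "sha256_Y.h"   // bundled HMAC_SHA256_* used by the diff

    extern char *yescrypt_client_key;
    extern int   yescrypt_client_key_len;

    // Hypothetical wrapper illustrating the new logic: a registered client
    // key personalizes the HMAC, while a NULL key preserves GlobalBoost-Y's
    // historical quirk of feeding the salt in a second time.
    static void client_key_hmac( uint8_t sha256[32], const uint8_t *buf,
                                 size_t buflen, const uint8_t *salt,
                                 size_t saltlen )
    {
        HMAC_SHA256_CTX ctx;
        HMAC_SHA256_Init( &ctx, buf, buflen );
        if ( yescrypt_client_key )
            HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
                                yescrypt_client_key_len );
        else
            HMAC_SHA256_Update( &ctx, salt, saltlen );
        HMAC_SHA256_Final( sha256, &ctx );
    }
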
@@ -25,7 +25,7 @@
#include "compat.h"

#include "yescrypt.h"

#include "sha256_Y.h"
#include "algo-gate-api.h"

#define BYTES2CHARS(bytes) \

@@ -366,7 +366,8 @@ static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
uint64_t YESCRYPT_N;
uint32_t YESCRYPT_R;
uint32_t YESCRYPT_P;
bool client_key_hack;
char *yescrypt_client_key = NULL;
int yescrypt_client_key_len = 0;

/* main hash 80 bytes input */
void yescrypt_hash( const char *input, char *output, uint32_t len )
|
||||
{
|
||||
yescrypt_gate_base( gate );
|
||||
gate->get_max64 = (void*)&yescrypt_get_max64;
|
||||
client_key_hack = true;
|
||||
yescrypt_client_key = NULL;
|
||||
yescrypt_client_key_len = 0;
|
||||
YESCRYPT_N = 2048;
|
||||
YESCRYPT_R = 8;
|
||||
YESCRYPT_P = 1;
|
||||
@@ -447,7 +449,8 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
|
||||
{
|
||||
yescrypt_gate_base( gate );
|
||||
gate->get_max64 = (void*)&yescrypt_get_max64;
|
||||
client_key_hack = false;
|
||||
yescrypt_client_key = "Client Key";
|
||||
yescrypt_client_key_len = 10;
|
||||
YESCRYPT_N = 2048;
|
||||
YESCRYPT_R = 8;
|
||||
YESCRYPT_P = 1;
|
||||
@@ -458,10 +461,23 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
client_key_hack = false;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 16;
YESCRYPT_P = 1;
return true;
}

bool register_yescryptr32_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
yescrypt_client_key = "WaviBanana";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 32;
YESCRYPT_P = 1;
return true;
}
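
The new register_yescryptr32_algo() above backs the WAVI entry in the algo list: it reuses the yescryptr16 max64 handler, raises R to 32, and sets the client key to "WaviBanana". Once built, the algo is selected like any other; the pool URL and credentials below are placeholders, not real endpoints:

    ./cpuminer -a yescryptr32 -o stratum+tcp://pool.example.com:3333 -u YOUR_WALLET -p x
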
@@ -108,7 +108,8 @@ typedef enum {
__YESCRYPT_INIT_SHARED = 0x30000
} yescrypt_flags_t;

extern bool client_key_hack; // true for GlobalBoost-Y
extern char *yescrypt_client_key;
extern int yescrypt_client_key_len;


#define YESCRYPT_KNOWN_FLAGS \

api.c
@@ -98,6 +98,7 @@ extern int opt_api_remote;
extern double global_hashrate;
extern uint32_t accepted_count;
extern uint32_t rejected_count;
extern uint32_t solved_count;

#define cpu_threads opt_n_threads

@@ -138,8 +139,7 @@ static char *getsummary( char *params )
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
double diff = net_diff > 0. ? net_diff : stratum_diff;
char diff_str[16];
double hashrate = (double)global_hashrate;
char units[4] = {0};
double hrate = (double)global_hashrate;
struct cpu_info cpu = { 0 };
#ifdef USE_MONITORING
cpu.has_monitoring = true;

@@ -157,16 +157,15 @@ static char *getsummary( char *params )
sprintf( diff_str, "%.6f", diff);

*buffer = '\0';
scale_hash_for_display ( &hashrate, units );

sprintf( buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;CPUS=%d;%sH/s=%.2f;ACC=%d;REJ=%d;"
"ALGO=%s;CPUS=%d;URL=%s;"
"HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
"UPTIME=%.0f;TS=%u|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, opt_n_threads, units, hashrate,
accepted_count, rejected_count, accps, diff_str,
cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
algo, opt_n_threads, short_url, hrate, hrate/1000.0,
accepted_count, rejected_count, solved_count,
accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
uptime, (uint32_t) ts);
return buffer;
}
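
The getsummary() rewrite above drops the display-scaled %sH/s field in favour of fixed HS (H/s) and KHS fields, and adds the pool URL and solved-share count to the reply. An illustrative response line (all values are made up, and APIVERSION's actual value is not shown in this diff):

    NAME=cpuminer-opt;VER=3.8.4.1;API=1.0;ALGO=yescryptr32;CPUS=4;URL=pool.example.com:3333;HS=1234.56;KHS=1.23;ACC=42;REJ=1;SOL=0;ACCMN=0.600;DIFF=0.123456;TEMP=55.0;FAN=0;FREQ=0;UPTIME=3600;TS=1520000000|
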

configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.3.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.4.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.

@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.8.3'
PACKAGE_STRING='cpuminer-opt 3.8.3'
PACKAGE_VERSION='3.8.4.1'
PACKAGE_STRING='cpuminer-opt 3.8.4.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.8.3 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.8.4.1 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1392,7 +1392,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.8.3:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.8.4.1:";;
esac
cat <<\_ACEOF

@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.8.3
cpuminer-opt configure 3.8.4.1
generated by GNU Autoconf 2.69

Copyright (C) 2012 Free Software Foundation, Inc.

@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by cpuminer-opt $as_me 3.8.3, which was
It was created by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was

$ $0 $@

@@ -2981,7 +2981,7 @@ fi

# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.8.3'
VERSION='3.8.4.1'


cat >>confdefs.h <<_ACEOF

@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.8.3, which was
This file was extended by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was

CONFIG_FILES = $CONFIG_FILES

@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.8.3
cpuminer-opt config.status 3.8.4.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

configure.ac

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.8.3])
AC_INIT([cpuminer-opt], [3.8.4.1])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
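
The version string is maintained once, in configure.ac's AC_INIT; the matching edits throughout the generated configure script above are mechanical autotools output. After bumping AC_INIT the script is regenerated with the usual autotools chain, for example (command is illustrative, not taken from this repo's docs):

    autoreconf -fi && ./configure CFLAGS="-O3 -march=native"
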

cpu-miner.c

File diff suppressed because it is too large

miner.h
@@ -424,7 +424,7 @@ extern size_t rpc2_bloblen;
extern uint32_t rpc2_target;
extern char *rpc2_job_id;
extern char *rpc_user;

extern char *short_url;

json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags);
bool rpc2_login(CURL *curl);

@@ -553,6 +553,7 @@ enum algos {
ALGO_YESCRYPT,
ALGO_YESCRYPTR8,
ALGO_YESCRYPTR16,
ALGO_YESCRYPTR32,
ALGO_ZR5,
ALGO_COUNT
};

@@ -629,6 +630,7 @@ static const char* const algo_names[] = {
"yescrypt",
"yescryptr8",
"yescryptr16",
"yescryptr32",
"zr5",
"\0"
};

@@ -648,6 +650,10 @@ extern int opt_timeout;
extern bool want_longpoll;
extern bool have_longpoll;
extern bool have_gbt;
extern char* lp_id;
extern char *rpc_userpass;
extern const char *gbt_lp_req;
extern const char *getwork_req;
extern bool allow_getwork;
extern bool want_stratum;
extern bool have_stratum;

@@ -760,6 +766,7 @@ Options:\n\
yescrypt      Globalboost-Y (BSTY)\n\
yescryptr8    BitZeny (ZNY)\n\
yescryptr16   Yenten (YTN)\n\
yescryptr32   WAVI\n\
zr5           Ziftr\n\
-o, --url=URL URL of mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\
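
The ALGO_YESCRYPTR32 enum entry, the "yescryptr32" string, and the help-text line above must all be added at matching positions, since the name table is walked by index to resolve -a arguments. A hypothetical compile-time guard (not present in the source) that would catch a missed table entry, counting the trailing "\0" sentinel:

    #include <assert.h>

    // Hypothetical guard: algo_names[] carries one string per enum value
    // plus the "\0" terminator, so its length should be ALGO_COUNT + 1.
    static_assert( sizeof(algo_names) / sizeof(algo_names[0]) == ALGO_COUNT + 1,
                   "algo_names[] out of sync with enum algos" );
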