Compare commits

..

1 Commits

Author SHA1 Message Date
Jay D Dee
f990f6a702 v3.11.3 2020-01-10 20:37:47 -05:00
91 changed files with 2445 additions and 3239 deletions

View File

@@ -33,6 +33,3 @@ Jay D Dee
xcouiz@gmail.com
Cryply
Colin Percival
Alexander Peslyak

View File

@@ -80,6 +80,7 @@ cpuminer_SOURCES = \
algo/cryptonight/cryptonight-common.c\
algo/cryptonight/cryptonight-aesni.c\
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/cubehash_sse2.c\
algo/cubehash/cube-hash-2way.c \
algo/echo/sph_echo.c \
@@ -120,8 +121,6 @@ cpuminer_SOURCES = \
algo/keccak/keccak-hash-4way.c \
algo/keccak/keccak-4way.c\
algo/keccak/keccak-gate.c \
algo/keccak/sha3d-4way.c \
algo/keccak/sha3d.c \
algo/lanehash/lane.c \
algo/luffa/sph_luffa.c \
algo/luffa/luffa.c \
@@ -181,7 +180,6 @@ cpuminer_SOURCES = \
algo/sha/sph_sha2big.c \
algo/sha/sha256-hash-4way.c \
algo/sha/sha512-hash-4way.c \
algo/sha/hmac-sha256-hash.c \
algo/sha/sha2.c \
algo/sha/sha256t-gate.c \
algo/sha/sha256t-4way.c \
@@ -294,11 +292,12 @@ cpuminer_SOURCES = \
algo/x22/x25x.c \
algo/x22/x25x-4way.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-best.c \
algo/yespower/yespower-gate.c \
algo/yespower/yespower-blake2b.c \
algo/yespower/crypto/blake2b-yp.c \
algo/yespower/yescrypt-r8g.c \
algo/yespower/sha256_p.c \
algo/yespower/yespower-opt.c
disable_flags =

View File

@@ -97,10 +97,10 @@ Supported Algorithms
qubit Qubit
scrypt scrypt(1024, 1, 1) (default)
scrypt:N scrypt(N, 1, 1)
scryptjane:nf
sha256d Double SHA-256
sha256q Quad SHA-256, Pyrite (PYE)
sha256t Triple SHA-256, Onecoin (OC)
sha3d Double keccak256 (BSHA3)
shavite3 Shavite3
skein Skein+Sha (Skeincoin)
skein2 Double Skein (Woodcoin)
@@ -134,7 +134,6 @@ Supported Algorithms
xevan Bitsend (BSD)
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)
yescryptr8g Koto (KOTO)
yescryptr16 Eli
yescryptr32 WAVI
yespower Cryply

View File

@@ -33,80 +33,9 @@ supported.
64 bit Linux or Windows operating system. Apple, Android and Raspberry Pi
are not supported. FreeBSD YMMV.
Reporting bugs
--------------
Bugs can be reported by sending an email to JayDDee246@gmail.com or opening
an issue in git: https://github.com/JayDDee/cpuminer-opt/issues
Please include the following information:
1. CPU model, operating system, cpuminer-opt version (must be latest),
binary file for Windows, changes to default build procedure for Linux.
2. Exact command line (except user and pw) and initial output showing
the above requested info.
3. Additional program output showing any error messages or other
pertinent data.
4. A clear description of the problem including history, scope,
persistence or intermittency, and reproducibility.
In simpler terms:
What is it doing?
What should it be doing instead?
Did it work in a previous release?
Does it happen for all algos? All pools? All options? Solo?
Does it happen all the time?
If not what makes it happen or not happen?
Change Log
----------
v3.11.7
Added yescryptr8g algo for KOTO, including support for block version 5.
Added sha3d algo for BSHA3.
Removed memcmp and clean_job checks from get_new_work, now only check job_id.
Small improvement to sha512 and sha256 parallel implementations that don't
use SHA.
v3.11.6
Fixed CPU temperature regression from v3.11.5.
More improvements to share log. More compact, highlight incremented counter,
block height when solved, job id when stale.
v3.11.5
Fixed AVX512 detection that could cause compilation errors on CPUs
without AVX512.
Fixed "BLOCK SOLVED" log incorrectly displaying "Accepted" when a block
is solved.
Added share counter to share submitted & accepted logs
Added job id to share submitted log.
Share submitted log is no longer highlighted blue, there was too much blue.
Another CPU temperature fix for Linux.
Added bug reporting tips to RELEASE NOTES.
v3.11.4
Fixed scrypt segfault since v3.9.9.1.
Stale shares counted and reported separately from other rejected shares.
Display of counters for solved blocks, rejects, stale shares suppressed in
periodic summary when zero.
v3.11.3
Fixed x12 AVX2 again.

View File

@@ -209,7 +209,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
case ALGO_SHA256Q: register_sha256q_algo ( gate ); break;
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
case ALGO_SHA3D: register_sha3d_algo ( gate ); break;
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
case ALGO_SKEIN: register_skein_algo ( gate ); break;
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
@@ -248,7 +247,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
*/
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
case ALGO_YESCRYPTR8G: register_yescryptr8g_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;

View File

@@ -121,55 +121,54 @@ void ( *hash_suw ) ( void*, const void* );
// Allocate thread local buffers and other initialization specific to miner
// threads.
bool ( *miner_thread_init ) ( int );
bool ( *miner_thread_init ) ( int );
// Generate global blockheader from stratum data.
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
// Get thread local copy of blockheader with unique nonce.
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t*,
bool );
// Return pointer to nonce in blockheader.
uint32_t *( *get_nonceptr ) ( uint32_t* );
uint32_t *( *get_nonceptr ) ( uint32_t* );
// Decode getwork blockheader
bool ( *work_decode ) ( const json_t*, struct work* );
bool ( *work_decode ) ( const json_t*, struct work* );
// Extra getwork data
void ( *decode_extra_data ) ( struct work*, uint64_t* );
void ( *decode_extra_data ) ( struct work*, uint64_t* );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
// Increment extranonce
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
uint32_t*, uint32_t, uint32_t,
unsigned char* );
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
uint32_t*, uint32_t, uint32_t );
// Build mining.submit message
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
char* ( *malloc_txs_request ) ( struct work* );
char* ( *malloc_txs_request ) ( struct work* );
// Big or little
void ( *set_work_data_endian ) ( struct work* );
void ( *set_work_data_endian ) ( struct work* );
double ( *calc_network_diff ) ( struct work* );
double ( *calc_network_diff ) ( struct work* );
// Wait for first work
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
// Diverge mining threads
bool ( *do_this_thread ) ( int );
bool ( *do_this_thread ) ( int );
// After do_this_thread
void ( *resync_threads ) ( struct work* );
void ( *resync_threads ) ( struct work* );
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
bool ( *stratum_handle_response ) ( json_t* );
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
bool ( *stratum_handle_response )( json_t* );
set_t optimizations;
int ( *get_work_data_size ) ();
int ntime_index;
@@ -226,7 +225,7 @@ uint32_t *std_get_nonceptr( uint32_t *work_data );
uint32_t *jr2_get_nonceptr( uint32_t *work_data );
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
uint32_t* end_nonce_ptr );
uint32_t* end_nonce_ptr, bool clean_job );
void jr2_get_new_work( struct work *work, struct work *g_work, int thr_id,
uint32_t* end_nonce_ptr );
@@ -257,8 +256,7 @@ double std_calc_network_diff( struct work *work );
void std_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits,
unsigned char *final_sapling_hash );
uint32_t ntime, uint32_t nbits );
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );

View File

@@ -13,7 +13,7 @@ void blakehash_4way(void *state, const void *input)
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake256r14_4way_context ctx;
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
blake256r14_4way_update( &ctx, input + (64<<2), 16 );
blake256r14_4way( &ctx, input + (64<<2), 16 );
blake256r14_4way_close( &ctx, vhash );
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
@@ -36,7 +36,7 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata );
blake256r14_4way_init( &blake_4w_ctx );
blake256r14_4way_update( &blake_4w_ctx, vdata, 64 );
blake256r14_4way( &blake_4w_ctx, vdata, 64 );
do {
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );

View File

@@ -37,6 +37,8 @@
#ifndef __BLAKE_HASH_4WAY__
#define __BLAKE_HASH_4WAY__ 1
//#ifdef __SSE4_2__
#ifdef __cplusplus
extern "C"{
#endif
@@ -49,41 +51,46 @@ extern "C"{
#define SPH_SIZE_blake512 512
//////////////////////////
//
// Blake-256 4 way SSE2
// With SSE4.2 only Blake-256 4 way is available.
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
// Blake-256 4 way
typedef struct {
unsigned char buf[64<<2];
uint32_t H[8<<2];
// __m128i buf[16] __attribute__ ((aligned (64)));
// __m128i H[8];
// __m128i S[4];
size_t ptr;
uint32_t T0, T1;
int rounds; // 14 for blake, 8 for blakecoin & vanilla
} blake_4way_small_context __attribute__ ((aligned (64)));
// Default, 14 rounds, blake, decred
// Default 14 rounds
typedef blake_4way_small_context blake256_4way_context;
void blake256_4way_init(void *ctx);
void blake256_4way_update(void *ctx, const void *data, size_t len);
#define blake256_4way blake256_4way_update
void blake256_4way_close(void *ctx, void *dst);
// 14 rounds, blake, decred
typedef blake_4way_small_context blake256r14_4way_context;
void blake256r14_4way_init(void *cc);
void blake256r14_4way_update(void *cc, const void *data, size_t len);
#define blake256r14_4way blake256r14_4way_update
void blake256r14_4way_close(void *cc, void *dst);
// 8 rounds, blakecoin, vanilla
typedef blake_4way_small_context blake256r8_4way_context;
void blake256r8_4way_init(void *cc);
void blake256r8_4way_update(void *cc, const void *data, size_t len);
#define blake256r8_4way blake256r8_4way_update
void blake256r8_4way_close(void *cc, void *dst);
#ifdef __AVX2__
//////////////////////////
//
// Blake-256 8 way AVX2
// Blake-256 8 way
typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
@@ -97,6 +104,7 @@ typedef struct {
typedef blake_8way_small_context blake256_8way_context;
void blake256_8way_init(void *cc);
void blake256_8way_update(void *cc, const void *data, size_t len);
//#define blake256_8way blake256_8way_update
void blake256_8way_close(void *cc, void *dst);
// 14 rounds, blake, decred
@@ -109,9 +117,10 @@ void blake256r14_8way_close(void *cc, void *dst);
typedef blake_8way_small_context blake256r8_8way_context;
void blake256r8_8way_init(void *cc);
void blake256r8_8way_update(void *cc, const void *data, size_t len);
#define blake256r8_8way blake256r8_8way_update
void blake256r8_8way_close(void *cc, void *dst);
// Blake-512 4 way AVX2
// Blake-512 4 way
typedef struct {
__m256i buf[16];
@@ -125,15 +134,14 @@ typedef blake_4way_big_context blake512_4way_context;
void blake512_4way_init( blake_4way_big_context *sc );
void blake512_4way_update( void *cc, const void *data, size_t len );
#define blake512_4way blake512_4way_update
void blake512_4way_close( void *cc, void *dst );
void blake512_4way_full( blake_4way_big_context *sc, void * dst,
const void *data, size_t len );
void blake512_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void *dst );
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
////////////////////////////
//
// Blake-256 16 way AVX512
//Blake-256 16 way
typedef struct {
__m512i buf[16];
@@ -161,9 +169,8 @@ void blake256r8_16way_init(void *cc);
void blake256r8_16way_update(void *cc, const void *data, size_t len);
void blake256r8_16way_close(void *cc, void *dst);
////////////////////////////
//
//// Blake-512 8 way AVX512
// Blake-512 8 way
typedef struct {
__m512i buf[16];
@@ -178,10 +185,12 @@ typedef blake_8way_big_context blake512_8way_context;
void blake512_8way_init( blake_8way_big_context *sc );
void blake512_8way_update( void *cc, const void *data, size_t len );
void blake512_8way_close( void *cc, void *dst );
void blake512_8way_full( blake_8way_big_context *sc, void * dst,
const void *data, size_t len );
void blake512_8way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void *dst );
#endif // AVX512
#endif // AVX2
#ifdef __cplusplus

View File

@@ -267,22 +267,22 @@ static const sph_u64 CB[16] = {
#define CBx_(n) CBx__(n)
#define CBx__(n) CB ## n
#define CB0 0x243F6A8885A308D3
#define CB1 0x13198A2E03707344
#define CB2 0xA4093822299F31D0
#define CB3 0x082EFA98EC4E6C89
#define CB4 0x452821E638D01377
#define CB5 0xBE5466CF34E90C6C
#define CB6 0xC0AC29B7C97C50DD
#define CB7 0x3F84D5B5B5470917
#define CB8 0x9216D5D98979FB1B
#define CB9 0xD1310BA698DFB5AC
#define CBA 0x2FFD72DBD01ADFB7
#define CBB 0xB8E1AFED6A267E96
#define CBC 0xBA7C9045F12C7F99
#define CBD 0x24A19947B3916CF7
#define CBE 0x0801F2E2858EFC16
#define CBF 0x636920D871574E69
#define CB0 SPH_C64(0x243F6A8885A308D3)
#define CB1 SPH_C64(0x13198A2E03707344)
#define CB2 SPH_C64(0xA4093822299F31D0)
#define CB3 SPH_C64(0x082EFA98EC4E6C89)
#define CB4 SPH_C64(0x452821E638D01377)
#define CB5 SPH_C64(0xBE5466CF34E90C6C)
#define CB6 SPH_C64(0xC0AC29B7C97C50DD)
#define CB7 SPH_C64(0x3F84D5B5B5470917)
#define CB8 SPH_C64(0x9216D5D98979FB1B)
#define CB9 SPH_C64(0xD1310BA698DFB5AC)
#define CBA SPH_C64(0x2FFD72DBD01ADFB7)
#define CBB SPH_C64(0xB8E1AFED6A267E96)
#define CBC SPH_C64(0xBA7C9045F12C7F99)
#define CBD SPH_C64(0x24A19947B3916CF7)
#define CBE SPH_C64(0x0801F2E2858EFC16)
#define CBF SPH_C64(0x636920D871574E69)
#define READ_STATE64(state) do { \
H0 = (state)->H[0]; \
@@ -349,9 +349,9 @@ static const sph_u64 CB[16] = {
#define DECL_STATE64_8WAY \
__m512i H0, H1, H2, H3, H4, H5, H6, H7; \
__m512i S0, S1, S2, S3; \
uint64_t T0, T1;
sph_u64 T0, T1;
#define COMPRESS64_8WAY( buf ) do \
#define COMPRESS64_8WAY do \
{ \
__m512i M0, M1, M2, M3, M4, M5, M6, M7; \
__m512i M8, M9, MA, MB, MC, MD, ME, MF; \
@@ -424,84 +424,6 @@ static const sph_u64 CB[16] = {
H7 = mm512_xor4( VF, V7, S3, H7 ); \
} while (0)
void blake512_8way_compress( blake_8way_big_context *sc )
{
__m512i M0, M1, M2, M3, M4, M5, M6, M7;
__m512i M8, M9, MA, MB, MC, MD, ME, MF;
__m512i V0, V1, V2, V3, V4, V5, V6, V7;
__m512i V8, V9, VA, VB, VC, VD, VE, VF;
__m512i shuf_bswap64;
V0 = sc->H[0];
V1 = sc->H[1];
V2 = sc->H[2];
V3 = sc->H[3];
V4 = sc->H[4];
V5 = sc->H[5];
V6 = sc->H[6];
V7 = sc->H[7];
V8 = _mm512_xor_si512( sc->S[0], m512_const1_64( CB0 ) );
V9 = _mm512_xor_si512( sc->S[1], m512_const1_64( CB1 ) );
VA = _mm512_xor_si512( sc->S[2], m512_const1_64( CB2 ) );
VB = _mm512_xor_si512( sc->S[3], m512_const1_64( CB3 ) );
VC = _mm512_xor_si512( _mm512_set1_epi64( sc->T0 ),
m512_const1_64( CB4 ) );
VD = _mm512_xor_si512( _mm512_set1_epi64( sc->T0 ),
m512_const1_64( CB5 ) );
VE = _mm512_xor_si512( _mm512_set1_epi64( sc->T1 ),
m512_const1_64( CB6 ) );
VF = _mm512_xor_si512( _mm512_set1_epi64( sc->T1 ),
m512_const1_64( CB7 ) );
shuf_bswap64 = m512_const_64( 0x38393a3b3c3d3e3f, 0x3031323334353637,
0x28292a2b2c2d2e2f, 0x2021222324252627,
0x18191a1b1c1d1e1f, 0x1011121314151617,
0x08090a0b0c0d0e0f, 0x0001020304050607 );
M0 = _mm512_shuffle_epi8( sc->buf[ 0], shuf_bswap64 );
M1 = _mm512_shuffle_epi8( sc->buf[ 1], shuf_bswap64 );
M2 = _mm512_shuffle_epi8( sc->buf[ 2], shuf_bswap64 );
M3 = _mm512_shuffle_epi8( sc->buf[ 3], shuf_bswap64 );
M4 = _mm512_shuffle_epi8( sc->buf[ 4], shuf_bswap64 );
M5 = _mm512_shuffle_epi8( sc->buf[ 5], shuf_bswap64 );
M6 = _mm512_shuffle_epi8( sc->buf[ 6], shuf_bswap64 );
M7 = _mm512_shuffle_epi8( sc->buf[ 7], shuf_bswap64 );
M8 = _mm512_shuffle_epi8( sc->buf[ 8], shuf_bswap64 );
M9 = _mm512_shuffle_epi8( sc->buf[ 9], shuf_bswap64 );
MA = _mm512_shuffle_epi8( sc->buf[10], shuf_bswap64 );
MB = _mm512_shuffle_epi8( sc->buf[11], shuf_bswap64 );
MC = _mm512_shuffle_epi8( sc->buf[12], shuf_bswap64 );
MD = _mm512_shuffle_epi8( sc->buf[13], shuf_bswap64 );
ME = _mm512_shuffle_epi8( sc->buf[14], shuf_bswap64 );
MF = _mm512_shuffle_epi8( sc->buf[15], shuf_bswap64 );
ROUND_B_8WAY(0);
ROUND_B_8WAY(1);
ROUND_B_8WAY(2);
ROUND_B_8WAY(3);
ROUND_B_8WAY(4);
ROUND_B_8WAY(5);
ROUND_B_8WAY(6);
ROUND_B_8WAY(7);
ROUND_B_8WAY(8);
ROUND_B_8WAY(9);
ROUND_B_8WAY(0);
ROUND_B_8WAY(1);
ROUND_B_8WAY(2);
ROUND_B_8WAY(3);
ROUND_B_8WAY(4);
ROUND_B_8WAY(5);
sc->H[0] = mm512_xor4( V8, V0, sc->S[0], sc->H[0] );
sc->H[1] = mm512_xor4( V9, V1, sc->S[1], sc->H[1] );
sc->H[2] = mm512_xor4( VA, V2, sc->S[2], sc->H[2] );
sc->H[3] = mm512_xor4( VB, V3, sc->S[3], sc->H[3] );
sc->H[4] = mm512_xor4( VC, V4, sc->S[0], sc->H[4] );
sc->H[5] = mm512_xor4( VD, V5, sc->S[1], sc->H[5] );
sc->H[6] = mm512_xor4( VE, V6, sc->S[2], sc->H[6] );
sc->H[7] = mm512_xor4( VF, V7, sc->S[3], sc->H[7] );
}
void blake512_8way_init( blake_8way_big_context *sc )
{
__m512i zero = m512_zero;
@@ -533,43 +455,39 @@ blake64_8way( blake_8way_big_context *sc, const void *data, size_t len )
const int buf_size = 128; // sizeof/8
// 64, 80 bytes: 1st pass copy data. 2nd pass copy padding and compress.
// 128 bytes: 1st pass copy data, compress. 2nd pass copy padding, compress.
buf = sc->buf;
ptr = sc->ptr;
if ( len < (buf_size - ptr) )
{
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
sc->ptr = ptr;
return;
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE64(sc);
while ( len > 0 )
{
size_t clen;
size_t clen;
clen = buf_size - ptr;
if ( clen > len )
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata = vdata + (clen>>3);
len -= clen;
if ( ptr == buf_size )
{
if ( ( T0 = T0 + 1024 ) < 1024 )
T1 = T1 + 1;
COMPRESS64_8WAY( buf );
ptr = 0;
}
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata = vdata + (clen>>3);
len -= clen;
if ( ptr == buf_size )
{
if ( ( T0 = SPH_T64(T0 + 1024) ) < 1024 )
T1 = SPH_T64(T1 + 1);
COMPRESS64_8WAY;
ptr = 0;
}
}
WRITE_STATE64(sc);
sc->ptr = ptr;
}
}
static void
blake64_8way_close( blake_8way_big_context *sc, void *dst )
@@ -577,22 +495,26 @@ blake64_8way_close( blake_8way_big_context *sc, void *dst )
__m512i buf[16];
size_t ptr;
unsigned bit_len;
uint64_t th, tl;
// uint64_t z, zz;
sph_u64 th, tl;
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
// z = 0x80 >> n;
// zz = ((ub & -z) | z) & 0xFF;
// buf[ptr>>3] = _mm512_set1_epi64( zz );
buf[ptr>>3] = m512_const1_64( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;
if (ptr == 0 )
{
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
}
else if ( sc->T0 == 0 )
{
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
sc->T1 = sc->T1 - 1;
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL) + bit_len;
sc->T1 = SPH_T64(sc->T1 - 1);
}
else
{
@@ -613,8 +535,8 @@ blake64_8way_close( blake_8way_big_context *sc, void *dst )
memset_zero_512( buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
blake64_8way( sc, buf + (ptr>>3), 128 - ptr );
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
memset_zero_512( buf, 112>>3 );
buf[104>>3] = m512_const1_64( 0x0100000000000000ULL );
buf[112>>3] = m512_const1_64( bswap_64( th ) );
@@ -625,79 +547,6 @@ blake64_8way_close( blake_8way_big_context *sc, void *dst )
mm512_block_bswap_64( (__m512i*)dst, sc->H );
}
// init, update & close
// One-shot init + update + close for 8-way Blake-512 (AVX-512).
// dst receives the 64-byte digest for each of the 8 interleaved lanes;
// data is the 8-way interleaved input, len its byte length per lane.
// NOTE(review): the single memcpy_512 below holds at most one 128-byte
// block, so this fast path appears to assume len <= 128 (the miner uses
// 64, 80 or 128 byte inputs) — confirm before calling with longer input.
void blake512_8way_full( blake_8way_big_context *sc, void * dst,
                         const void *data, size_t len )
{
// init: Blake-512 IV (SHA-512 initial values), zero salt, zero counters.
casti_m512i( sc->H, 0 ) = m512_const1_64( 0x6A09E667F3BCC908 );
casti_m512i( sc->H, 1 ) = m512_const1_64( 0xBB67AE8584CAA73B );
casti_m512i( sc->H, 2 ) = m512_const1_64( 0x3C6EF372FE94F82B );
casti_m512i( sc->H, 3 ) = m512_const1_64( 0xA54FF53A5F1D36F1 );
casti_m512i( sc->H, 4 ) = m512_const1_64( 0x510E527FADE682D1 );
casti_m512i( sc->H, 5 ) = m512_const1_64( 0x9B05688C2B3E6C1F );
casti_m512i( sc->H, 6 ) = m512_const1_64( 0x1F83D9ABFB41BD6B );
casti_m512i( sc->H, 7 ) = m512_const1_64( 0x5BE0CD19137E2179 );
casti_m512i( sc->S, 0 ) = m512_zero;
casti_m512i( sc->S, 1 ) = m512_zero;
casti_m512i( sc->S, 2 ) = m512_zero;
casti_m512i( sc->S, 3 ) = m512_zero;
sc->T0 = sc->T1 = 0;
sc->ptr = 0;
// update: buffer the data; a full 128-byte block is compressed now,
// shorter input is left in buf for the padding pass below.
memcpy_512( sc->buf, (__m512i*)data, len>>3 );
sc->ptr = len;
if ( len == 128 )
{
// Bump the 1024-bit block counter; carry into T1 on wrap.
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
sc->T1 = sc->T1 + 1;
blake512_8way_compress( sc );
sc->ptr = 0;
}
// close: append 0x80 padding byte, then encode total bit length.
size_t ptr64 = sc->ptr >> 3;
unsigned bit_len;
uint64_t th, tl;
bit_len = sc->ptr << 3;
sc->buf[ptr64] = m512_const1_64( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;
if ( ptr64 == 0 )
{
// Empty final block: pre-bias counters so the +1024 below nets to
// bit_len = 0 for the padding-only block.
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
}
else if ( sc->T0 == 0 )
{
// T0 already wrapped to 0: borrow from T1.
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
sc->T1 = sc->T1 - 1;
}
else
sc->T0 -= 1024 - bit_len;
// Zero the gap, set the 0x01 end-marker word, then the 128-bit
// big-endian message length in the last two words.
memset_zero_512( sc->buf + ptr64 + 1, 13 - ptr64 );
sc->buf[13] = m512_const1_64( 0x0100000000000000ULL );
sc->buf[14] = m512_const1_64( bswap_64( th ) );
sc->buf[15] = m512_const1_64( bswap_64( tl ) );
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
sc->T1 = sc->T1 + 1;
blake512_8way_compress( sc );
// Output is byte-swapped back to little-endian lane order.
mm512_block_bswap_64( (__m512i*)dst, sc->H );
}
void
blake512_8way_update(void *cc, const void *data, size_t len)
{
@@ -706,6 +555,12 @@ blake512_8way_update(void *cc, const void *data, size_t len)
void
blake512_8way_close(void *cc, void *dst)
{
blake512_8way_addbits_and_close(cc, 0, 0, dst);
}
void
blake512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
blake64_8way_close(cc, dst);
}
@@ -741,7 +596,7 @@ blake512_8way_close(void *cc, void *dst)
#define DECL_STATE64_4WAY \
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
__m256i S0, S1, S2, S3; \
uint64_t T0, T1;
sph_u64 T0, T1;
#define COMPRESS64_4WAY do \
{ \
@@ -815,81 +670,6 @@ blake512_8way_close(void *cc, void *dst)
} while (0)
// Run one Blake-512 compression over the 4-way (AVX2) interleaved state.
// Structurally identical to blake512_8way_compress but with __m256i
// vectors carrying 4 parallel 64-bit hash lanes.
void blake512_4way_compress( blake_4way_big_context *sc )
{
__m256i M0, M1, M2, M3, M4, M5, M6, M7;   // byte-swapped message words
__m256i M8, M9, MA, MB, MC, MD, ME, MF;
__m256i V0, V1, V2, V3, V4, V5, V6, V7;   // working state v[0..15]
__m256i V8, V9, VA, VB, VC, VD, VE, VF;
__m256i shuf_bswap64;
// v[0..7] = chain value h[0..7]
V0 = sc->H[0];
V1 = sc->H[1];
V2 = sc->H[2];
V3 = sc->H[3];
V4 = sc->H[4];
V5 = sc->H[5];
V6 = sc->H[6];
V7 = sc->H[7];
// v[8..11] = salt XOR constants; v[12..15] = counters XOR constants
// (t0,t0,t1,t1 per the Blake-512 specification).
V8 = _mm256_xor_si256( sc->S[0], m256_const1_64( CB0 ) );
V9 = _mm256_xor_si256( sc->S[1], m256_const1_64( CB1 ) );
VA = _mm256_xor_si256( sc->S[2], m256_const1_64( CB2 ) );
VB = _mm256_xor_si256( sc->S[3], m256_const1_64( CB3 ) );
VC = _mm256_xor_si256( _mm256_set1_epi64x( sc->T0 ),
m256_const1_64( CB4 ) );
VD = _mm256_xor_si256( _mm256_set1_epi64x( sc->T0 ),
m256_const1_64( CB5 ) );
VE = _mm256_xor_si256( _mm256_set1_epi64x( sc->T1 ),
m256_const1_64( CB6 ) );
VF = _mm256_xor_si256( _mm256_set1_epi64x( sc->T1 ),
m256_const1_64( CB7 ) );
// Shuffle mask byte-swapping each 64-bit word within every 128-bit lane.
shuf_bswap64 = m256_const_64( 0x18191a1b1c1d1e1f, 0x1011121314151617,
0x08090a0b0c0d0e0f, 0x0001020304050607 );
M0 = _mm256_shuffle_epi8( sc->buf[ 0], shuf_bswap64 );
M1 = _mm256_shuffle_epi8( sc->buf[ 1], shuf_bswap64 );
M2 = _mm256_shuffle_epi8( sc->buf[ 2], shuf_bswap64 );
M3 = _mm256_shuffle_epi8( sc->buf[ 3], shuf_bswap64 );
M4 = _mm256_shuffle_epi8( sc->buf[ 4], shuf_bswap64 );
M5 = _mm256_shuffle_epi8( sc->buf[ 5], shuf_bswap64 );
M6 = _mm256_shuffle_epi8( sc->buf[ 6], shuf_bswap64 );
M7 = _mm256_shuffle_epi8( sc->buf[ 7], shuf_bswap64 );
M8 = _mm256_shuffle_epi8( sc->buf[ 8], shuf_bswap64 );
M9 = _mm256_shuffle_epi8( sc->buf[ 9], shuf_bswap64 );
MA = _mm256_shuffle_epi8( sc->buf[10], shuf_bswap64 );
MB = _mm256_shuffle_epi8( sc->buf[11], shuf_bswap64 );
MC = _mm256_shuffle_epi8( sc->buf[12], shuf_bswap64 );
MD = _mm256_shuffle_epi8( sc->buf[13], shuf_bswap64 );
ME = _mm256_shuffle_epi8( sc->buf[14], shuf_bswap64 );
MF = _mm256_shuffle_epi8( sc->buf[15], shuf_bswap64 );
// 16 rounds; sigma schedule repeats with period 10, so rounds 10..15
// reuse indices 0..5 — the repeated (0)..(5) is intentional.
ROUND_B_4WAY(0);
ROUND_B_4WAY(1);
ROUND_B_4WAY(2);
ROUND_B_4WAY(3);
ROUND_B_4WAY(4);
ROUND_B_4WAY(5);
ROUND_B_4WAY(6);
ROUND_B_4WAY(7);
ROUND_B_4WAY(8);
ROUND_B_4WAY(9);
ROUND_B_4WAY(0);
ROUND_B_4WAY(1);
ROUND_B_4WAY(2);
ROUND_B_4WAY(3);
ROUND_B_4WAY(4);
ROUND_B_4WAY(5);
// Finalization: h'[i] = h[i] ^ s[i mod 4] ^ v[i] ^ v[i+8]
sc->H[0] = mm256_xor4( V8, V0, sc->S[0], sc->H[0] );
sc->H[1] = mm256_xor4( V9, V1, sc->S[1], sc->H[1] );
sc->H[2] = mm256_xor4( VA, V2, sc->S[2], sc->H[2] );
sc->H[3] = mm256_xor4( VB, V3, sc->S[3], sc->H[3] );
sc->H[4] = mm256_xor4( VC, V4, sc->S[0], sc->H[4] );
sc->H[5] = mm256_xor4( VD, V5, sc->S[1], sc->H[5] );
sc->H[6] = mm256_xor4( VE, V6, sc->S[2], sc->H[6] );
sc->H[7] = mm256_xor4( VF, V7, sc->S[3], sc->H[7] );
}
void blake512_4way_init( blake_4way_big_context *sc )
{
__m256i zero = m256_zero;
@@ -901,12 +681,10 @@ void blake512_4way_init( blake_4way_big_context *sc )
casti_m256i( sc->H, 5 ) = m256_const1_64( 0x9B05688C2B3E6C1F );
casti_m256i( sc->H, 6 ) = m256_const1_64( 0x1F83D9ABFB41BD6B );
casti_m256i( sc->H, 7 ) = m256_const1_64( 0x5BE0CD19137E2179 );
casti_m256i( sc->S, 0 ) = zero;
casti_m256i( sc->S, 1 ) = zero;
casti_m256i( sc->S, 2 ) = zero;
casti_m256i( sc->S, 3 ) = zero;
sc->T0 = sc->T1 = 0;
sc->ptr = 0;
}
@@ -925,31 +703,31 @@ blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
ptr = sc->ptr;
if ( len < (buf_size - ptr) )
{
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
sc->ptr = ptr;
return;
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE64(sc);
while ( len > 0 )
{
size_t clen;
size_t clen;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata = vdata + (clen>>3);
len -= clen;
if ( ptr == buf_size )
{
if ( (T0 = T0 + 1024 ) < 1024 )
T1 = SPH_T64(T1 + 1);
COMPRESS64_4WAY;
ptr = 0;
}
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata = vdata + (clen>>3);
len -= clen;
if (ptr == buf_size )
{
if ((T0 = SPH_T64(T0 + 1024)) < 1024)
T1 = SPH_T64(T1 + 1);
COMPRESS64_4WAY;
ptr = 0;
}
}
WRITE_STATE64(sc);
sc->ptr = ptr;
@@ -961,7 +739,7 @@ blake64_4way_close( blake_4way_big_context *sc, void *dst )
__m256i buf[16];
size_t ptr;
unsigned bit_len;
uint64_t th, tl;
sph_u64 th, tl;
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
@@ -970,13 +748,13 @@ blake64_4way_close( blake_4way_big_context *sc, void *dst )
th = sc->T1;
if (ptr == 0 )
{
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
}
else if ( sc->T0 == 0 )
{
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
sc->T1 = sc->T1 - 1;
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL) + bit_len;
sc->T1 = SPH_T64(sc->T1 - 1);
}
else
{
@@ -1010,77 +788,13 @@ blake64_4way_close( blake_4way_big_context *sc, void *dst )
mm256_block_bswap_64( (__m256i*)dst, sc->H );
}
// init, update & close
void blake512_4way_full( blake_4way_big_context *sc, void * dst,
const void *data, size_t len )
/*
void
blake512_4way_init(void *cc)
{
// init
casti_m256i( sc->H, 0 ) = m256_const1_64( 0x6A09E667F3BCC908 );
casti_m256i( sc->H, 1 ) = m256_const1_64( 0xBB67AE8584CAA73B );
casti_m256i( sc->H, 2 ) = m256_const1_64( 0x3C6EF372FE94F82B );
casti_m256i( sc->H, 3 ) = m256_const1_64( 0xA54FF53A5F1D36F1 );
casti_m256i( sc->H, 4 ) = m256_const1_64( 0x510E527FADE682D1 );
casti_m256i( sc->H, 5 ) = m256_const1_64( 0x9B05688C2B3E6C1F );
casti_m256i( sc->H, 6 ) = m256_const1_64( 0x1F83D9ABFB41BD6B );
casti_m256i( sc->H, 7 ) = m256_const1_64( 0x5BE0CD19137E2179 );
casti_m256i( sc->S, 0 ) = m256_zero;
casti_m256i( sc->S, 1 ) = m256_zero;
casti_m256i( sc->S, 2 ) = m256_zero;
casti_m256i( sc->S, 3 ) = m256_zero;
sc->T0 = sc->T1 = 0;
sc->ptr = 0;
// update
memcpy_256( sc->buf, (__m256i*)data, len>>3 );
sc->ptr += len;
if ( len == 128 )
{
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
sc->T1 = sc->T1 + 1;
blake512_4way_compress( sc );
sc->ptr = 0;
}
// close
size_t ptr64 = sc->ptr >> 3;
unsigned bit_len;
uint64_t th, tl;
bit_len = sc->ptr << 3;
sc->buf[ptr64] = m256_const1_64( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;
if ( sc->ptr == 0 )
{
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
}
else if ( sc->T0 == 0 )
{
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
sc->T1 = sc->T1 - 1;
}
else
sc->T0 -= 1024 - bit_len;
memset_zero_256( sc->buf + ptr64 + 1, 13 - ptr64 );
sc->buf[13] = m256_const1_64( 0x0100000000000000ULL );
sc->buf[14] = m256_const1_64( bswap_64( th ) );
sc->buf[15] = m256_const1_64( bswap_64( tl ) );
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
sc->T1 = sc->T1 + 1;
blake512_4way_compress( sc );
mm256_block_bswap_64( (__m256i*)dst, sc->H );
blake64_4way_init(cc, IV512, salt_zero_big);
}
*/
void
blake512_4way_update(void *cc, const void *data, size_t len)
@@ -1092,8 +806,17 @@ void
blake512_4way_close(void *cc, void *dst)
{
blake64_4way_close( cc, dst );
// blake512_4way_addbits_and_close(cc, dst);
}
/*
void
blake512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
blake64_4way_close(cc, ub, n, dst, 8);
}
*/
#ifdef __cplusplus
}
#endif

View File

@@ -14,7 +14,7 @@ void blakecoin_4way_hash(void *state, const void *input)
blake256r8_4way_context ctx;
memcpy( &ctx, &blakecoin_4w_ctx, sizeof ctx );
blake256r8_4way_update( &ctx, input + (64<<2), 16 );
blake256r8_4way( &ctx, input + (64<<2), 16 );
blake256r8_4way_close( &ctx, vhash );
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
@@ -37,7 +37,7 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata );
blake256r8_4way_init( &blakecoin_4w_ctx );
blake256r8_4way_update( &blakecoin_4w_ctx, vdata, 64 );
blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 );
do {
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
@@ -71,7 +71,7 @@ void blakecoin_8way_hash( void *state, const void *input )
blake256r8_8way_context ctx;
memcpy( &ctx, &blakecoin_8w_ctx, sizeof ctx );
blake256r8_8way_update( &ctx, input + (64<<3), 16 );
blake256r8_8way( &ctx, input + (64<<3), 16 );
blake256r8_8way_close( &ctx, vhash );
dintrlv_8x32( state, state+ 32, state+ 64, state+ 96, state+128,
@@ -95,7 +95,7 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
mm256_bswap32_intrlv80_8x32( vdata, pdata );
blake256r8_8way_init( &blakecoin_8w_ctx );
blake256r8_8way_update( &blakecoin_8w_ctx, vdata, 64 );
blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 );
do {
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,

View File

@@ -21,7 +21,7 @@ void decred_hash_4way( void *state, const void *input )
blake256_4way_context ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
blake256_4way_update( &ctx, tail, tail_len );
blake256_4way( &ctx, tail, tail_len );
blake256_4way_close( &ctx, vhash );
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
@@ -46,7 +46,7 @@ int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
mm128_intrlv_4x32x( vdata, edata, edata, edata, edata, 180*8 );
blake256_4way_init( &blake_mid );
blake256_4way_update( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
do {

View File

@@ -22,23 +22,23 @@ extern void pentablakehash_4way( void *output, const void *input )
blake512_4way_init( &ctx );
blake512_4way_update( &ctx, input, 80 );
blake512_4way( &ctx, input, 80 );
blake512_4way_close( &ctx, vhash );
blake512_4way_init( &ctx );
blake512_4way_update( &ctx, vhash, 64 );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
blake512_4way_init( &ctx );
blake512_4way_update( &ctx, vhash, 64 );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
blake512_4way_init( &ctx );
blake512_4way_update( &ctx, vhash, 64 );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
blake512_4way_init( &ctx );
blake512_4way_update( &ctx, vhash, 64 );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
memcpy( output, hash0, 32 );

View File

@@ -168,66 +168,6 @@ int cube_4way_close( cube_4way_context *sp, void *output )
return 0;
}
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
const void *data, size_t size )
{
__m512i *h = (__m512i*)sp->h;
__m128i *iv = (__m128i*)( hashbitlen == 512 ? (__m128i*)IV512
: (__m128i*)IV256 );
sp->hashlen = hashbitlen/128;
sp->blocksize = 32/16;
sp->rounds = 16;
sp->pos = 0;
h[ 0] = m512_const1_128( iv[0] );
h[ 1] = m512_const1_128( iv[1] );
h[ 2] = m512_const1_128( iv[2] );
h[ 3] = m512_const1_128( iv[3] );
h[ 4] = m512_const1_128( iv[4] );
h[ 5] = m512_const1_128( iv[5] );
h[ 6] = m512_const1_128( iv[6] );
h[ 7] = m512_const1_128( iv[7] );
h[ 0] = m512_const1_128( iv[0] );
h[ 1] = m512_const1_128( iv[1] );
h[ 2] = m512_const1_128( iv[2] );
h[ 3] = m512_const1_128( iv[3] );
h[ 4] = m512_const1_128( iv[4] );
h[ 5] = m512_const1_128( iv[5] );
h[ 6] = m512_const1_128( iv[6] );
h[ 7] = m512_const1_128( iv[7] );
const int len = size >> 4;
const __m512i *in = (__m512i*)data;
__m512i *hash = (__m512i*)output;
int i;
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm512_xor_si512( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_4way( sp );
sp->pos = 0;
}
}
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm512_xor_si512( sp->h[ sp->pos ],
m512_const2_64( 0, 0x0000000000000080 ) );
transform_4way( sp );
sp->h[7] = _mm512_xor_si512( sp->h[7],
m512_const2_64( 0x0000000100000000, 0 ) );
for ( i = 0; i < 10; ++i )
transform_4way( sp );
memcpy( hash, sp->h, sp->hashlen<<6);
return 0;
}
int cube_4way_update_close( cube_4way_context *sp, void *output,
const void *data, size_t size )
{
@@ -436,62 +376,4 @@ int cube_2way_update_close( cube_2way_context *sp, void *output,
return 0;
}
int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
const void *data, size_t size )
{
__m256i *h = (__m256i*)sp->h;
__m128i *iv = (__m128i*)( hashbitlen == 512 ? (__m128i*)IV512
: (__m128i*)IV256 );
sp->hashlen = hashbitlen/128;
sp->blocksize = 32/16;
sp->rounds = 16;
sp->pos = 0;
h[ 0] = m256_const1_128( iv[0] );
h[ 1] = m256_const1_128( iv[1] );
h[ 2] = m256_const1_128( iv[2] );
h[ 3] = m256_const1_128( iv[3] );
h[ 4] = m256_const1_128( iv[4] );
h[ 5] = m256_const1_128( iv[5] );
h[ 6] = m256_const1_128( iv[6] );
h[ 7] = m256_const1_128( iv[7] );
h[ 0] = m256_const1_128( iv[0] );
h[ 1] = m256_const1_128( iv[1] );
h[ 2] = m256_const1_128( iv[2] );
h[ 3] = m256_const1_128( iv[3] );
h[ 4] = m256_const1_128( iv[4] );
h[ 5] = m256_const1_128( iv[5] );
h[ 6] = m256_const1_128( iv[6] );
h[ 7] = m256_const1_128( iv[7] );
const int len = size >> 4;
const __m256i *in = (__m256i*)data;
__m256i *hash = (__m256i*)output;
int i;
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_2way( sp );
sp->pos = 0;
}
}
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
m256_const2_64( 0, 0x0000000000000080 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7],
m256_const2_64( 0x0000000100000000, 0 ) );
for ( i = 0; i < 10; ++i ) transform_2way( sp );
memcpy( hash, sp->h, sp->hashlen<<5 );
return 0;
}
#endif

View File

@@ -21,12 +21,15 @@ typedef struct _cube_4way_context cube_4way_context;
int cube_4way_init( cube_4way_context* sp, int hashbitlen, int rounds,
int blockbytes );
// reinitialize context with same parameters, much faster.
int cube_4way_reinit( cube_4way_context *sp );
int cube_4way_update( cube_4way_context *sp, const void *data, size_t size );
int cube_4way_close( cube_4way_context *sp, void *output );
int cube_4way_update_close( cube_4way_context *sp, void *output,
const void *data, size_t size );
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
const void *data, size_t size );
#endif
@@ -45,12 +48,15 @@ typedef struct _cube_2way_context cube_2way_context;
int cube_2way_init( cube_2way_context* sp, int hashbitlen, int rounds,
int blockbytes );
// reinitialize context with same parameters, much faster.
int cube_2way_reinit( cube_2way_context *sp );
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size );
int cube_2way_close( cube_2way_context *sp, void *output );
int cube_2way_update_close( cube_2way_context *sp, void *output,
const void *data, size_t size );
int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
const void *data, size_t size );
#endif

View File

@@ -20,7 +20,6 @@
#include "hash_api.h"
//#include "vperm.h"
#include <immintrin.h>
#include "simd-utils.h"
MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
@@ -518,165 +517,6 @@ HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
return SUCCESS;
}
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
int nHashSize, const BitSequence *data, DataLength datalen )
{
int i, j;
state->k = m128_zero;
state->processed_bits = 0;
state->uBufferBytes = 0;
switch( nHashSize )
{
case 256:
state->uHashSize = 256;
state->uBlockLength = 192;
state->uRounds = 8;
state->hashsize = m128_const_64( 0, 0x100 );
state->const1536 = m128_const_64( 0, 0x600 );
break;
case 512:
state->uHashSize = 512;
state->uBlockLength = 128;
state->uRounds = 10;
state->hashsize = m128_const_64( 0, 0x200 );
state->const1536 = m128_const_64( 0, 0x400 );
break;
default:
return BAD_HASHBITLEN;
}
for(i = 0; i < 4; i++)
for(j = 0; j < nHashSize / 256; j++)
state->state[i][j] = state->hashsize;
for(i = 0; i < 4; i++)
for(j = nHashSize / 256; j < 4; j++)
state->state[i][j] = m128_zero;
unsigned int uBlockCount, uRemainingBytes;
if( (state->uBufferBytes + datalen) >= state->uBlockLength )
{
if( state->uBufferBytes != 0 )
{
// Fill the buffer
memcpy( state->buffer + state->uBufferBytes,
(void*)data, state->uBlockLength - state->uBufferBytes );
// Process buffer
Compress( state, state->buffer, 1 );
state->processed_bits += state->uBlockLength * 8;
data += state->uBlockLength - state->uBufferBytes;
datalen -= state->uBlockLength - state->uBufferBytes;
}
// buffer now does not contain any unprocessed bytes
uBlockCount = datalen / state->uBlockLength;
uRemainingBytes = datalen % state->uBlockLength;
if( uBlockCount > 0 )
{
Compress( state, data, uBlockCount );
state->processed_bits += uBlockCount * state->uBlockLength * 8;
data += uBlockCount * state->uBlockLength;
}
if( uRemainingBytes > 0 )
memcpy(state->buffer, (void*)data, uRemainingBytes);
state->uBufferBytes = uRemainingBytes;
}
else
{
memcpy( state->buffer + state->uBufferBytes, (void*)data, datalen );
state->uBufferBytes += datalen;
}
__m128i remainingbits;
// Add remaining bytes in the buffer
state->processed_bits += state->uBufferBytes * 8;
remainingbits = _mm_set_epi32( 0, 0, 0, state->uBufferBytes * 8 );
// Pad with 0x80
state->buffer[state->uBufferBytes++] = 0x80;
// Enough buffer space for padding in this block?
if( (state->uBlockLength - state->uBufferBytes) >= 18 )
{
// Pad with zeros
memset( state->buffer + state->uBufferBytes, 0, state->uBlockLength - (state->uBufferBytes + 18) );
// Hash size
*( (unsigned short*)(state->buffer + state->uBlockLength - 18) ) = state->uHashSize;
// Processed bits
*( (DataLength*)(state->buffer + state->uBlockLength - 16) ) =
state->processed_bits;
*( (DataLength*)(state->buffer + state->uBlockLength - 8) ) = 0;
// Last block contains message bits?
if( state->uBufferBytes == 1 )
{
state->k = _mm_xor_si128( state->k, state->k );
state->k = _mm_sub_epi64( state->k, state->const1536 );
}
else
{
state->k = _mm_add_epi64( state->k, remainingbits );
state->k = _mm_sub_epi64( state->k, state->const1536 );
}
// Compress
Compress( state, state->buffer, 1 );
}
else
{
// Fill with zero and compress
memset( state->buffer + state->uBufferBytes, 0,
state->uBlockLength - state->uBufferBytes );
state->k = _mm_add_epi64( state->k, remainingbits );
state->k = _mm_sub_epi64( state->k, state->const1536 );
Compress( state, state->buffer, 1 );
// Last block
memset( state->buffer, 0, state->uBlockLength - 18 );
// Hash size
*( (unsigned short*)(state->buffer + state->uBlockLength - 18) ) =
state->uHashSize;
// Processed bits
*( (DataLength*)(state->buffer + state->uBlockLength - 16) ) =
state->processed_bits;
*( (DataLength*)(state->buffer + state->uBlockLength - 8) ) = 0;
// Compress the last block
state->k = _mm_xor_si128( state->k, state->k );
state->k = _mm_sub_epi64( state->k, state->const1536 );
Compress( state, state->buffer, 1) ;
}
// Store the hash value
_mm_store_si128( (__m128i*)hashval + 0, state->state[0][0] );
_mm_store_si128( (__m128i*)hashval + 1, state->state[1][0] );
if( state->uHashSize == 512 )
{
_mm_store_si128( (__m128i*)hashval + 2, state->state[2][0] );
_mm_store_si128( (__m128i*)hashval + 3, state->state[3][0] );
}
return SUCCESS;
}
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
{

View File

@@ -55,8 +55,6 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit
HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen );
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
int nHashSize, const BitSequence *data, DataLength databitlen );
#endif // HASH_API_H

View File

@@ -313,92 +313,4 @@ int echo_4way_update_close( echo_4way_context *state, void *hashval,
return 0;
}
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
const void *data, int datalen )
{
int i, j;
int databitlen = datalen * 8;
ctx->k = m512_zero;
ctx->processed_bits = 0;
ctx->uBufferBytes = 0;
switch( nHashSize )
{
case 256:
ctx->uHashSize = 256;
ctx->uBlockLength = 192;
ctx->uRounds = 8;
ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x100 );
ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x600 );
break;
case 512:
ctx->uHashSize = 512;
ctx->uBlockLength = 128;
ctx->uRounds = 10;
ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x200 );
ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x400);
break;
default:
return 1;
}
for( i = 0; i < 4; i++ )
for( j = 0; j < nHashSize / 256; j++ )
ctx->state[ i ][ j ] = ctx->hashsize;
for( i = 0; i < 4; i++ )
for( j = nHashSize / 256; j < 4; j++ )
ctx->state[ i ][ j ] = m512_zero;
// bytelen is either 32 (maybe), 64 or 80 or 128!
// all are less than full block.
int vlen = datalen / 32;
const int vblen = ctx->uBlockLength / 16; // 16 bytes per lane
__m512i remainingbits;
if ( databitlen == 1024 )
{
echo_4way_compress( ctx, data, 1 );
ctx->processed_bits = 1024;
remainingbits = m512_const2_64( 0, -1024 );
vlen = 0;
}
else
{
vlen = databitlen / 128; // * 4 lanes / 128 bits per lane
memcpy_512( ctx->buffer, data, vlen );
ctx->processed_bits += (unsigned int)( databitlen );
remainingbits = _mm512_set4_epi32( 0, 0, 0, databitlen );
}
ctx->buffer[ vlen ] = _mm512_set4_epi32( 0, 0, 0, 0x80 );
memset_zero_512( ctx->buffer + vlen + 1, vblen - vlen - 2 );
ctx->buffer[ vblen-2 ] =
_mm512_set4_epi32( (uint32_t)ctx->uHashSize << 16, 0, 0, 0 );
ctx->buffer[ vblen-1 ] =
_mm512_set4_epi64( 0, ctx->processed_bits,
0, ctx->processed_bits );
ctx->k = _mm512_add_epi64( ctx->k, remainingbits );
ctx->k = _mm512_sub_epi64( ctx->k, ctx->const1536 );
echo_4way_compress( ctx, ctx->buffer, 1 );
_mm512_store_si512( (__m512i*)hashval + 0, ctx->state[ 0 ][ 0] );
_mm512_store_si512( (__m512i*)hashval + 1, ctx->state[ 1 ][ 0] );
if ( ctx->uHashSize == 512 )
{
_mm512_store_si512( (__m512i*)hashval + 2, ctx->state[ 2 ][ 0 ] );
_mm512_store_si512( (__m512i*)hashval + 3, ctx->state[ 3 ][ 0 ] );
}
return 0;
}
#endif

View File

@@ -32,8 +32,5 @@ int echo_close( echo_4way_context *state, void *hashval );
int echo_4way_update_close( echo_4way_context *state, void *hashval,
const void *data, int databitlen );
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
const void *data, int datalen );
#endif
#endif

View File

@@ -185,82 +185,6 @@ HashReturn_gr final_groestl( hashState_groestl* ctx, void* output )
return SUCCESS_GR;
}
int groestl512_full( hashState_groestl* ctx, void* output,
const void* input, uint64_t databitlen )
{
int i;
ctx->hashlen = 64;
SET_CONSTANTS();
for ( i = 0; i < SIZE512; i++ )
{
ctx->chaining[i] = _mm_setzero_si128();
ctx->buffer[i] = _mm_setzero_si128();
}
ctx->chaining[ 6 ] = m128_const_64( 0x0200000000000000, 0 );
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;
const int len = (int)databitlen / 128;
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
const int hash_offset = SIZE512 - hashlen_m128i;
int rem = ctx->rem_ptr;
uint64_t blocks = len / SIZE512;
__m128i* in = (__m128i*)input;
// --- update ---
// digest any full blocks, process directly from input
for ( i = 0; i < blocks; i++ )
TF1024( ctx->chaining, &in[ i * SIZE512 ] );
ctx->buf_ptr = blocks * SIZE512;
// copy any remaining data to buffer, it may already contain data
// from a previous update for a midstate precalc
for ( i = 0; i < len % SIZE512; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
i += rem; // use i as rem_ptr in final
//--- final ---
blocks++; // adjust for final block
if ( i == len -1 )
{
// only 128 bits left in buffer, all padding at once
ctx->buffer[i] = _mm_set_epi8( blocks,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
}
else
{
// add first padding
ctx->buffer[i] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
// add zero padding
for ( i += 1; i < SIZE512 - 1; i++ )
ctx->buffer[i] = _mm_setzero_si128();
// add length padding, second last byte is zero unless blocks > 255
ctx->buffer[i] = _mm_set_epi8( blocks, blocks>>8, 0,0, 0,0,0,0,
0, 0 ,0,0, 0,0,0,0 );
}
// digest final padding block and do output transform
TF1024( ctx->chaining, ctx->buffer );
OF1024( ctx->chaining );
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m128i( output, i ) = ctx->chaining[ hash_offset + i ];
return 0;
}
HashReturn_gr update_and_final_groestl( hashState_groestl* ctx, void* output,
const void* input, DataLength_gr databitlen )
{

View File

@@ -87,6 +87,5 @@ HashReturn_gr final_groestl( hashState_groestl*, void* );
HashReturn_gr update_and_final_groestl( hashState_groestl*, void*,
const void*, DataLength_gr );
int groestl512_full( hashState_groestl*, void*, const void*, uint64_t );
#endif /* __hash_h */

View File

@@ -15,7 +15,7 @@
#include "miner.h"
#include "simd-utils.h"
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(__VAES__)
int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )

View File

@@ -18,8 +18,6 @@
#endif
#include <stdlib.h>
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define LENGTH (256)
//#include "brg_endian.h"
@@ -71,5 +69,4 @@ int groestl256_4way_init( groestl256_4way_context*, uint64_t );
int groestl256_4way_update_close( groestl256_4way_context*, void*,
const void*, uint64_t );
#endif
#endif

View File

@@ -15,22 +15,29 @@
#include "miner.h"
#include "simd-utils.h"
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(__VAES__)
int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen )
{
int i;
ctx->hashlen = hashlen;
SET_CONSTANTS();
if (ctx->chaining == NULL || ctx->buffer == NULL)
return 1;
memset_zero_512( ctx->chaining, SIZE512 );
memset_zero_512( ctx->buffer, SIZE512 );
for ( i = 0; i < SIZE512; i++ )
{
ctx->chaining[i] = m512_zero;
ctx->buffer[i] = m512_zero;
}
// The only non-zero in the IV is len. It can be hard coded.
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );
// uint64_t len = U64BIG((uint64_t)LENGTH);
// ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
// INIT_4way(ctx->chaining);
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;
@@ -42,7 +49,7 @@ int groestl512_4way_update_close( groestl512_4way_context* ctx, void* output,
const void* input, uint64_t databitlen )
{
const int len = (int)databitlen / 128;
const int hashlen_m128i = 64 / 16; // bytes to __m128i
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
const int hash_offset = SIZE512 - hashlen_m128i;
int rem = ctx->rem_ptr;
int blocks = len / SIZE512;
@@ -51,13 +58,16 @@ int groestl512_4way_update_close( groestl512_4way_context* ctx, void* output,
// --- update ---
// digest any full blocks, process directly from input
for ( i = 0; i < blocks; i++ )
TF1024_4way( ctx->chaining, &in[ i * SIZE512 ] );
ctx->buf_ptr = blocks * SIZE512;
// copy any remaining data to buffer, it may already contain data
// from a previous update for a midstate precalc
for ( i = 0; i < len % SIZE512; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
i += rem;
i += rem; // use i as rem_ptr in final
//--- final ---
@@ -71,71 +81,23 @@ int groestl512_4way_update_close( groestl512_4way_context* ctx, void* output,
}
else
{
// add first padding
ctx->buffer[i] = m512_const4_64( 0, 0x80, 0, 0x80 );
// add zero padding
for ( i += 1; i < SIZE512 - 1; i++ )
ctx->buffer[i] = m512_zero;
// add length padding, second last byte is zero unless blocks > 255
ctx->buffer[i] = m512_const1_128( _mm_set_epi8(
blocks, blocks>>8, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 ) );
}
// digest final padding block and do output transform
TF1024_4way( ctx->chaining, ctx->buffer );
OF1024_4way( ctx->chaining );
for ( i = 0; i < hashlen_m128i; i++ )
casti_m512i( output, i ) = ctx->chaining[ hash_offset + i ];
return 0;
}
int groestl512_4way_full( groestl512_4way_context* ctx, void* output,
const void* input, uint64_t datalen )
{
const int len = (int)datalen >> 4;
const int hashlen_m128i = 64 >> 4; // bytes to __m128i
const int hash_offset = SIZE512 - hashlen_m128i;
uint64_t blocks = len / SIZE512;
__m512i* in = (__m512i*)input;
int i;
// --- init ---
SET_CONSTANTS();
memset_zero_512( ctx->chaining, SIZE512 );
memset_zero_512( ctx->buffer, SIZE512 );
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;
// --- update ---
for ( i = 0; i < blocks; i++ )
TF1024_4way( ctx->chaining, &in[ i * SIZE512 ] );
ctx->buf_ptr = blocks * SIZE512;
for ( i = 0; i < len % SIZE512; i++ )
ctx->buffer[ ctx->rem_ptr + i ] = in[ ctx->buf_ptr + i ];
i += ctx->rem_ptr;
// --- close ---
blocks++;
if ( i == SIZE512 - 1 )
{
// only 1 vector left in buffer, all padding at once
ctx->buffer[i] = m512_const2_64( blocks << 56, 0x80 );
}
else
{
ctx->buffer[i] = m512_const4_64( 0, 0x80, 0, 0x80 );
for ( i += 1; i < SIZE512 - 1; i++ )
ctx->buffer[i] = m512_zero;
ctx->buffer[i] = m512_const2_64( blocks << 56, 0 );
}
TF1024_4way( ctx->chaining, ctx->buffer );
OF1024_4way( ctx->chaining );
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m512i( output, i ) = ctx->chaining[ hash_offset + i ];

View File

@@ -1,3 +1,11 @@
/* hash.h Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#if !defined(GROESTL512_HASH_4WAY_H__)
#define GROESTL512_HASH_4WAY_H__ 1
@@ -10,10 +18,12 @@
#endif
#include <stdlib.h>
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define LENGTH (512)
//#include "brg_endian.h"
//#define NEED_UINT_64T
//#include "algo/sha/brg_types.h"
/* some sizes (number of bytes) */
#define ROWS (8)
#define LENGTHFIELDLEN (ROWS)
@@ -34,11 +44,34 @@
#define ROUNDS (ROUNDS1024)
//#endif
/*
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define U64BIG(a) (a)
#endif // IS_BIG_ENDIAN
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
#define U64BIG(a) \
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
(ROTL64(a,56) & li_64(FF000000FF000000)))
#endif // IS_LITTLE_ENDIAN
typedef unsigned char BitSequence_gr;
typedef unsigned long long DataLength_gr;
typedef enum { SUCCESS_GR = 0, FAIL_GR = 1, BAD_HASHBITLEN_GR = 2} HashReturn_gr;
*/
#define SIZE512 (SIZE_1024/16)
typedef struct {
__attribute__ ((aligned (128))) __m512i chaining[SIZE512];
__attribute__ ((aligned (64))) __m512i buffer[SIZE512];
int hashlen; // byte
int blk_count; // SIZE_m128i
int buf_ptr; // __m128i offset
int rem_ptr;
@@ -52,11 +85,10 @@ int groestl512_4way_init( groestl512_4way_context*, uint64_t );
int groestl512_4way_update( groestl512_4way_context*, const void*,
uint64_t );
int groestl512_4way_close( groestl512_4way_context*, void* );
int groestl512_4way_update_close( groestl512_4way_context*, void*,
const void*, uint64_t );
int groestl512_4way_full( groestl512_4way_context*, void*,
const void*, uint64_t );
#endif // VAES
#endif // GROESTL512_HASH_4WAY_H__
#endif /* __hash_h */

View File

@@ -161,7 +161,7 @@ bool register_hodl_algo( algo_gate_t* gate )
// return false;
// }
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT;
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;
gate->get_new_work = (void*)&hodl_get_new_work;
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;

View File

@@ -41,10 +41,57 @@
extern "C"{
#endif
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
#define SPH_SMALL_FOOTPRINT_JH 1
#endif
#if !defined SPH_JH_64 && SPH_64_TRUE
#define SPH_JH_64 1
#endif
#if !SPH_64
#undef SPH_JH_64
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
/*
* The internal bitslice representation may use either big-endian or
* little-endian (true bitslice operations do not care about the bit
* ordering, and the bit-swapping linear operations in JH happen to
* be invariant through endianness-swapping). The constants must be
* defined according to the chosen endianness; we use some
* byte-swapping macros for that.
*/
#if SPH_LITTLE_ENDIAN
#if SPH_64
#define C64e(x) ((SPH_C64(x) >> 56) \
| ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
| ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
| ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
| ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
| ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
| ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
| ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
#define dec64e_aligned sph_dec64le_aligned
#define enc64e sph_enc64le
#endif
#else
#if SPH_64
#define C64e(x) SPH_C64(x)
#define dec64e_aligned sph_dec64be_aligned
#define enc64e sph_enc64be
#endif
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define Sb_8W(x0, x1, x2, x3, c) \
@@ -105,97 +152,8 @@ do { \
x3 = _mm256_xor_si256( x3, x4 ); \
} while (0)
static const uint64_t C[] =
{
0x67f815dfa2ded572, 0x571523b70a15847b,
0xf6875a4d90d6ab81, 0x402bd1c3c54f9f4e,
0x9cfa455ce03a98ea, 0x9a99b26699d2c503,
0x8a53bbf2b4960266, 0x31a2db881a1456b5,
0xdb0e199a5c5aa303, 0x1044c1870ab23f40,
0x1d959e848019051c, 0xdccde75eadeb336f,
0x416bbf029213ba10, 0xd027bbf7156578dc,
0x5078aa3739812c0a, 0xd3910041d2bf1a3f,
0x907eccf60d5a2d42, 0xce97c0929c9f62dd,
0xac442bc70ba75c18, 0x23fcc663d665dfd1,
0x1ab8e09e036c6e97, 0xa8ec6c447e450521,
0xfa618e5dbb03f1ee, 0x97818394b29796fd,
0x2f3003db37858e4a, 0x956a9ffb2d8d672a,
0x6c69b8f88173fe8a, 0x14427fc04672c78a,
0xc45ec7bd8f15f4c5, 0x80bb118fa76f4475,
0xbc88e4aeb775de52, 0xf4a3a6981e00b882,
0x1563a3a9338ff48e, 0x89f9b7d524565faa,
0xfde05a7c20edf1b6, 0x362c42065ae9ca36,
0x3d98fe4e433529ce, 0xa74b9a7374f93a53,
0x86814e6f591ff5d0, 0x9f5ad8af81ad9d0e,
0x6a6234ee670605a7, 0x2717b96ebe280b8b,
0x3f1080c626077447, 0x7b487ec66f7ea0e0,
0xc0a4f84aa50a550d, 0x9ef18e979fe7e391,
0xd48d605081727686, 0x62b0e5f3415a9e7e,
0x7a205440ec1f9ffc, 0x84c9f4ce001ae4e3,
0xd895fa9df594d74f, 0xa554c324117e2e55,
0x286efebd2872df5b, 0xb2c4a50fe27ff578,
0x2ed349eeef7c8905, 0x7f5928eb85937e44,
0x4a3124b337695f70, 0x65e4d61df128865e,
0xe720b95104771bc7, 0x8a87d423e843fe74,
0xf2947692a3e8297d, 0xc1d9309b097acbdd,
0xe01bdc5bfb301b1d, 0xbf829cf24f4924da,
0xffbf70b431bae7a4, 0x48bcf8de0544320d,
0x39d3bb5332fcae3b, 0xa08b29e0c1c39f45,
0x0f09aef7fd05c9e5, 0x34f1904212347094,
0x95ed44e301b771a2, 0x4a982f4f368e3be9,
0x15f66ca0631d4088, 0xffaf52874b44c147,
0x30c60ae2f14abb7e, 0xe68c6eccc5b67046,
0x00ca4fbd56a4d5a4, 0xae183ec84b849dda,
0xadd1643045ce5773, 0x67255c1468cea6e8,
0x16e10ecbf28cdaa3, 0x9a99949a5806e933,
0x7b846fc220b2601f, 0x1885d1a07facced1,
0xd319dd8da15b5932, 0x46b4a5aac01c9a50,
0xba6b04e467633d9f, 0x7eee560bab19caf6,
0x742128a9ea79b11f, 0xee51363b35f7bde9,
0x76d350755aac571d, 0x01707da3fec2463a,
0x42d8a498afc135f7, 0x79676b9e20eced78,
0xa8db3aea15638341, 0x832c83324d3bc3fa,
0xf347271c1f3b40a7, 0x9a762db734f04059,
0xfd4f21d26c4e3ee7, 0xef5957dc398dfdb8,
0xdaeb492b490c9b8d, 0x0d70f36849d7a25b,
0x84558d7ad0ae3b7d, 0x658ef8e4f0e9a5f5,
0x533b1036f4a2b8a0, 0x5aec3e759e07a80c,
0x4f88e85692946891, 0x4cbcbaf8555cb05b,
0x7b9487f3993bbbe3, 0x5d1c6b72d6f4da75,
0x6db334dc28acae64, 0x71db28b850a5346c,
0x2a518d10f2e261f8, 0xfc75dd593364dbe3,
0xa23fce43f1bcac1c, 0xb043e8023cd1bb67,
0x75a12988ca5b0a33, 0x5c5316b44d19347f,
0x1e4d790ec3943b92, 0x3fafeeb6d7757479,
0x21391abef7d4a8ea, 0x5127234c097ef45c,
0xd23c32ba5324a326, 0xadd5a66d4a17a344,
0x08c9f2afa63e1db5, 0x563c6b91983d5983,
0x4d608672a17cf84c, 0xf6c76e08cc3ee246,
0x5e76bcb1b333982f, 0x2ae6c4efa566d62b,
0x36d4c1bee8b6f406, 0x6321efbc1582ee74,
0x69c953f40d4ec1fd, 0x26585806c45a7da7,
0x16fae0061614c17e, 0x3f9d63283daf907e,
0x0cd29b00e3f2c9d2, 0x300cd4b730ceaa5f,
0x9832e0f216512a74, 0x9af8cee3d830eb0d,
0x9279f1b57b9ec54b, 0xd36886046ee651ff,
0x316796e6574d239b, 0x05750a17f3a6e6cc,
0xce6c3213d98176b1, 0x62a205f88452173c,
0x47154778b3cb2bf4, 0x486a9323825446ff,
0x65655e4e0758df38, 0x8e5086fc897cfcf2,
0x86ca0bd0442e7031, 0x4e477830a20940f0,
0x8338f7d139eea065, 0xbd3a2ce437e95ef7,
0x6ff8130126b29721, 0xe7de9fefd1ed44a3,
0xd992257615dfa08b, 0xbe42dc12f6f7853c,
0x7eb027ab7ceca7d8, 0xdea83eaada7d8d53,
0xd86902bd93ce25aa, 0xf908731afd43f65a,
0xa5194a17daef5fc0, 0x6a21fd4c33664d97,
0x701541db3198b435, 0x9b54cdedbb0f1eea,
0x72409751a163d09a, 0xe26f4791bf9d75f6
};
#if SPH_JH_64
// Big endian version
/*
static const sph_u64 C[] = {
C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
@@ -282,7 +240,6 @@ static const sph_u64 C[] = {
C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
};
*/
#define Ceven_hi(r) (C[((r) << 2) + 0])
#define Ceven_lo(r) (C[((r) << 2) + 1])
@@ -470,7 +427,7 @@ do { \
h7h = _mm256_xor_si256( h7h, m3h ); \
h7l = _mm256_xor_si256( h7l, m3l ); \
/*
static const sph_u64 IV256[] = {
C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
@@ -493,8 +450,11 @@ static const sph_u64 IV512[] = {
C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
};
*/
#else
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
@@ -524,6 +484,57 @@ static const sph_u64 IV512[] = {
W ## ro(h7); \
} while (0)
#if SPH_SMALL_FOOTPRINT_JH
#if SPH_JH_64
/*
* The "small footprint" 64-bit version just uses a partially unrolled
* loop.
*/
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define E8_8W do { \
unsigned r; \
for (r = 0; r < 42; r += 7) { \
SL_8W(0); \
SL_8W(1); \
SL_8W(2); \
SL_8W(3); \
SL_8W(4); \
SL_8W(5); \
SL_8W(6); \
} \
} while (0)
#endif
#define E8 do { \
unsigned r; \
for (r = 0; r < 42; r += 7) { \
SL(0); \
SL(1); \
SL(2); \
SL(3); \
SL(4); \
SL(5); \
SL(6); \
} \
} while (0)
#else
#endif
#else
#if SPH_JH_64
/*
* On a "true 64-bit" architecture, we can unroll at will.
*/
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
@@ -574,7 +585,6 @@ static const sph_u64 IV512[] = {
#endif // AVX512
#define E8 do { \
SLu( 0, 0); \
SLu( 1, 1); \
@@ -620,6 +630,13 @@ static const sph_u64 IV512[] = {
SLu(41, 6); \
} while (0)
#else
#endif
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
void jh256_8way_init( jh_8way_context *sc )
@@ -715,12 +732,12 @@ jh_8way_core( jh_8way_context *sc, const void *data, size_t len )
static void
jh_8way_close( jh_8way_context *sc, unsigned ub, unsigned n, void *dst,
size_t out_size_w32 )
size_t out_size_w32, const void *iv )
{
__m512i buf[16*4];
__m512i *dst512 = (__m512i*)dst;
size_t numz, u;
uint64_t l0, l1;
sph_u64 l0, l1, l0e, l1e;
buf[0] = m512_const1_64( 0x80ULL );
@@ -731,10 +748,12 @@ jh_8way_close( jh_8way_context *sc, unsigned ub, unsigned n, void *dst,
memset_zero_512( buf+1, (numz>>3) - 1 );
l0 = ( sc->block_count << 9 ) + ( sc->ptr << 3 );
l1 = ( sc->block_count >> 55 );
*(buf + (numz>>3) ) = _mm512_set1_epi64( bswap_64( l1 ) );
*(buf + (numz>>3) + 1) = _mm512_set1_epi64( bswap_64( l0 ) );
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
l1 = SPH_T64(sc->block_count >> 55);
sph_enc64be( &l0e, l0 );
sph_enc64be( &l1e, l1 );
*(buf + (numz>>3) ) = _mm512_set1_epi64( l1e );
*(buf + (numz>>3) + 1) = _mm512_set1_epi64( l0e );
jh_8way_core( sc, buf, numz + 16 );
@@ -753,7 +772,7 @@ jh256_8way_update(void *cc, const void *data, size_t len)
void
jh256_8way_close(void *cc, void *dst)
{
jh_8way_close(cc, 0, 0, dst, 8);
jh_8way_close(cc, 0, 0, dst, 8, IV256);
}
void
@@ -765,7 +784,7 @@ jh512_8way_update(void *cc, const void *data, size_t len)
void
jh512_8way_close(void *cc, void *dst)
{
jh_8way_close(cc, 0, 0, dst, 16);
jh_8way_close(cc, 0, 0, dst, 16, IV512);
}
#endif
@@ -863,12 +882,12 @@ jh_4way_core( jh_4way_context *sc, const void *data, size_t len )
static void
jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
size_t out_size_w32 )
size_t out_size_w32, const void *iv )
{
__m256i buf[16*4];
__m256i *dst256 = (__m256i*)dst;
size_t numz, u;
uint64_t l0, l1;
sph_u64 l0, l1, l0e, l1e;
buf[0] = m256_const1_64( 0x80ULL );
@@ -879,10 +898,12 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
memset_zero_256( buf+1, (numz>>3) - 1 );
l0 = ( sc->block_count << 9 ) + ( sc->ptr << 3 );
l1 = ( sc->block_count >> 55 );
*(buf + (numz>>3) ) = _mm256_set1_epi64x( bswap_64( l1 ) );
*(buf + (numz>>3) + 1) = _mm256_set1_epi64x( bswap_64( l0 ) );
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
l1 = SPH_T64(sc->block_count >> 55);
sph_enc64be( &l0e, l0 );
sph_enc64be( &l1e, l1 );
*(buf + (numz>>3) ) = _mm256_set1_epi64x( l1e );
*(buf + (numz>>3) + 1) = _mm256_set1_epi64x( l0e );
jh_4way_core( sc, buf, numz + 16 );
@@ -901,7 +922,7 @@ jh256_4way_update(void *cc, const void *data, size_t len)
void
jh256_4way_close(void *cc, void *dst)
{
jh_4way_close(cc, 0, 0, dst, 8 );
jh_4way_close(cc, 0, 0, dst, 8, IV256);
}
void
@@ -913,7 +934,7 @@ jh512_4way_update(void *cc, const void *data, size_t len)
void
jh512_4way_close(void *cc, void *dst)
{
jh_4way_close(cc, 0, 0, dst, 16 );
jh_4way_close(cc, 0, 0, dst, 16, IV512);
}

View File

@@ -43,6 +43,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "simd-utils.h"
#define SPH_SIZE_jh256 256

View File

@@ -65,7 +65,7 @@ void jha_hash_4way( void *out, const void *input )
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
blake512_4way_init( &ctx_blake );
blake512_4way_update( &ctx_blake, vhash, 64 );
blake512_4way( &ctx_blake, vhash, 64 );
blake512_4way_close( &ctx_blake, vhashA );
jh512_4way_init( &ctx_jh );

View File

@@ -28,28 +28,26 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
__m512i *noncev = (__m512i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
int thr_id = mythr->id;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
do {
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
keccakhash_8way( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
if ( hash7[ lane<<1 ] <= Htarg )
{
extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = bswap_32( n + lane );
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
@@ -81,28 +79,27 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
int thr_id = mythr->id;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
keccakhash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
if ( hash7[ lane<<1 ] <= Htarg )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ))
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = bswap_32( n + lane );
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;

View File

@@ -1,9 +1,5 @@
#include "keccak-gate.h"
#include "sph_keccak.h"
int hard_coded_eb = 1;
// KECCAK
bool register_keccak_algo( algo_gate_t* gate )
{
@@ -23,8 +19,6 @@ bool register_keccak_algo( algo_gate_t* gate )
return true;
};
// KECCAKC
bool register_keccakc_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT | AVX512_OPT;
@@ -43,50 +37,3 @@ bool register_keccakc_algo( algo_gate_t* gate )
return true;
};
// SHA3D
void sha3d( void *state, const void *input, int len )
{
uint32_t _ALIGN(64) buffer[16], hash[16];
sph_keccak_context ctx_keccak;
sph_keccak256_init( &ctx_keccak );
sph_keccak256 ( &ctx_keccak, input, len );
sph_keccak256_close( &ctx_keccak, (void*) buffer );
sph_keccak256_init( &ctx_keccak );
sph_keccak256 ( &ctx_keccak, buffer, 32 );
sph_keccak256_close( &ctx_keccak, (void*) hash );
memcpy(state, hash, 32);
}
void sha3d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
{
sha3d( merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size );
for ( int i = 0; i < sctx->job.merkle_count; i++ )
{
memcpy( merkle_root + 32, sctx->job.merkle[i], 32 );
sha256d( merkle_root, merkle_root, 64 );
}
}
bool register_sha3d_algo( algo_gate_t* gate )
{
hard_coded_eb = 6;
opt_extranonce = false;
gate->optimizations = AVX2_OPT | AVX512_OPT;
gate->gen_merkle_root = (void*)&sha3d_gen_merkle_root;
#if defined (KECCAK_8WAY)
gate->scanhash = (void*)&scanhash_sha3d_8way;
gate->hash = (void*)&sha3d_hash_8way;
#elif defined (KECCAK_4WAY)
gate->scanhash = (void*)&scanhash_sha3d_4way;
gate->hash = (void*)&sha3d_hash_4way;
#else
gate->scanhash = (void*)&scanhash_sha3d;
gate->hash = (void*)&sha3d_hash;
#endif
return true;
};

View File

@@ -10,37 +10,24 @@
#define KECCAK_4WAY 1
#endif
extern int hard_coded_eb;
#if defined(KECCAK_8WAY)
void keccakhash_8way( void *state, const void *input );
int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void sha3d_hash_8way( void *state, const void *input );
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(KECCAK_4WAY)
void keccakhash_4way( void *state, const void *input );
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void sha3d_hash_4way( void *state, const void *input );
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else
void keccakhash( void *state, const void *input );
int scanhash_keccak( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void sha3d_hash( void *state, const void *input );
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif
#endif
#endif

View File

@@ -1,7 +1,6 @@
#include <stddef.h>
#include <stdint.h>
#include "keccak-hash-4way.h"
#include "keccak-gate.h"
static const uint64_t RC[] = {
0x0000000000000001, 0x0000000000008082,
@@ -164,12 +163,12 @@ static void keccak64_8way_close( keccak64_ctx_m512i *kc, void *dst,
unsigned eb;
union {
__m512i tmp[lim + 1];
uint64_t dummy; /* for alignment */
sph_u64 dummy; /* for alignment */
} u;
size_t j;
size_t m512_len = byte_len >> 3;
eb = hard_coded_eb;
eb = 0x100 >> 8;
if ( kc->ptr == (lim - 8) )
{
const uint64_t t = eb | 0x8000000000000000;
@@ -345,12 +344,12 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
unsigned eb;
union {
__m256i tmp[lim + 1];
uint64_t dummy; /* for alignment */
sph_u64 dummy; /* for alignment */
} u;
size_t j;
size_t m256_len = byte_len >> 3;
eb = hard_coded_eb;
eb = 0x100 >> 8;
if ( kc->ptr == (lim - 8) )
{
const uint64_t t = eb | 0x8000000000000000;

View File

@@ -43,8 +43,16 @@ extern "C"{
#ifdef __AVX2__
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "simd-utils.h"
#define SPH_SIZE_keccak256 256
/**
* Output size (in bits) for Keccak-512.
*/
#define SPH_SIZE_keccak512 512
/**
* This structure is a context for Keccak computations: it contains the
* intermediate values and some data from the last entered block. Once a

View File

@@ -18,34 +18,36 @@ void keccakhash(void *state, const void *input)
memcpy(state, hash, 32);
}
int scanhash_keccak( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_keccak( struct work *work,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) hash64[8];
uint32_t _ALIGN(64) endiandata[32];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce;
const int thr_id = mythr->id;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
//const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
for ( int i=0; i < 19; i++ )
be32enc( &endiandata[i], pdata[i] );
uint32_t _ALIGN(32) hash64[8];
uint32_t endiandata[32];
do {
be32enc( &endiandata[19], n );
keccakhash( hash64, endiandata );
if ( valid_hash( hash64, ptarget ) && !opt_benchmark )
{
pdata[19] = n;
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < last_nonce && !work_restart[thr_id].restart );
for (int i=0; i < 19; i++)
be32enc(&endiandata[i], pdata[i]);
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
keccakhash(hash64, endiandata);
if (((hash64[7]&0xFFFFFF00)==0) &&
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}

View File

@@ -1,126 +0,0 @@
#include "keccak-gate.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "sph_keccak.h"
#include "keccak-hash-4way.h"
#if defined(KECCAK_8WAY)
void sha3d_hash_8way(void *state, const void *input)
{
uint32_t buffer[16*8] __attribute__ ((aligned (128)));
keccak256_8way_context ctx;
keccak256_8way_init( &ctx );
keccak256_8way_update( &ctx, input, 80 );
keccak256_8way_close( &ctx, buffer );
keccak256_8way_init( &ctx );
keccak256_8way_update( &ctx, buffer, 32 );
keccak256_8way_close( &ctx, state );
}
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[24*8] __attribute__ ((aligned (128)));
uint32_t hash[16*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[49]); // 3*16+1
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
do {
sha3d_hash_8way( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
{
extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(KECCAK_4WAY)
void sha3d_hash_4way(void *state, const void *input)
{
uint32_t buffer[16*4] __attribute__ ((aligned (64)));
keccak256_4way_context ctx;
keccak256_4way_init( &ctx );
keccak256_4way_update( &ctx, input, 80 );
keccak256_4way_close( &ctx, buffer );
keccak256_4way_init( &ctx );
keccak256_4way_update( &ctx, buffer, 32 );
keccak256_4way_close( &ctx, state );
}
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t hash[16*4] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[25]); // 3*8+1
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do {
sha3d_hash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -1,50 +0,0 @@
#include "algo-gate-api.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "sph_keccak.h"
void sha3d_hash(void *state, const void *input)
{
uint32_t buffer[16];
sph_keccak256_context ctx_keccak;
sph_keccak256_init( &ctx_keccak );
sph_keccak256 ( &ctx_keccak, input, 80 );
sph_keccak256_close( &ctx_keccak, buffer );
sph_keccak256_init( &ctx_keccak );
sph_keccak256 ( &ctx_keccak, buffer, 32 );
sph_keccak256_close( &ctx_keccak, state );
}
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) hash64[8];
uint32_t _ALIGN(64) endiandata[32];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce;
const int thr_id = mythr->id;
for ( int i=0; i < 19; i++ )
be32enc( &endiandata[i], pdata[i] );
do {
be32enc( &endiandata[19], n );
sha3d_hash( hash64, endiandata );
if ( valid_hash( hash64, ptarget ) && !opt_benchmark )
{
pdata[19] = n;
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < last_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -32,8 +32,8 @@
#include <stddef.h>
#include <string.h>
#include "sph_keccak.h"
#include "keccak-gate.h"
#ifdef __cplusplus
extern "C"{
@@ -1616,7 +1616,7 @@ keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
} u; \
size_t j; \
\
eb = hard_coded_eb; \
eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
if (kc->ptr == (lim - 1)) { \
if (n == 7) { \
u.tmp[0] = eb; \

View File

@@ -459,11 +459,6 @@ int luffa_4way_init( luffa_4way_context *state, int hashbitlen )
return 0;
}
int luffa512_4way_init( luffa_4way_context *state )
{
return luffa_4way_init( state, 512 );
}
// Do not call luffa_update_close after having called luffa_update.
// Once luffa_update has been called only call luffa_update or luffa_close.
int luffa_4way_update( luffa_4way_context *state, const void *data,
@@ -501,14 +496,6 @@ int luffa_4way_update( luffa_4way_context *state, const void *data,
return 0;
}
/*
int luffa512_4way_update( luffa_4way_context *state, const void *data,
size_t len )
{
return luffa_4way_update( state, data, len );
}
*/
int luffa_4way_close( luffa_4way_context *state, void *hashval )
{
__m512i *buffer = (__m512i*)state->buffer;
@@ -531,77 +518,6 @@ int luffa_4way_close( luffa_4way_context *state, void *hashval )
return 0;
}
/*
int luffa512_4way_close( luffa_4way_context *state, void *hashval )
{
return luffa_4way_close( state, hashval );
}
*/
int luffa512_4way_full( luffa_4way_context *state, void *output,
const void *data, size_t inlen )
{
state->hashbitlen = 512;
__m128i *iv = (__m128i*)IV;
state->chainv[0] = m512_const1_128( iv[0] );
state->chainv[1] = m512_const1_128( iv[1] );
state->chainv[2] = m512_const1_128( iv[2] );
state->chainv[3] = m512_const1_128( iv[3] );
state->chainv[4] = m512_const1_128( iv[4] );
state->chainv[5] = m512_const1_128( iv[5] );
state->chainv[6] = m512_const1_128( iv[6] );
state->chainv[7] = m512_const1_128( iv[7] );
state->chainv[8] = m512_const1_128( iv[8] );
state->chainv[9] = m512_const1_128( iv[9] );
((__m512i*)state->buffer)[0] = m512_zero;
((__m512i*)state->buffer)[1] = m512_zero;
const __m512i *vdata = (__m512i*)data;
__m512i msg[2];
int i;
const int blocks = (int)( inlen >> 5 );
const __m512i shuff_bswap32 = m512_const_64(
0x3c3d3e3f38393a3b, 0x3435363730313233,
0x2c2d2e2f28292a2b, 0x2425262720212223,
0x1c1d1e1f18191a1b, 0x1415161710111213,
0x0c0d0e0f08090a0b, 0x0405060700010203 );
state->rembytes = inlen & 0x1F;
// full blocks
for ( i = 0; i < blocks; i++, vdata+=2 )
{
msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
msg[1] = _mm512_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
rnd512_4way( state, msg );
}
// 16 byte partial block exists for 80 byte len
if ( state->rembytes )
{
// padding of partial block
msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
msg[1] = m512_const2_64( 0, 0x0000000080000000 );
rnd512_4way( state, msg );
}
else
{
// empty pad block
msg[0] = m512_const2_64( 0, 0x0000000080000000 );
msg[1] = m512_zero;
rnd512_4way( state, msg );
}
finalization512_4way( state, (uint32*)output );
if ( state->hashbitlen > 512 )
finalization512_4way( state, (uint32*)( output+64 ) );
return 0;
}
int luffa_4way_update_close( luffa_4way_context *state,
void *output, const void *data, size_t inlen )
{
@@ -1115,69 +1031,6 @@ int luffa_2way_close( luffa_2way_context *state, void *hashval )
return 0;
}
int luffa512_2way_full( luffa_2way_context *state, void *output,
const void *data, size_t inlen )
{
state->hashbitlen = 512;
__m128i *iv = (__m128i*)IV;
state->chainv[0] = m256_const1_128( iv[0] );
state->chainv[1] = m256_const1_128( iv[1] );
state->chainv[2] = m256_const1_128( iv[2] );
state->chainv[3] = m256_const1_128( iv[3] );
state->chainv[4] = m256_const1_128( iv[4] );
state->chainv[5] = m256_const1_128( iv[5] );
state->chainv[6] = m256_const1_128( iv[6] );
state->chainv[7] = m256_const1_128( iv[7] );
state->chainv[8] = m256_const1_128( iv[8] );
state->chainv[9] = m256_const1_128( iv[9] );
((__m256i*)state->buffer)[0] = m256_zero;
((__m256i*)state->buffer)[1] = m256_zero;
const __m256i *vdata = (__m256i*)data;
__m256i msg[2];
int i;
const int blocks = (int)( inlen >> 5 );
const __m256i shuff_bswap32 = m256_const_64( 0x1c1d1e1f18191a1b,
0x1415161710111213,
0x0c0d0e0f08090a0b,
0x0405060700010203 );
state->rembytes = inlen & 0x1F;
// full blocks
for ( i = 0; i < blocks; i++, vdata+=2 )
{
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
rnd512_2way( state, msg );
}
// 16 byte partial block exists for 80 byte len
if ( state->rembytes )
{
// padding of partial block
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
msg[1] = m256_const2_64( 0, 0x0000000080000000 );
rnd512_2way( state, msg );
}
else
{
// empty pad block
msg[0] = m256_const2_64( 0, 0x0000000080000000 );
msg[1] = m256_zero;
rnd512_2way( state, msg );
}
finalization512_2way( state, (uint32*)output );
if ( state->hashbitlen > 512 )
finalization512_2way( state, (uint32*)( output+32 ) );
return 0;
}
int luffa_2way_update_close( luffa_2way_context *state,
void *output, const void *data, size_t inlen )
{

View File

@@ -61,23 +61,11 @@ typedef struct {
} luffa_4way_context __attribute((aligned(128)));
int luffa_4way_init( luffa_4way_context *state, int hashbitlen );
//int luffa_4way_update( luffa_4way_context *state, const void *data,
// size_t len );
//int luffa_4way_close( luffa_4way_context *state, void *hashval );
int luffa_4way_update( luffa_4way_context *state, const void *data,
size_t len );
int luffa_4way_close( luffa_4way_context *state, void *hashval );
int luffa_4way_update_close( luffa_4way_context *state, void *output,
const void *data, size_t inlen );
int luffa512_4way_full( luffa_4way_context *state, void *output,
const void *data, size_t inlen );
int luffa512_4way_init( luffa_4way_context *state );
int luffa512_4way_update( luffa_4way_context *state, const void *data,
size_t len );
int luffa512_4way_close( luffa_4way_context *state, void *hashval );
int luffa512_4way_update_close( luffa_4way_context *state, void *output,
const void *data, size_t inlen );
#define luffa_4way_update luffa512_4way_update
#define luffa_4way_close luffa512_4way_close
#define luffa_4way_update_close luffa512_4way_update_close
#endif
@@ -94,8 +82,6 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
int luffa_2way_close( luffa_2way_context *state, void *hashval );
int luffa_2way_update_close( luffa_2way_context *state, void *output,
const void *data, size_t inlen );
int luffa512_2way_full( luffa_2way_context *state, void *output,
const void *data, size_t inlen );
#endif
#endif

View File

@@ -262,32 +262,38 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce - 16;
const uint32_t last_nonce = max_nonce - 8;
const uint32_t Htarg = ptarget[7];
__m512i *noncev = (__m512i*)vdata + 19; // aligned
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
int thr_id = mythr->id; // thr_id arg is deprecated
if ( bench ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
mm512_bswap32_intrlv80_16x32( vdata, pdata );
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
blake256_16way_init( &allium_16way_ctx.blake );
blake256_16way_update( &allium_16way_ctx.blake, vdata, 64 );
do {
allium_16way_hash( hash, vdata );
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4,
n+ 3, n+ 2, n +1, n ) );
for ( int lane = 0; lane < 16; lane++ )
if unlikely( valid_hash( hash+(lane<<3), ptarget ) && !bench )
allium_16way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 16; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
n += 16;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce;
return 0;
}
@@ -314,19 +320,18 @@ bool init_allium_8way_ctx()
return true;
}
void allium_8way_hash( void *hash, const void *input )
void allium_8way_hash( void *state, const void *input )
{
uint64_t vhashA[4*8] __attribute__ ((aligned (64)));
uint64_t vhashB[4*8] __attribute__ ((aligned (64)));
// uint64_t hash[4*8] __attribute__ ((aligned (64)));
uint64_t *hash0 = (uint64_t*)hash;
uint64_t *hash1 = (uint64_t*)hash+ 4;
uint64_t *hash2 = (uint64_t*)hash+ 8;
uint64_t *hash3 = (uint64_t*)hash+12;
uint64_t *hash4 = (uint64_t*)hash+16;
uint64_t *hash5 = (uint64_t*)hash+20;
uint64_t *hash6 = (uint64_t*)hash+24;
uint64_t *hash7 = (uint64_t*)hash+28;
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
uint32_t hash0[8] __attribute__ ((aligned (32)));
uint32_t hash1[8] __attribute__ ((aligned (32)));
uint32_t hash2[8] __attribute__ ((aligned (32)));
uint32_t hash3[8] __attribute__ ((aligned (32)));
uint32_t hash4[8] __attribute__ ((aligned (64)));
uint32_t hash5[8] __attribute__ ((aligned (32)));
uint32_t hash6[8] __attribute__ ((aligned (32)));
uint32_t hash7[8] __attribute__ ((aligned (32)));
allium_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &allium_8way_ctx, sizeof(allium_8way_ctx) );
@@ -393,66 +398,69 @@ void allium_8way_hash( void *hash, const void *input )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhashB, 256 );
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash4, hash4, 256 );
update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash5, hash5, 256 );
update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash6, hash6, 256 );
update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash7, hash7, 256 );
update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 );
}
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint64_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint64_t *ptarget = (uint64_t*)work->target;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19; // aligned
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
int thr_id = mythr->id;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
mm256_bswap32_intrlv80_8x32( vdata, pdata );
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
blake256_8way_init( &allium_8way_ctx.blake );
blake256_8way_update( &allium_8way_ctx.blake, vdata, 64 );
do {
allium_8way_hash( hash, vdata );
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
n+3, n+2, n+1, n ) );
for ( int lane = 0; lane < 8; lane++ )
allium_8way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
{
const uint64_t *lane_hash = hash + (lane<<2);
if unlikely( valid_hash( lane_hash, ptarget ) && !bench )
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
pdata[19] = n + lane;
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
}
}
n += 8;
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
} while likely( (n <= last_nonce) && !work_restart[thr_id].restart );
pdata[19] = n;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -220,7 +220,7 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// Assemble block header
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL );
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
for ( t = 0; t < 16; t++ )
g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t];
}

View File

@@ -33,7 +33,7 @@ void lyra2h_4way_hash( void *state, const void *input )
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
memcpy( &ctx_blake, &l2h_4way_blake_mid, sizeof l2h_4way_blake_mid );
blake256_4way_update( &ctx_blake, input + (64*4), 16 );
blake256_4way( &ctx_blake, input + (64*4), 16 );
blake256_4way_close( &ctx_blake, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );

View File

@@ -260,8 +260,8 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
// Overlap has 2 in Nrows chance reduced to 1 in Nrows because if both
// overlap it's unified.
// As a result normal is Nrows-2 / Nrows.
// for 4 rows: 1 unified, 2 overlap, 1 normal.
// for 8 rows: 1 unified, 2 overlap, 56 normal.
// for 4 rows: 1 unified, 1 overlap, 2 normal.
// for 8 rows: 1 unified, 1 overlap, 6 normal.
static inline void reducedDuplexRow_2way_normal( uint64_t *State,
uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1,
@@ -338,18 +338,21 @@ static inline void reducedDuplexRow_2way_normal( uint64_t *State,
_mm512_store_si512( (__m512i*)State + 3, state3 );
}
// rowInOut0 ! = rowInOut1 != rowOut
static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1,
uint64_t *rowOut, uint64_t nCols)
{
int i;
register __m512i state0, state1, state2, state3;
__m512i *in = (__m512i*)rowIn;
__m512i *inout0 = (__m512i*)rowInOut0;
__m512i *inout1 = (__m512i*)rowInOut1;
__m512i *out = (__m512i*)rowOut;
// inout_ovly io;
ovly_512 io0, io1, io2;
inout_ovly io;
state0 = _mm512_load_si512( (__m512i*)State );
state1 = _mm512_load_si512( (__m512i*)State + 1 );
@@ -359,21 +362,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
for ( i = 0; i < nCols; i++ )
{
//Absorbing "M[prev] [+] M[row*]"
io0.v512 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 ),
_mm512_load_si512( (__m512i*)inout1 ) );
io1.v512 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +1 ),
_mm512_load_si512( (__m512i*)inout1 +1 ) );
io2.v512 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +2 ),
_mm512_load_si512( (__m512i*)inout1 +2 ) );
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0.v512 ) );
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1.v512 ) );
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2.v512 ) );
/*
io.v512[0] = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 ),
_mm512_load_si512( (__m512i*)inout1 ) );
@@ -387,7 +375,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io.v512[0] ) );
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io.v512[1] ) );
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io.v512[2] ) );
*/
//Applies the reduced-round transformation f to the sponge's state
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
@@ -401,21 +388,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
out[2] = _mm512_xor_si512( out[2], state2 );
// if out is the same row as inout, update with new data.
if ( rowOut == rowInOut0 )
{
io0.v512 = _mm512_mask_blend_epi64( 0x0f, io0.v512, out[0] );
io1.v512 = _mm512_mask_blend_epi64( 0x0f, io1.v512, out[1] );
io2.v512 = _mm512_mask_blend_epi64( 0x0f, io2.v512, out[2] );
}
if ( rowOut == rowInOut1 )
{
io0.v512 = _mm512_mask_blend_epi64( 0xf0, io0.v512, out[0] );
io1.v512 = _mm512_mask_blend_epi64( 0xf0, io1.v512, out[1] );
io2.v512 = _mm512_mask_blend_epi64( 0xf0, io2.v512, out[2] );
}
/*
if ( rowOut == rowInOut0 )
{
io.v512[0] = _mm512_mask_blend_epi64( 0x0f, io.v512[0], out[0] );
@@ -429,35 +401,27 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
io.v512[1] = _mm512_mask_blend_epi64( 0xf0, io.v512[1], out[1] );
io.v512[2] = _mm512_mask_blend_epi64( 0xf0, io.v512[2], out[2] );
}
*/
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
t0 = _mm512_permutex_epi64( state0, 0x93 );
t1 = _mm512_permutex_epi64( state1, 0x93 );
t2 = _mm512_permutex_epi64( state2, 0x93 );
io0.v512 = _mm512_xor_si512( io0.v512,
io.v512[0] = _mm512_xor_si512( io.v512[0],
_mm512_mask_blend_epi64( 0x11, t0, t2 ) );
io1.v512 = _mm512_xor_si512( io1.v512,
io.v512[1] = _mm512_xor_si512( io.v512[1],
_mm512_mask_blend_epi64( 0x11, t1, t0 ) );
io2.v512 = _mm512_xor_si512( io2.v512,
io.v512[2] = _mm512_xor_si512( io.v512[2],
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
}
casti_m256i( inout0, 0 ) = io0.v256lo;
casti_m256i( inout1, 1 ) = io0.v256hi;
casti_m256i( inout0, 2 ) = io1.v256lo;
casti_m256i( inout1, 3 ) = io1.v256hi;
casti_m256i( inout0, 4 ) = io2.v256lo;
casti_m256i( inout1, 5 ) = io2.v256hi;
/*
_mm512_mask_store_epi64( inout0, 0x0f, io.v512[0] );
_mm512_mask_store_epi64( inout1, 0xf0, io.v512[0] );
_mm512_mask_store_epi64( inout0 +1, 0x0f, io.v512[1] );
_mm512_mask_store_epi64( inout1 +1, 0xf0, io.v512[1] );
_mm512_mask_store_epi64( inout0 +2, 0x0f, io.v512[2] );
_mm512_mask_store_epi64( inout1 +2, 0xf0, io.v512[2] );
*/
//Goes to next block
in += BLOCK_LEN_M256I;
inout0 += BLOCK_LEN_M256I;
@@ -602,7 +566,7 @@ static inline void reducedDuplexRow_2way_unified( uint64_t *State,
inout[1] = _mm512_xor_si512( inout[1],
_mm512_mask_blend_epi64( 0x11, t1, t0 ) );
inout[2] = _mm512_xor_si512( inout[2],
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
out[0] = _mm512_xor_si512( out[0], state0 );
out[1] = _mm512_xor_si512( out[1], state1 );
@@ -611,9 +575,9 @@ static inline void reducedDuplexRow_2way_unified( uint64_t *State,
}
//Goes to next block
in += BLOCK_LEN_M256I;
in += BLOCK_LEN_M256I;
inout += BLOCK_LEN_M256I;
out += BLOCK_LEN_M256I;
out += BLOCK_LEN_M256I;
}
_mm512_store_si512( (__m512i*)State, state0 );
@@ -636,8 +600,8 @@ static inline void reducedDuplexRow_2way_unified( uint64_t *State,
// Wrapper
inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
uint64_t *rowInOut0, uint64_t *rowInOut1,
uint64_t *rowOut, uint64_t nCols )
uint64_t *rowInOut0, uint64_t *rowInOut1,
uint64_t *rowOut, uint64_t nCols )
{
if ( rowInOut0 == rowInOut1 )
reducedDuplexRow_2way_unified( State, rowIn, rowInOut0, rowOut, nCols );
@@ -650,18 +614,18 @@ inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
}
inline void reducedDuplexRow_2way_X( uint64_t *State, uint64_t *rowIn,
uint64_t *rowInOut0, uint64_t *rowInOut1,
uint64_t *rowOut, uint64_t nCols )
uint64_t *rowInOut0, uint64_t *rowInOut1,
uint64_t *rowOut, uint64_t nCols )
{
if ( rowInOut0 == rowInOut1 )
if ( rowInOut0 == rowInOut1 )
reducedDuplexRow_2way_unified( State, rowIn, rowInOut0, rowOut, nCols );
else if ( ( rowInOut0 == rowOut ) || ( rowInOut1 == rowOut ) )
{
asm volatile ( "nop" ); // Prevent GCC from optimizing
reducedDuplexRow_2way_overlap_X( State, rowIn, rowInOut0, rowInOut1,
rowOut, nCols );
}
else
else if ( ( rowInOut0 == rowOut ) || ( rowInOut1 == rowOut ) )
{
asm ( "nop" ); // This prevents GCC from merging with previous function
reducedDuplexRow_2way_overlap_X( State, rowIn, rowInOut0, rowInOut1,
rowOut, nCols );
}
else
reducedDuplexRow_2way_normal( State, rowIn, rowInOut0, rowInOut1,
rowOut, nCols );
}

View File

@@ -203,18 +203,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
union _ovly_512
{
__m512i v512;
struct
{
__m256i v256lo;
__m256i v256hi;
};
};
typedef union _ovly_512 ovly_512;
union _inout_ovly
{
__m512i v512[3];

View File

@@ -149,7 +149,7 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
char data_str[161], hash_str[65], target_str[65];
//uint8_t *bdata = 0;
uint8_t bdata[8192] __attribute__ ((aligned (64)));
int i, digits;
int rc = 0, i, digits;
int bytes;
size_t p = sizeof(unsigned long), a = 64/p, b = 32/p;
@@ -267,41 +267,48 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
}
if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget )
&& !opt_benchmark ) )
// if ( unlikely( hash[7] <= ptarget[7] ) )
// if ( likely( fulltest( hash, ptarget ) && !opt_benchmark ) )
// rewrite to use 64 bit test.
const unsigned char *hash_ = (const unsigned char *)hash;
const unsigned char *target_ = (const unsigned char *)ptarget;
for ( i = 31; i >= 0; i-- )
{
if ( opt_debug )
if ( hash_[i] != target_[i] )
{
bin2hex( hash_str, (unsigned char *)hash, 32 );
bin2hex( target_str, (unsigned char *)ptarget, 32 );
bin2hex( data_str, (unsigned char *)data, 80 );
applog( LOG_DEBUG, "DEBUG: [%d thread] Found share!\ndata %s\nhash %s\ntarget %s",
thr_id, data_str, hash_str, target_str );
rc = hash_[i] < target_[i];
break;
}
}
if ( unlikely(rc) )
{
if ( opt_debug )
{
bin2hex(hash_str, (unsigned char *)hash, 32);
bin2hex(target_str, (unsigned char *)ptarget, 32);
bin2hex(data_str, (unsigned char *)data, 80);
applog(LOG_DEBUG, "DEBUG: [%d thread] Found share!\ndata %s\nhash %s\ntarget %s", thr_id,
data_str,
hash_str,
target_str);
}
pdata[19] = data[19];
submit_solution( work, hash, mythr );
}
} while ( n < max_nonce && !work_restart[thr_id].restart );
} while (n < max_nonce && !work_restart[thr_id].restart);
pdata[19] = n;
mpf_set_prec_raw( magifpi, prec0 );
mpf_set_prec_raw( magifpi0, prec0 );
mpf_set_prec_raw( mptmp, prec0 );
mpf_set_prec_raw( mpt1, prec0 );
mpf_set_prec_raw( mpt2, prec0 );
mpf_clear( magifpi );
mpf_clear( magifpi0 );
mpf_clear( mpten );
mpf_clear( mptmp );
mpf_clear( mpt1 );
mpf_clear( mpt2 );
mpz_clears( magipi, magisw, product, bns0, bns1, NULL );
mpf_set_prec_raw(magifpi, prec0);
mpf_set_prec_raw(magifpi0, prec0);
mpf_set_prec_raw(mptmp, prec0);
mpf_set_prec_raw(mpt1, prec0);
mpf_set_prec_raw(mpt2, prec0);
mpf_clear(magifpi);
mpf_clear(magifpi0);
mpf_clear(mpten);
mpf_clear(mptmp);
mpf_clear(mpt1);
mpf_clear(mpt2);
mpz_clears(magipi, magisw, product, bns0, bns1, NULL);
*hashes_done = n - first_nonce + 1;
return 0;

View File

@@ -154,13 +154,14 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
}
void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t* end_nonce_ptr )
uint32_t* end_nonce_ptr, bool clean_job )
{
// ignore POK in first word
// const int nonce_i = 19;
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|| ( *nonceptr >= *end_nonce_ptr ) )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) )
{
work_free( work );
work_copy( work, g_work );

View File

@@ -94,37 +94,6 @@ static const uint32_t K256[64] =
_mm_xor_si128( _mm_xor_si128( \
mm128_ror_32(x, 17), mm128_ror_32(x, 19) ), _mm_srli_epi32(x, 10) )
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
do { \
__m128i K = _mm_set1_epi32( K256[( (j)+(i) )] ); \
__m128i T1 = mm128_ror_32( E, 14 ); \
__m128i T2 = mm128_ror_32( A, 9 ); \
__m128i T3 = _mm_xor_si128( F, G ); \
__m128i T4 = _mm_or_si128( A, B ); \
__m128i T5 = _mm_and_si128( A, B ); \
K = _mm_add_epi32( K, W[i] ); \
T1 = _mm_xor_si128( T1, E ); \
T2 = _mm_xor_si128( T2, A ); \
T3 = _mm_and_si128( T3, E ); \
T4 = _mm_and_si128( T4, C ); \
K = _mm_add_epi32( H, K ); \
T1 = mm128_ror_32( T1, 5 ); \
T2 = mm128_ror_32( T2, 11 ); \
T3 = _mm_xor_si128( T3, G ); \
T4 = _mm_or_si128( T4, T5 ); \
T1 = _mm_xor_si128( T1, E ); \
T2 = _mm_xor_si128( T2, A ); \
T1 = mm128_ror_32( T1, 6 ); \
T2 = mm128_ror_32( T2, 2 ); \
T1 = _mm_add_epi32( T1, T3 ); \
T2 = _mm_add_epi32( T2, T4 ); \
T1 = _mm_add_epi32( T1, K ); \
H = _mm_add_epi32( T1, T2 ); \
D = _mm_add_epi32( D, T1 ); \
} while (0)
/*
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
do { \
__m128i T1, T2; \
@@ -135,8 +104,6 @@ do { \
D = _mm_add_epi32( D, T1 ); \
H = _mm_add_epi32( T1, T2 ); \
} while (0)
*/
static void
sha256_4way_round( sha256_4way_context *ctx, __m128i *in, __m128i r[8] )

View File

@@ -319,7 +319,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
// SHA-512 4 way 64 bit
/*
#define CH(X, Y, Z) \
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
@@ -327,15 +327,6 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
_mm256_or_si256( _mm256_and_si256( X, Y ), \
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
#define BSG5_0(x) \
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
_mm256_xor_si256( mm256_ror_64( x, 5 ), x ), 6 ), x ), 28 )
#define BSG5_1(x) \
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
_mm256_xor_si256( mm256_ror_64( x, 23 ), x ), 4 ), x ), 14 )
*/
/*
#define BSG5_0(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_ror_64(x, 28), mm256_ror_64(x, 34) ), mm256_ror_64(x, 39) )
@@ -343,8 +334,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
#define BSG5_1(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_ror_64(x, 14), mm256_ror_64(x, 18) ), mm256_ror_64(x, 41) )
*/
/*
#define SSG5_0(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) )
@@ -352,7 +342,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
#define SSG5_1(x) \
_mm256_xor_si256( _mm256_xor_si256( \
mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) )
*/
// Interleave SSG0 & SSG1 for better throughput.
// return ssg0(w0) + ssg1(w1)
static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
@@ -371,7 +361,7 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
return _mm256_add_epi64( w0a, w1a );
}
/*
#define SSG512x2_0( w0, w1, i ) do \
{ \
__m256i X0a, X1a, X0b, X1b; \
@@ -401,51 +391,7 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
w0 = _mm256_xor_si256( X0a, X0b ); \
w1 = _mm256_xor_si256( X1a, X1b ); \
} while(0)
*/
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
do { \
__m256i K = _mm256_set1_epi64x( K512[ i ] ); \
__m256i T1 = mm256_ror_64( E, 23 ); \
__m256i T2 = mm256_ror_64( A, 5 ); \
__m256i T3 = _mm256_xor_si256( F, G ); \
__m256i T4 = _mm256_or_si256( A, B ); \
__m256i T5 = _mm256_and_si256( A, B ); \
K = _mm256_add_epi64( K, W[i] ); \
T1 = _mm256_xor_si256( T1, E ); \
T2 = _mm256_xor_si256( T2, A ); \
T3 = _mm256_and_si256( T3, E ); \
T4 = _mm256_and_si256( T4, C ); \
K = _mm256_add_epi64( H, K ); \
T1 = mm256_ror_64( T1, 4 ); \
T2 = mm256_ror_64( T2, 6 ); \
T3 = _mm256_xor_si256( T3, G ); \
T4 = _mm256_or_si256( T4, T5 ); \
T1 = _mm256_xor_si256( T1, E ); \
T2 = _mm256_xor_si256( T2, A ); \
T1 = mm256_ror_64( T1, 14 ); \
T2 = mm256_ror_64( T2, 28 ); \
T1 = _mm256_add_epi64( T1, T3 ); \
T2 = _mm256_add_epi64( T2, T4 ); \
T1 = _mm256_add_epi64( T1, K ); \
H = _mm256_add_epi64( T1, T2 ); \
D = _mm256_add_epi64( D, T1 ); \
} while (0)
/*
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
do { \
__m256i K = _mm256_add_epi64( W[i], _mm256_set1_epi64x( K512[ i ] ) ); \
__m256i T1 = BSG5_1(E); \
__m256i T2 = BSG5_0(A); \
T1 = mm256_add4_64( T1, H, CH(E, F, G), K ); \
T2 = _mm256_add_epi64( T2, MAJ(A, B, C) ); \
D = _mm256_add_epi64( D, T1 ); \
H = _mm256_add_epi64( T1, T2 ); \
} while (0)
*/
/*
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
do { \
__m256i T1, T2; \
@@ -456,7 +402,7 @@ do { \
D = _mm256_add_epi64( D, T1 ); \
H = _mm256_add_epi64( T1, T2 ); \
} while (0)
*/
static void
sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] )

View File

@@ -3,9 +3,11 @@
#include <stdio.h>
// This is a fake, it actually does not do parallel AES, that requires VAES.
// This is only intended when the preceding and folllowing functions use the
// same 2x128 interleave.
// This implementation is deprecated, superseded by VAES in Icelake
// which provides HW based 4 way aes.
// It was created for AVX2 to eliminate interleaving between the
// preceding and following function.
// This code can be removed when current users have reverted to one way.
#if defined(__AVX2__)
@@ -408,94 +410,4 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
casti_m256i( dst, 3 ) = casti_m256i( ctx->h, 3 );
}
void shavite512_2way_full( shavite512_2way_context *ctx, void *dst,
const void *data, size_t len )
{
__m256i *h = (__m256i*)ctx->h;
__m128i *iv = (__m128i*)IV512;
h[0] = m256_const1_128( iv[0] );
h[1] = m256_const1_128( iv[1] );
h[2] = m256_const1_128( iv[2] );
h[3] = m256_const1_128( iv[3] );
ctx->ptr =
ctx->count0 =
ctx->count1 =
ctx->count2 =
ctx->count3 = 0;
unsigned char *buf = ctx->buf;
size_t ptr = ctx->ptr;
// process full blocks and load buf with remainder.
while ( len > 0 )
{
size_t clen;
clen = (sizeof ctx->buf) - ptr;
if ( clen > len << 1 )
clen = len << 1;
memcpy( buf + ptr, data, clen );
data = (const unsigned char *)data + clen;
ptr += clen;
len -= (clen >> 1);
if ( ptr == sizeof ctx->buf )
{
if ( ( ctx->count0 = ctx->count0 + 1024 ) == 0 )
{
ctx->count1 = ctx->count1 + 1;
if ( ctx->count1 == 0 )
{
ctx->count2 = ctx->count2 + 1;
if ( ctx->count2 == 0 )
ctx->count3 = ctx->count3 + 1;
}
}
c512_2way( ctx, buf );
ptr = 0;
}
}
uint32_t vp = ptr>>5;
// Count = { 0, 16, 64, 80 }. Outsize = 16 u32 = 512 bits = 0x0200
// Count is misaligned to 16 bits and straddles 2 vectors.
// Use u32 overlay to stage then u16 to load buf.
union
{
uint32_t u32[4];
uint16_t u16[8];
} count;
count.u32[0] = ctx->count0 += (ptr << 2); // ptr/2 * 8
count.u32[1] = ctx->count1;
count.u32[2] = ctx->count2;
count.u32[3] = ctx->count3;
if ( vp == 0 ) // empty buf, xevan.
{
casti_m256i( buf, 0 ) = m256_const2_64( 0, 0x0000000000000080 );
memset_zero_256( (__m256i*)buf + 1, 5 );
ctx->count0 = ctx->count1 = ctx->count2 = ctx->count3 = 0;
}
else // half full buf, everyone else.
{
casti_m256i( buf, vp++ ) = m256_const2_64( 0, 0x0000000000000080 );
memset_zero_256( (__m256i*)buf + vp, 6 - vp );
}
casti_m256i( buf, 6 ) = m256_const1_128(
_mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
casti_m256i( buf, 7 ) = m256_const1_128( _mm_set_epi16(
0x0200, count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );
c512_2way( ctx, buf);
casti_m256i( dst, 0 ) = casti_m256i( ctx->h, 0 );
casti_m256i( dst, 1 ) = casti_m256i( ctx->h, 1 );
casti_m256i( dst, 2 ) = casti_m256i( ctx->h, 2 );
casti_m256i( dst, 3 ) = casti_m256i( ctx->h, 3 );
}
#endif // AVX2

View File

@@ -18,8 +18,6 @@ void shavite512_2way_update( shavite512_2way_context *ctx, const void *data,
void shavite512_2way_close( shavite512_2way_context *ctx, void *dst );
void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
const void *data, size_t len );
void shavite512_2way_full( shavite512_2way_context *ctx, void *dst,
const void *data, size_t len );
#endif // AVX2

View File

@@ -396,96 +396,4 @@ void shavite512_4way_update_close( shavite512_4way_context *ctx, void *dst,
casti_m512i( dst, 3 ) = casti_m512i( ctx->h, 3 );
}
void shavite512_4way_full( shavite512_4way_context *ctx, void *dst,
const void *data, size_t len )
{
__m512i *h = (__m512i*)ctx->h;
__m128i *iv = (__m128i*)IV512;
h[0] = m512_const1_128( iv[0] );
h[1] = m512_const1_128( iv[1] );
h[2] = m512_const1_128( iv[2] );
h[3] = m512_const1_128( iv[3] );
ctx->ptr =
ctx->count0 =
ctx->count1 =
ctx->count2 =
ctx->count3 = 0;
unsigned char *buf = ctx->buf;
size_t ptr = ctx->ptr;
// process full blocks and load buf with remainder.
while ( len > 0 )
{
size_t clen;
clen = (sizeof ctx->buf) - ptr;
if ( clen > len << 2 )
clen = len << 2;
memcpy( buf + ptr, data, clen );
data = (const unsigned char *)data + clen;
ptr += clen;
len -= (clen >> 2);
if ( ptr == sizeof ctx->buf )
{
if ( ( ctx->count0 = ctx->count0 + 1024 ) == 0 )
{
ctx->count1 = ctx->count1 + 1;
if ( ctx->count1 == 0 )
{
ctx->count2 = ctx->count2 + 1;
if ( ctx->count2 == 0 )
ctx->count3 = ctx->count3 + 1;
}
}
c512_4way( ctx, buf );
ptr = 0;
}
}
uint32_t vp = ptr>>6;
// Count = { 0, 16, 64, 80 }. Outsize = 16 u32 = 512 bits = 0x0200
// Count is misaligned to 16 bits and straddles 2 vectors.
// Use u32 overlay to stage then u16 to load buf.
union
{
uint32_t u32[4];
uint16_t u16[8];
} count;
count.u32[0] = ctx->count0 += (ptr << 1); // ptr/4 * 8
count.u32[1] = ctx->count1;
count.u32[2] = ctx->count2;
count.u32[3] = ctx->count3;
if ( vp == 0 ) // empty buf, xevan.
{
casti_m512i( buf, 0 ) = m512_const2_64( 0, 0x0000000000000080 );
memset_zero_512( (__m512i*)buf + 1, 5 );
ctx->count0 = ctx->count1 = ctx->count2 = ctx->count3 = 0;
}
else // half full buf, everyone else.
{
casti_m512i( buf, vp++ ) = m512_const2_64( 0, 0x0000000000000080 );
memset_zero_512( (__m512i*)buf + vp, 6 - vp );
}
casti_m512i( buf, 6 ) = m512_const1_128(
_mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
casti_m512i( buf, 7 ) = m512_const1_128( _mm_set_epi16(
0x0200, count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );
c512_4way( ctx, buf);
casti_m512i( dst, 0 ) = casti_m512i( ctx->h, 0 );
casti_m512i( dst, 1 ) = casti_m512i( ctx->h, 1 );
casti_m512i( dst, 2 ) = casti_m512i( ctx->h, 2 );
casti_m512i( dst, 3 ) = casti_m512i( ctx->h, 3 );
}
#endif // VAES

View File

@@ -18,8 +18,6 @@ void shavite512_4way_update( shavite512_4way_context *ctx, const void *data,
void shavite512_4way_close( shavite512_4way_context *ctx, void *dst );
void shavite512_4way_update_close( shavite512_4way_context *ctx, void *dst,
const void *data, size_t len );
void shavite512_4way_full( shavite512_4way_context *ctx, void *dst,
const void *data, size_t len );
#endif // VAES

View File

@@ -1173,91 +1173,6 @@ int simd_4way_update_close( simd_4way_context *state, void *hashval,
return 0;
}
int simd512_4way_full( simd_4way_context *state, void *hashval,
const void *data, int datalen )
{
__m512i *A = (__m512i*)state->A;
state->hashbitlen = 512;
state->n_feistels = 8;
state->blocksize = 128*8;
state->count = 0;
for ( int i = 0; i < 8; i++ )
A[i] = _mm512_set4_epi32( SIMD_IV_512[4*i+3], SIMD_IV_512[4*i+2],
SIMD_IV_512[4*i+1], SIMD_IV_512[4*i+0] );
int current, i;
int bs = state->blocksize; // bits in one lane
int isshort = 1;
uint64_t l;
int databitlen = datalen * 8;
current = state->count & (bs - 1);
while ( databitlen > 0 )
{
if ( current == 0 && databitlen >= bs )
{
// We can hash the data directly from the input buffer.
SIMD_4way_Compress( state, data, 0 );
databitlen -= bs;
data += 4*( bs/8 );
state->count += bs;
}
else
{
// Copy a chunk of data to the buffer
int len = bs - current;
if ( databitlen < len )
{
memcpy( state->buffer + 4*( current/8 ), data, 4*( (databitlen)/8 ) );
state->count += databitlen;
break;
}
else
{
memcpy( state->buffer + 4*(current/8), data, 4*(len/8) );
state->count += len;
databitlen -= len;
data += 4*( len/8 );
current = 0;
SIMD_4way_Compress( state, state->buffer, 0 );
}
}
}
current = state->count & (state->blocksize - 1);
// If there is still some data in the buffer, hash it
if ( current )
{
current = current / 8;
memset( state->buffer + 4*current, 0, 4*( state->blocksize/8 - current) );
SIMD_4way_Compress( state, state->buffer, 0 );
}
//* Input the message length as the last block
memset( state->buffer, 0, 4*( state->blocksize/8 ) );
l = state->count;
for ( i = 0; i < 8; i++ )
{
state->buffer[ i ] = l & 0xff;
state->buffer[ i+16 ] = l & 0xff;
state->buffer[ i+32 ] = l & 0xff;
state->buffer[ i+48 ] = l & 0xff;
l >>= 8;
}
if ( state->count < 16384 )
isshort = 2;
SIMD_4way_Compress( state, state->buffer, isshort );
memcpy( hashval, state->A, 4*( state->hashbitlen / 8 ) );
return 0;
}
#endif // AVX512
////////////////////////////////////
@@ -2014,90 +1929,4 @@ int simd_2way_update_close( simd_2way_context *state, void *hashval,
return 0;
}
int simd512_2way_full( simd_2way_context *state, void *hashval,
const void *data, int datalen )
{
__m256i *A = (__m256i*)state->A;
state->hashbitlen = 512;
state->n_feistels = 8;
state->blocksize = 128*8;
state->count = 0;
for ( int i = 0; i < 8; i++ )
A[i] = _mm256_set_epi32( SIMD_IV_512[4*i+3], SIMD_IV_512[4*i+2],
SIMD_IV_512[4*i+1], SIMD_IV_512[4*i+0],
SIMD_IV_512[4*i+3], SIMD_IV_512[4*i+2],
SIMD_IV_512[4*i+1], SIMD_IV_512[4*i+0] );
int current, i;
int bs = state->blocksize; // bits in one lane
int isshort = 1;
uint64_t l;
int databitlen = datalen * 8;
current = state->count & (bs - 1);
while ( databitlen > 0 )
{
if ( current == 0 && databitlen >= bs )
{
// We can hash the data directly from the input buffer.
SIMD_2way_Compress( state, data, 0 );
databitlen -= bs;
data += 2*( bs/8 );
state->count += bs;
}
else
{
// Copy a chunk of data to the buffer
int len = bs - current;
if ( databitlen < len )
{
memcpy( state->buffer + 2*( current/8 ), data, 2*( (databitlen+7)/8 ) );
state->count += databitlen;
break;
}
else
{
memcpy( state->buffer + 2*(current/8), data, 2*(len/8) );
state->count += len;
databitlen -= len;
data += 2*( len/8 );
current = 0;
SIMD_2way_Compress( state, state->buffer, 0 );
}
}
}
current = state->count & (state->blocksize - 1);
// If there is still some data in the buffer, hash it
if ( current )
{
current = ( current+7 ) / 8;
memset( state->buffer + 2*current, 0, 2*( state->blocksize/8 - current) );
SIMD_2way_Compress( state, state->buffer, 0 );
}
//* Input the message length as the last block
memset( state->buffer, 0, 2*( state->blocksize/8 ) );
l = state->count;
for ( i = 0; i < 8; i++ )
{
state->buffer[ i ] = l & 0xff;
state->buffer[ i+16 ] = l & 0xff;
l >>= 8;
}
if ( state->count < 16384 )
isshort = 2;
SIMD_2way_Compress( state, state->buffer, isshort );
memcpy( hashval, state->A, 2*( state->hashbitlen / 8 ) );
return 0;
}
#endif

View File

@@ -26,8 +26,6 @@ int simd_4way_update( simd_4way_context *state, const void *data,
int simd_4way_close( simd_4way_context *state, void *hashval );
int simd_4way_update_close( simd_4way_context *state, void *hashval,
const void *data, int databitlen );
int simd512_4way_full( simd_4way_context *state, void *hashval,
const void *data, int datalen );
#endif
@@ -47,8 +45,5 @@ int simd_2way_update( simd_2way_context *state, const void *data,
int simd_2way_close( simd_2way_context *state, void *hashval );
int simd_2way_update_close( simd_2way_context *state, void *hashval,
const void *data, int databitlen );
int simd512_2way_full( simd_2way_context *state, void *hashval,
const void *data, int datalen );
#endif
#endif

View File

@@ -45,18 +45,18 @@ extern "C"{
#endif
/*
static const uint64_t IV256[] = {
0xCCD044A12FDB3E13, 0xE83590301A79A9EB,
0x55AEA0614F816E6F, 0x2A2767A4AE9B94DB,
0xEC06025E74DD7683, 0xE7A436CDC4746251,
0xC36FBAF9393AD185, 0x3EEDBA1833EDFC13
static const sph_u64 IV256[] = {
SPH_C64(0xCCD044A12FDB3E13), SPH_C64(0xE83590301A79A9EB),
SPH_C64(0x55AEA0614F816E6F), SPH_C64(0x2A2767A4AE9B94DB),
SPH_C64(0xEC06025E74DD7683), SPH_C64(0xE7A436CDC4746251),
SPH_C64(0xC36FBAF9393AD185), SPH_C64(0x3EEDBA1833EDFC13)
};
static const uint64_t IV512[] = {
0x4903ADFF749C51CE, 0x0D95DE399746DF03,
0x8FD1934127C79BCE, 0x9A255629FF352CB1,
0x5DB62599DF6CA7B0, 0xEABE394CA9D5C3F4,
0x991112C71A75B523, 0xAE18A40B660FCC33
static const sph_u64 IV512[] = {
SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
};
*/
@@ -372,7 +372,7 @@ do { \
#define UBI_BIG_8WAY(etype, extra) \
do { \
uint64_t t0, t1, t2; \
sph_u64 t0, t1, t2; \
__m512i h8; \
__m512i m0 = buf[0]; \
__m512i m1 = buf[1]; \
@@ -391,8 +391,8 @@ do { \
__m512i p5 = m5; \
__m512i p6 = m6; \
__m512i p7 = m7; \
t0 = (uint64_t)(bcount << 6) + (uint64_t)(extra); \
t1 = (bcount >> 58) + ((uint64_t)(etype) << 55); \
t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
TFBIG_KINIT_8WAY(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
TFBIG_8WAY_4e(0); \
TFBIG_8WAY_4o(1); \
@@ -425,7 +425,7 @@ do { \
#define DECL_STATE_BIG_8WAY \
__m512i h0, h1, h2, h3, h4, h5, h6, h7; \
uint64_t bcount;
sph_u64 bcount;
#endif // AVX512
@@ -488,7 +488,7 @@ do { \
// scale buf offset by 4
#define UBI_BIG_4WAY(etype, extra) \
do { \
uint64_t t0, t1, t2; \
sph_u64 t0, t1, t2; \
__m256i h8; \
__m256i m0 = buf[0]; \
__m256i m1 = buf[1]; \
@@ -507,8 +507,8 @@ do { \
__m256i p5 = m5; \
__m256i p6 = m6; \
__m256i p7 = m7; \
t0 = (uint64_t)(bcount << 6) + (uint64_t)(extra); \
t1 = (bcount >> 58) + ((uint64_t)(etype) << 55); \
t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
TFBIG_KINIT_4WAY(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
TFBIG_4WAY_4e(0); \
TFBIG_4WAY_4o(1); \
@@ -542,7 +542,7 @@ do { \
#define DECL_STATE_BIG_4WAY \
__m256i h0, h1, h2, h3, h4, h5, h6, h7; \
uint64_t bcount;
sph_u64 bcount;
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

View File

@@ -48,8 +48,14 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "simd-utils.h"
// Output size in bits
#define SPH_SIZE_skein256 256
#define SPH_SIZE_skein512 512
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
typedef struct
@@ -57,11 +63,11 @@ typedef struct
__m512i buf[8];
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
size_t ptr;
uint64_t bcount;
} skein_8way_big_context __attribute__ ((aligned (128)));
sph_u64 bcount;
} sph_skein_8way_big_context __attribute__ ((aligned (128)));
typedef skein_8way_big_context skein512_8way_context;
typedef skein_8way_big_context skein256_8way_context;
typedef sph_skein_8way_big_context skein512_8way_context;
typedef sph_skein_8way_big_context skein256_8way_context;
void skein512_8way_init( skein512_8way_context *sc );
void skein512_8way_update( void *cc, const void *data, size_t len );
@@ -78,19 +84,21 @@ typedef struct
__m256i buf[8];
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
size_t ptr;
uint64_t bcount;
} skein_4way_big_context __attribute__ ((aligned (128)));
sph_u64 bcount;
} sph_skein_4way_big_context __attribute__ ((aligned (128)));
typedef skein_4way_big_context skein512_4way_context;
typedef skein_4way_big_context skein256_4way_context;
typedef sph_skein_4way_big_context skein512_4way_context;
typedef sph_skein_4way_big_context skein256_4way_context;
void skein512_4way_init( skein512_4way_context *sc );
void skein512_4way_update( void *cc, const void *data, size_t len );
void skein512_4way_close( void *cc, void *dst );
//#define skein512_4way skein512_4way_update
void skein256_4way_init( skein256_4way_context *sc );
void skein256_4way_update( void *cc, const void *data, size_t len );
void skein256_4way_close( void *cc, void *dst );
//#define skein256_4way skein256_4way_update
#ifdef __cplusplus
}

View File

@@ -214,14 +214,14 @@ int scanhash_drop( struct work *work, uint32_t max_nonce,
}
void drop_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t* end_nonce_ptr )
uint32_t* end_nonce_ptr, bool clean_job )
{
// ignore POK in first word
// const int nonce_i = 19;
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|| ( *nonceptr >= *end_nonce_ptr ) )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) )
{
work_free( work );
work_copy( work, g_work );

View File

@@ -527,7 +527,7 @@ int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
mm256_bswap32_intrlv80_4x64( vdata, pdata );
blake512_4way_init( &x13bcd_ctx_mid );
blake512_4way_update( &x13bcd_ctx_mid, vdata, 64 );
blake512_4way( &x13bcd_ctx_mid, vdata, 64 );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(

View File

@@ -227,7 +227,7 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
mm256_bswap32_intrlv80_4x64( vdata, pdata );
blake512_4way_init( &x13sm3_ctx_mid );
blake512_4way_update( &x13sm3_ctx_mid, vdata, 64 );
blake512_4way( &x13sm3_ctx_mid, vdata, 64 );
for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] )

View File

@@ -65,7 +65,6 @@ union _x16r_8way_context_overlay
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
void x16r_8way_hash( void* output, const void* input )
{
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
@@ -99,16 +98,18 @@ void x16r_8way_hash( void* output, const void* input )
switch ( algo )
{
case BLAKE:
blake512_8way_init( &ctx.blake );
if ( i == 0 )
blake512_8way_full( &ctx.blake, vhash, input, size );
blake512_8way_update( &ctx.blake, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
blake512_8way_full( &ctx.blake, vhash, vhash, size );
blake512_8way_update( &ctx.blake, vhash, size );
}
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5,
hash6, hash7, vhash );
blake512_8way_close( &ctx.blake, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case BMW:
bmw512_8way_init( &ctx.bmw );
@@ -127,22 +128,40 @@ void x16r_8way_hash( void* output, const void* input )
case GROESTL:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
groestl512_4way_full( &ctx.groestl, vhash, vhash, size );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
groestl512_4way_full( &ctx.groestl, vhash, vhash, size );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)in4, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)in5, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)in6, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)in7, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)in0, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)in1, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)in2, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)in3, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash4,
(const char*)in4, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash5,
(const char*)in5, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash6,
(const char*)in6, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash7,
(const char*)in7, size<<3 );
#endif
break;
break;
case SKEIN:
skein512_8way_init( &ctx.skein );
if ( i == 0 )
@@ -187,27 +206,33 @@ void x16r_8way_hash( void* output, const void* input )
break;
case LUFFA:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size);
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case CUBEHASH:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
cube_4way_full( &ctx.cube, vhash, 512, vhash, size );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
cube_4way_full( &ctx.cube, vhash, 512, vhash, size );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case SHAVITE:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
@@ -235,42 +260,54 @@ void x16r_8way_hash( void* output, const void* input )
sph_shavite512( &ctx.shavite, in7, size );
sph_shavite512_close( &ctx.shavite, hash7 );
#endif
break;
break;
case SIMD:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
simd512_4way_full( &ctx.simd, vhash, vhash, size );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
simd512_4way_full( &ctx.simd, vhash, vhash, size );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case ECHO:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)in0, size );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)in1, size );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)in2, size );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)in3, size );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)in4, size );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)in5, size );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)in6, size );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)in7, size );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
(const BitSequence*)in0, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
(const BitSequence*)in1, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
(const BitSequence*)in2, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
(const BitSequence*)in3, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash4,
(const BitSequence*)in4, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash5,
(const BitSequence*)in5, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash6,
(const BitSequence*)in6, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash7,
(const BitSequence*)in7, size<<3 );
#endif
break;
break;
case HAMSI:
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
@@ -280,7 +317,7 @@ void x16r_8way_hash( void* output, const void* input )
hamsi512_8way_close( &ctx.hamsi, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
@@ -343,18 +380,13 @@ void x16r_8way_hash( void* output, const void* input )
sph_whirlpool_close( &ctx.whirlpool, hash7 );
break;
case SHA_512:
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
sha512_8way_init( &ctx.sha512 );
if ( i == 0 )
sha512_8way_update( &ctx.sha512, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
sha512_8way_update( &ctx.sha512, vhash, size );
}
sha512_8way_update( &ctx.sha512, vhash, size );
sha512_8way_close( &ctx.sha512, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
hash7, vhash );
break;
}
size = 64;
@@ -399,7 +431,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
do
@@ -473,13 +505,15 @@ void x16r_4way_hash( void* output, const void* input )
switch ( algo )
{
case BLAKE:
blake512_4way_init( &ctx.blake );
if ( i == 0 )
blake512_4way_full( &ctx.blake, vhash, input, size );
blake512_4way_update( &ctx.blake, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way_full( &ctx.blake, vhash, vhash, size );
blake512_4way_update( &ctx.blake, vhash, size );
}
blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case BMW:
@@ -495,10 +529,18 @@ void x16r_4way_hash( void* output, const void* input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case GROESTL:
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)in0, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)in1, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)in2, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)in3, size<<3 );
break;
case SKEIN:
skein512_4way_init( &ctx.skein );
@@ -538,10 +580,12 @@ void x16r_4way_hash( void* output, const void* input )
break;
case LUFFA:
intrlv_2x128( vhash, in0, in1, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size );
dintrlv_2x128_512( hash0, hash1, vhash );
intrlv_2x128( vhash, in2, in3, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size);
dintrlv_2x128_512( hash2, hash3, vhash );
break;
case CUBEHASH:
@@ -574,21 +618,27 @@ void x16r_4way_hash( void* output, const void* input )
break;
case SIMD:
intrlv_2x128( vhash, in0, in1, size<<3 );
simd512_2way_full( &ctx.simd, vhash, vhash, size );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128_512( hash0, hash1, vhash );
intrlv_2x128( vhash, in2, in3, size<<3 );
simd512_2way_full( &ctx.simd, vhash, vhash, size );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128_512( hash2, hash3, vhash );
break;
case ECHO:
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)in0, size );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)in1, size );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)in2, size );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)in3, size );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
(const BitSequence*)in0, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
(const BitSequence*)in1, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
(const BitSequence*)in2, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
(const BitSequence*)in3, size<<3 );
break;
case HAMSI:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
@@ -677,7 +727,7 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
do

View File

@@ -39,13 +39,9 @@
#include <openssl/sha.h>
#endif
#if defined(X21S_8WAY) || defined(X21S_4WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
#endif
#if defined (X21S_8WAY)
static __thread uint64_t* x21s_8way_matrix;

View File

@@ -72,19 +72,27 @@ void x17_8way_hash( void *state, const void *input )
uint64_t hash7[8] __attribute__ ((aligned (64)));
x17_8way_context_overlay ctx;
blake512_8way_full( &ctx.blake, vhash, input, 80 );
// 1 Blake
blake512_8way_init( &ctx.blake );
blake512_8way_update( &ctx.blake, input, 80 );
blake512_8way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhash );
// 3 Groestl
#if defined(__VAES__)
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
#else
@@ -92,44 +100,65 @@ void x17_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
#endif
// 4 Skein parallel 4 way 64 bit
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
// 5 JH
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
jh512_8way_close( &ctx.jh, vhash );
// 6 Keccak
keccak512_8way_init( &ctx.keccak );
keccak512_8way_update( &ctx.keccak, vhash, 64 );
keccak512_8way_close( &ctx.keccak, vhash );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, 64 );
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, 64 );
// 7 Luffa
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
// 8 Cubehash
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
// 9 Shavite
#if defined(__VAES__)
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
#else
@@ -166,13 +195,20 @@ void x17_8way_hash( void *state, const void *input )
#endif
simd512_4way_full( &ctx.simd, vhashA, vhashA, 64 );
simd512_4way_full( &ctx.simd, vhashB, vhashB, 64 );
// 10 Simd
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
// 11 Echo
#if defined(__VAES__)
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
@@ -181,28 +217,37 @@ void x17_8way_hash( void *state, const void *input )
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)hash4, 64 );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)hash5, 64 );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)hash6, 64 );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)hash7, 64 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash1,
(const BitSequence *) hash1, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash2,
(const BitSequence *) hash2, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash4,
(const BitSequence *) hash4, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash5,
(const BitSequence *) hash5, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash6,
(const BitSequence *) hash6, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash7,
(const BitSequence *) hash7, 512 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
#endif
// 12 Hamsi
hamsi512_8way_init( &ctx.hamsi );
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
hamsi512_8way_close( &ctx.hamsi, vhash );
@@ -210,6 +255,7 @@ void x17_8way_hash( void *state, const void *input )
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
// 13 Fugue serial
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
@@ -235,6 +281,7 @@ void x17_8way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash7, 64 );
sph_fugue512_close( &ctx.fugue, hash7 );
// 14 Shabal, parallel 8 way 32 bit
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -245,6 +292,7 @@ void x17_8way_hash( void *state, const void *input )
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
// 15 Whirlpool serial
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
@@ -270,6 +318,7 @@ void x17_8way_hash( void *state, const void *input )
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
// 16 SHA512 parallel 64 bit
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -277,6 +326,7 @@ void x17_8way_hash( void *state, const void *input )
sha512_8way_update( &ctx.sha512, vhash, 64 );
sha512_8way_close( &ctx.sha512, vhash );
// 17 Haval parallel 32 bit
rintrlv_8x64_8x32( vhashA, vhash, 512 );
haval256_5_8way_init( &ctx.haval );
@@ -299,28 +349,25 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x17_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if unlikely( ( hash7[ lane ] <= Htarg ) && !bench )
if unlikely( ( hash7[ lane ] <= Htarg ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if likely( valid_hash( lane_hash, ptarget ) )
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = bswap_32( n + lane );
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
@@ -354,7 +401,7 @@ typedef union _x17_4way_context_overlay x17_4way_context_overlay;
void x17_4way_hash( void *state, const void *input )
{
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
uint64_t vhash[8*4] __attribute__ ((aligned (128)));
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
uint64_t hash0[8] __attribute__ ((aligned (64)));
@@ -363,59 +410,91 @@ void x17_4way_hash( void *state, const void *input )
uint64_t hash3[8] __attribute__ ((aligned (64)));
x17_4way_context_overlay ctx;
blake512_4way_full( &ctx.blake, vhash, input, 80 );
// 1 Blake parallel 4 way 64 bit
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serialize
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
// 3 Groestl
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
// Parallellize
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
// 4 Skein parallel 4 way 64 bit
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
// 5 JH
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );
// 6 Keccak
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
// 7 Luffa parallel 2 way 128 bit
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
// 8 Cubehash
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );
// 9 Shavite
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
// 10 Simd
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
dintrlv_2x128_512( hash0, hash1, vhashA );
dintrlv_2x128_512( hash2, hash3, vhashB );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
// 11 Echo serial
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash1,
(const BitSequence *) hash1, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash2,
(const BitSequence *) hash2, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 );
// 12 Hamsi parallel 4 way 64 bit
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi );
@@ -424,6 +503,7 @@ void x17_4way_hash( void *state, const void *input )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
// 13 Fugue serial
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
@@ -437,6 +517,7 @@ void x17_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal, parallel 4 way 32 bit
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal );
@@ -445,6 +526,7 @@ void x17_4way_hash( void *state, const void *input )
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
// 15 Whirlpool serial
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
@@ -458,12 +540,14 @@ void x17_4way_hash( void *state, const void *input )
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
// 16 SHA512 parallel 64 bit
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
sha512_4way_init( &ctx.sha512 );
sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash );
// 17 Haval parallel 32 bit
rintrlv_4x64_4x32( vhashB, vhash, 512 );
haval256_5_4way_init( &ctx.haval );
@@ -474,8 +558,8 @@ void x17_4way_hash( void *state, const void *input )
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
@@ -486,30 +570,27 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x17_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash7[ lane ] <= Htarg && !bench ) )
{
if unlikely( ( hash7[ lane ] <= Htarg ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = bswap_32( n + lane );
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -11,8 +11,9 @@
#include "algo/skein/skein-hash-4way.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/shavite/shavite-hash-2way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
@@ -73,7 +74,9 @@ void xevan_8way_hash( void *output, const void *input )
const int dataLen = 128;
xevan_8way_context_overlay ctx __attribute__ ((aligned (64)));
blake512_8way_full( &ctx.blake, vhash, input, 80 );
blake512_8way_init( &ctx.blake );
blake512_8way_update( &ctx.blake, input, 80 );
blake512_8way_close( &ctx.blake, vhash );
memset( &vhash[8<<3], 0, 64<<3 );
bmw512_8way_init( &ctx.bmw );
@@ -84,8 +87,10 @@ void xevan_8way_hash( void *output, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, dataLen );
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, dataLen );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, dataLen<<3 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, dataLen<<3 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
@@ -94,14 +99,30 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7,
dataLen<<3 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -122,16 +143,22 @@ void xevan_8way_hash( void *output, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, dataLen );
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, dataLen );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
#if defined(__VAES__)
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, dataLen );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
#else
@@ -168,13 +195,17 @@ void xevan_8way_hash( void *output, const void *input )
#endif
simd512_4way_full( &ctx.simd, vhashA, vhashA, dataLen );
simd512_4way_full( &ctx.simd, vhashB, vhashB, dataLen );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
#if defined(__VAES__)
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, dataLen );
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, dataLen );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashA, vhashA, dataLen<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashB, vhashB, dataLen<<3 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
@@ -183,23 +214,31 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)hash4, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)hash5, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)hash6, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)hash7, dataLen );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash1,
(const BitSequence *) hash1, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash2,
(const BitSequence *) hash2, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash4,
(const BitSequence *) hash4, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash5,
(const BitSequence *) hash5, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash6,
(const BitSequence *) hash6, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash7,
(const BitSequence *) hash7, dataLen<<3 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -289,7 +328,9 @@ void xevan_8way_hash( void *output, const void *input )
memset( &vhash[ 4<<3 ], 0, (dataLen-32) << 3 );
blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );
blake512_8way_init( &ctx.blake );
blake512_8way_update( &ctx.blake, vhash, dataLen );
blake512_8way_close(&ctx.blake, vhash);
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
@@ -299,8 +340,10 @@ void xevan_8way_hash( void *output, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, dataLen );
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, dataLen );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, dataLen<<3 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, dataLen<<3 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
@@ -309,14 +352,30 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7,
dataLen<<3 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -337,16 +396,22 @@ void xevan_8way_hash( void *output, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, dataLen );
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, dataLen );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
#if defined(__VAES__)
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, dataLen );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
#else
@@ -383,13 +448,17 @@ void xevan_8way_hash( void *output, const void *input )
#endif
simd512_4way_full( &ctx.simd, vhashA, vhashA, dataLen );
simd512_4way_full( &ctx.simd, vhashB, vhashB, dataLen );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
#if defined(__VAES__)
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, dataLen );
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, dataLen );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashA, vhashA, dataLen<<3 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashB, vhashB, dataLen<<3 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
@@ -398,22 +467,30 @@ void xevan_8way_hash( void *output, const void *input )
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)hash4, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)hash5, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)hash6, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)hash7, dataLen );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash1,
(const BitSequence *) hash1, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash2,
(const BitSequence *) hash2, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash4,
(const BitSequence *) hash4, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash5,
(const BitSequence *) hash5, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash6,
(const BitSequence *) hash6, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash7,
(const BitSequence *) hash7, dataLen<<3 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, dataLen<<3 );
@@ -580,7 +657,9 @@ void xevan_4way_hash( void *output, const void *input )
// parallel 4 way
blake512_4way_full( &ctx.blake, vhash, input, 80 );
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close(&ctx.blake, vhash);
memset( &vhash[8<<2], 0, 64<<2 );
bmw512_4way_init( &ctx.bmw );
@@ -590,10 +669,18 @@ void xevan_4way_hash( void *output, const void *input )
// Serial
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
dataLen<<3 );
// Parallel 4way
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -612,11 +699,15 @@ void xevan_4way_hash( void *output, const void *input )
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, dataLen );
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, dataLen );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
@@ -631,15 +722,18 @@ void xevan_4way_hash( void *output, const void *input )
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, dataLen );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash1,
(const BitSequence *) hash1, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash2,
(const BitSequence *) hash2, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, dataLen<<3 );
// Parallel
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -711,10 +805,18 @@ void xevan_4way_hash( void *output, const void *input )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
dataLen<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
dataLen<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -732,11 +834,15 @@ void xevan_4way_hash( void *output, const void *input )
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, dataLen );
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, dataLen );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
@@ -751,14 +857,18 @@ void xevan_4way_hash( void *output, const void *input )
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, dataLen );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, dataLen );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *) hash0, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash1,
(const BitSequence *) hash1, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash2,
(const BitSequence *) hash2, dataLen<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, dataLen<<3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -824,7 +934,7 @@ int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id;
int thr_id = mythr->id; // thr_id arg is deprecated
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];

View File

@@ -167,10 +167,10 @@ void x22i_8way_hash( void *output, const void *input )
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
#else
@@ -214,12 +214,12 @@ void x22i_8way_hash( void *output, const void *input )
#if defined(__VAES__)
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
#else

409
algo/yescrypt/sha256_Y.c Normal file
View File

@@ -0,0 +1,409 @@
/*-
* Copyright 2005,2007,2009 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <stdint.h>
#include <string.h>
#include "sysendian.h"
#include "sha256_Y.h"
#include "compat.h"
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
/*
 * Serialize a vector of len/4 32-bit words from src into len big-endian
 * bytes at dst.  len must be a multiple of 4.
 */
static void
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
	size_t n = len / 4;

	while (n-- > 0) {
		be32enc(dst, *src++);
		dst += 4;
	}
}
/*
* Decode a big-endian length len vector of (unsigned char) into a length
* len/4 vector of (uint32_t). Assumes len is a multiple of 4.
*/
/*
 * Parse len big-endian bytes at src into a vector of len/4 32-bit words
 * at dst.  len must be a multiple of 4.
 */
static void
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
{
	size_t n = len / 4;

	while (n-- > 0) {
		*dst++ = be32dec(src);
		src += 4;
	}
}
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define SHR(x, n) (x >> n)
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
/* SHA256 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
t0 = h + S1(e) + Ch(e, f, g) + k; \
t1 = S0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1;
/* Adjusted round function for rotating state */
#define RNDr(S, W, i, k) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
W[i] + k)
/*
* SHA256 block compression function. The 256-bit state is transformed via
* the 512-bit input block to produce a new state.
*/
/*
 * SHA256_Transform_Y(state, block):
 * SHA-256 block compression.  Mixes one 64-byte big-endian message block
 * into the 8-word chaining state (FIPS 180-4).  The 64 rounds are fully
 * unrolled; RNDr "rotates" the working variables by index arithmetic
 * (S[(64 - i) % 8] etc.), so no register shuffling code is needed.
 */
static void
SHA256_Transform_Y(uint32_t * state, const unsigned char block[64])
{
	uint32_t _ALIGN(128) W[64], S[8];
	uint32_t t0, t1;
	int i;
	/* 1. Prepare message schedule W: 16 words from the block, 48 expanded. */
	be32dec_vect(W, block, 64);
	for (i = 16; i < 64; i++)
		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
	/* 2. Initialize working variables from the current state. */
	memcpy(S, state, 32);
	/* 3. Mix: one RNDr per round, constants are the SHA-256 K table. */
	RNDr(S, W, 0, 0x428a2f98);
	RNDr(S, W, 1, 0x71374491);
	RNDr(S, W, 2, 0xb5c0fbcf);
	RNDr(S, W, 3, 0xe9b5dba5);
	RNDr(S, W, 4, 0x3956c25b);
	RNDr(S, W, 5, 0x59f111f1);
	RNDr(S, W, 6, 0x923f82a4);
	RNDr(S, W, 7, 0xab1c5ed5);
	RNDr(S, W, 8, 0xd807aa98);
	RNDr(S, W, 9, 0x12835b01);
	RNDr(S, W, 10, 0x243185be);
	RNDr(S, W, 11, 0x550c7dc3);
	RNDr(S, W, 12, 0x72be5d74);
	RNDr(S, W, 13, 0x80deb1fe);
	RNDr(S, W, 14, 0x9bdc06a7);
	RNDr(S, W, 15, 0xc19bf174);
	RNDr(S, W, 16, 0xe49b69c1);
	RNDr(S, W, 17, 0xefbe4786);
	RNDr(S, W, 18, 0x0fc19dc6);
	RNDr(S, W, 19, 0x240ca1cc);
	RNDr(S, W, 20, 0x2de92c6f);
	RNDr(S, W, 21, 0x4a7484aa);
	RNDr(S, W, 22, 0x5cb0a9dc);
	RNDr(S, W, 23, 0x76f988da);
	RNDr(S, W, 24, 0x983e5152);
	RNDr(S, W, 25, 0xa831c66d);
	RNDr(S, W, 26, 0xb00327c8);
	RNDr(S, W, 27, 0xbf597fc7);
	RNDr(S, W, 28, 0xc6e00bf3);
	RNDr(S, W, 29, 0xd5a79147);
	RNDr(S, W, 30, 0x06ca6351);
	RNDr(S, W, 31, 0x14292967);
	RNDr(S, W, 32, 0x27b70a85);
	RNDr(S, W, 33, 0x2e1b2138);
	RNDr(S, W, 34, 0x4d2c6dfc);
	RNDr(S, W, 35, 0x53380d13);
	RNDr(S, W, 36, 0x650a7354);
	RNDr(S, W, 37, 0x766a0abb);
	RNDr(S, W, 38, 0x81c2c92e);
	RNDr(S, W, 39, 0x92722c85);
	RNDr(S, W, 40, 0xa2bfe8a1);
	RNDr(S, W, 41, 0xa81a664b);
	RNDr(S, W, 42, 0xc24b8b70);
	RNDr(S, W, 43, 0xc76c51a3);
	RNDr(S, W, 44, 0xd192e819);
	RNDr(S, W, 45, 0xd6990624);
	RNDr(S, W, 46, 0xf40e3585);
	RNDr(S, W, 47, 0x106aa070);
	RNDr(S, W, 48, 0x19a4c116);
	RNDr(S, W, 49, 0x1e376c08);
	RNDr(S, W, 50, 0x2748774c);
	RNDr(S, W, 51, 0x34b0bcb5);
	RNDr(S, W, 52, 0x391c0cb3);
	RNDr(S, W, 53, 0x4ed8aa4a);
	RNDr(S, W, 54, 0x5b9cca4f);
	RNDr(S, W, 55, 0x682e6ff3);
	RNDr(S, W, 56, 0x748f82ee);
	RNDr(S, W, 57, 0x78a5636f);
	RNDr(S, W, 58, 0x84c87814);
	RNDr(S, W, 59, 0x8cc70208);
	RNDr(S, W, 60, 0x90befffa);
	RNDr(S, W, 61, 0xa4506ceb);
	RNDr(S, W, 62, 0xbef9a3f7);
	RNDr(S, W, 63, 0xc67178f2);
	/* 4. Mix local working variables into global state */
	for (i = 0; i < 8; i++)
		state[i] += S[i];
#if 0
	/* Clean the stack. */
	/* NOTE(review): zeroization compiled out, presumably for hashing
	 * throughput where inputs are not secret — confirm this code path is
	 * never used with secret key material. */
	memset(W, 0, 256);
	memset(S, 0, 32);
	t0 = t1 = 0;
#endif
}
/*
 * SHA-256 padding block: a single 0x80 marker byte followed by zeros.
 * Only a prefix of this table is ever appended by SHA256_Pad_Y.  It is
 * read-only, so it is declared const to keep it in read-only data and
 * prevent accidental modification.
 */
static const unsigned char PAD[64] = {
	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Add padding and terminating bit-count. */
/* Append the SHA-256 padding (0x80, zeros) and the 64-bit message
 * bit-count so the total length becomes a multiple of 64 bytes. */
static void
SHA256_Pad_Y(SHA256_CTX_Y * ctx)
{
	unsigned char lenbuf[8];
	uint32_t used, padlen;

	/* Snapshot the bit count first -- the padding updates change it. */
	be32enc_vect(lenbuf, ctx->count, 8);

	/* Pad with 1--64 bytes so the buffered length lands on 56 mod 64. */
	used = (ctx->count[1] >> 3) & 0x3f;
	padlen = (used < 56) ? (56 - used) : (120 - used);
	SHA256_Update_Y(ctx, PAD, (size_t)padlen);

	/* Terminating big-endian bit count fills the block to 64 bytes. */
	SHA256_Update_Y(ctx, lenbuf, 8);
}
/* SHA-256 initialization. Begins a SHA-256 operation. */
/* SHA-256 initialization: reset the bit counter and load the standard
 * FIPS 180-4 initial hash values. */
void
SHA256_Init_Y(SHA256_CTX_Y * ctx)
{
	static const uint32_t iv[8] = {
		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
		0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
	};
	int i;

	/* Zero bits processed so far. */
	ctx->count[0] = 0;
	ctx->count[1] = 0;

	for (i = 0; i < 8; i++)
		ctx->state[i] = iv[i];
}
/* Add bytes into the hash */
/* Absorb len bytes from in into the hash, buffering partial blocks in
 * ctx->buf and compressing each full 64-byte block as it completes. */
void
SHA256_Update_Y(SHA256_CTX_Y * ctx, const void *in, size_t len)
{
	const unsigned char *p = in;
	uint32_t lo, hi, have;
	size_t fill;

	/* Bytes left over in the buffer from previous updates. */
	have = (ctx->count[1] >> 3) & 0x3f;

	/* Add len * 8 to the 64-bit (two 32-bit words) bit counter. */
	lo = (uint32_t)len << 3;
	hi = (uint32_t)(len >> 29);
	ctx->count[1] += lo;
	if (ctx->count[1] < lo)
		ctx->count[0]++;
	ctx->count[0] += hi;

	/* Not enough to complete a block: just buffer the input. */
	if (len < 64 - have) {
		memcpy(&ctx->buf[have], p, len);
		return;
	}

	/* Top up and compress the partially filled block. */
	fill = 64 - have;
	memcpy(&ctx->buf[have], p, fill);
	SHA256_Transform_Y(ctx->state, ctx->buf);
	p += fill;
	len -= fill;

	/* Compress whole blocks directly from the caller's data. */
	while (len >= 64) {
		SHA256_Transform_Y(ctx->state, p);
		p += 64;
		len -= 64;
	}

	/* Stash the tail for the next call. */
	memcpy(ctx->buf, p, len);
}
/*
* SHA-256 finalization. Pads the input data, exports the hash value,
* and clears the context state.
*/
/* SHA-256 finalization: pad, emit the 32-byte big-endian digest, and
 * wipe the context so no message state lingers. */
void
SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx)
{
	/* Append padding and the terminating bit count. */
	SHA256_Pad_Y(ctx);

	/* Serialize the state as the digest. */
	be32enc_vect(digest, ctx->state, 32);

	/* Clear the context state. */
	memset(ctx, 0, sizeof(*ctx));
}
/* Initialize an HMAC-SHA256 operation with the given key. */
/* Initialize an HMAC-SHA256 operation with the given key (RFC 2104):
 * seed the inner context with K^0x36-pad and the outer with K^0x5c-pad. */
void
HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen)
{
	unsigned char ipad[64];
	unsigned char opad[64];
	unsigned char khash[32];
	const unsigned char *key = _K;
	size_t i;

	/* Keys longer than one block are replaced by their SHA-256 hash. */
	if (Klen > 64) {
		SHA256_Init(&ctx->ictx);
		SHA256_Update(&ctx->ictx, key, Klen);
		SHA256_Final(khash, &ctx->ictx);
		key = khash;
		Klen = 32;
	}

	/* Derive both pads in a single pass over the (possibly hashed) key. */
	for (i = 0; i < 64; i++) {
		unsigned char kb = (i < Klen) ? key[i] : 0;
		ipad[i] = kb ^ 0x36;
		opad[i] = kb ^ 0x5c;
	}

	/* Inner SHA256 operation is SHA256(K xor ipad || data). */
	SHA256_Init(&ctx->ictx);
	SHA256_Update(&ctx->ictx, ipad, 64);

	/* Outer SHA256 operation is SHA256(K xor opad || inner hash). */
	SHA256_Init(&ctx->octx);
	SHA256_Update(&ctx->octx, opad, 64);
}
/* Add bytes to the HMAC-SHA256 operation. */
/* Stream len bytes of message data into the HMAC computation.  Only the
 * inner hash advances here; the outer hash is touched at finalization. */
void
HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len)
{
	/* Feed data to the inner SHA256 operation. */
	SHA256_Update(&ctx->ictx, in, len);
}
/* Finish an HMAC-SHA256 operation. */
/* Finish an HMAC-SHA256 operation: digest = H(K^opad || H(K^ipad || msg)). */
void
HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx)
{
	unsigned char inner[32];

	/* Close the inner hash over the message. */
	SHA256_Final(inner, &ctx->ictx);

	/* Run the inner digest through the outer hash. */
	SHA256_Update(&ctx->octx, inner, 32);
	SHA256_Final(digest, &ctx->octx);
}
/**
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
*/
/*
 * PBKDF2_SHA256_Y(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
 * RFC 2898 PBKDF2 with HMAC-SHA256 as the PRF.  Derives dkLen bytes into
 * buf from the password and salt using c iterations per 32-byte block.
 * dkLen must be at most 32 * (2^32 - 1).
 */
void
PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
	size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
{
	HMAC_SHA256_CTX_Y PShctx, hctx;
	uint8_t _ALIGN(128) T[32];
	uint8_t _ALIGN(128) U[32];
	uint8_t ivec[4];
	size_t i, clen;
	uint64_t j;
	int k;
	/* Compute HMAC state after processing P and S; cloned per block below
	 * so the password/salt prefix is only hashed once. */
	HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen);
	HMAC_SHA256_Update_Y(&PShctx, salt, saltlen);
	/* Iterate through the blocks. */
	for (i = 0; i * 32 < dkLen; i++) {
		/* Generate INT(i + 1): big-endian 1-based block index. */
		be32enc(ivec, (uint32_t)(i + 1));
		/* Compute U_1 = PRF(P, S || INT(i)). */
		memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y));
		HMAC_SHA256_Update_Y(&hctx, ivec, 4);
		HMAC_SHA256_Final_Y(U, &hctx);
		/* T_i = U_1 ... */
		memcpy(T, U, 32);
		for (j = 2; j <= c; j++) {
			/* Compute U_j = PRF(P, U_{j-1}). */
			HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen);
			HMAC_SHA256_Update_Y(&hctx, U, 32);
			HMAC_SHA256_Final_Y(U, &hctx);
			/* ... xor U_j ... */
			for (k = 0; k < 32; k++)
				T[k] ^= U[k];
		}
		/* Copy as many bytes as necessary into buf (final block may be
		 * shorter than 32 bytes). */
		clen = dkLen - i * 32;
		if (clen > 32)
			clen = 32;
		memcpy(&buf[i * 32], T, clen);
	}
	/* Clean PShctx, since we never called _Final on it. */
	/* NOTE(review): cleanup compiled out, presumably for speed in the
	 * mining use case — confirm derived keys are not secret here. */
	//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
}

View File

@@ -1,6 +1,5 @@
/*-
* Copyright 2009 Colin Percival
* Copyright 2013-2018 Alexander Peslyak
* Copyright 2005,2007,2009 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -24,26 +23,47 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
* $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
*/
#ifndef _YESPOWERR8G_H_
#define _YESPOWERR8G_H_
#ifndef _SHA256_H_
#define _SHA256_H_
#include <sys/types.h>
#include <stdint.h>
#include <stdlib.h> /* for size_t */
#include "algo-gate-api.h"
#include "algo/yespower/yespower.h"
#include <openssl/sha.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct SHA256Context {
uint32_t state[8];
uint32_t count[2];
unsigned char buf[64];
} SHA256_CTX_Y;
extern int yespowerr8g_tls(const uint8_t *src, size_t srclen,
const yespower_params_t *params, yespower_binary_t *dst);
/*
typedef struct HMAC_SHA256Context {
SHA256_CTX_Y ictx;
SHA256_CTX_Y octx;
} HMAC_SHA256_CTX_Y;
*/
#ifdef __cplusplus
}
#endif
typedef struct HMAC_SHA256Context {
SHA256_CTX ictx;
SHA256_CTX octx;
} HMAC_SHA256_CTX_Y;
#endif /* !_YESPOWERR8G_H_ */
void SHA256_Init_Y(SHA256_CTX_Y *);
void SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t);
void SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *);
void HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
void HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
void HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *);
/**
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
*/
void PBKDF2_SHA256_Y(const uint8_t *, size_t, const uint8_t *, size_t,
uint64_t, uint8_t *, size_t);
#endif /* !_SHA256_H_ */

124
algo/yescrypt/sysendian.h Normal file
View File

@@ -0,0 +1,124 @@
/*-
* Copyright 2007-2009 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#ifndef _SYSENDIAN_H_
#define _SYSENDIAN_H_
/* If we don't have be64enc, the <sys/endian.h> we have isn't usable. */
#if !HAVE_DECL_BE64ENC
#undef HAVE_SYS_ENDIAN_H
#endif
#ifdef HAVE_SYS_ENDIAN_H
#include <sys/endian.h>
#else
#include <stdint.h>
/* Read an unaligned 64-bit big-endian value from memory. */
static __inline uint64_t
be64dec(const void *pp)
{
	const uint8_t *p = (uint8_t const *)pp;
	uint64_t v = 0;
	int i;

	/* Fold the 8 bytes in MSB-first order. */
	for (i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return (v);
}
/* Write a 64-bit value to memory in big-endian byte order. */
static __inline void
be64enc(void *pp, uint64_t x)
{
	uint8_t *p = (uint8_t *)pp;
	int i;

	/* Most significant byte first. */
	for (i = 0; i < 8; i++)
		p[i] = (uint8_t)(x >> (56 - 8 * i));
}
/* Read an unaligned 64-bit little-endian value from memory. */
static __inline uint64_t
le64dec(const void *pp)
{
	const uint8_t *p = (uint8_t const *)pp;
	uint64_t v = 0;
	int i;

	/* Fold the 8 bytes starting from the most significant (last) one. */
	for (i = 7; i >= 0; i--)
		v = (v << 8) | p[i];
	return (v);
}
/* Write a 64-bit value to memory in little-endian byte order. */
static __inline void
le64enc(void *pp, uint64_t x)
{
	uint8_t *p = (uint8_t *)pp;
	int i;

	/* Least significant byte first. */
	for (i = 0; i < 8; i++) {
		p[i] = (uint8_t)x;
		x >>= 8;
	}
}
/* Read an unaligned 32-bit big-endian value from memory. */
static __inline uint32_t
be32dec(const void *pp)
{
	const uint8_t *p = (uint8_t const *)pp;
	uint32_t v = 0;
	int i;

	/* Fold the 4 bytes in MSB-first order. */
	for (i = 0; i < 4; i++)
		v = (v << 8) | p[i];
	return (v);
}
/* Write a 32-bit value to memory in big-endian byte order. */
static __inline void
be32enc(void *pp, uint32_t x)
{
	uint8_t *p = (uint8_t *)pp;
	int i;

	/* Most significant byte first. */
	for (i = 0; i < 4; i++)
		p[i] = (uint8_t)(x >> (24 - 8 * i));
}
#endif /* !HAVE_SYS_ENDIAN_H */
#endif /* !_SYSENDIAN_H_ */

View File

@@ -48,7 +48,9 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "algo/sha/hmac-sha256-hash.h"
#include "sha256_Y.h"
#include "sysendian.h"
#include "yescrypt.h"
#include "yescrypt-platform.h"
@@ -1310,7 +1312,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
}
/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size);
PBKDF2_SHA256_Y(passwd, passwdlen, salt, saltlen, 1, B, B_size);
if (t || flags)
memcpy(sha256, B, sizeof(sha256));
@@ -1340,7 +1342,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
}
/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen);
PBKDF2_SHA256_Y(passwd, passwdlen, B, B_size, 1, buf, buflen);
/*
* Except when computing classic scrypt, allow all computation so far
@@ -1352,14 +1354,14 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
if ((t || flags) && buflen == sizeof(sha256)) {
/* Compute ClientKey */
{
HMAC_SHA256_CTX ctx;
HMAC_SHA256_Init(&ctx, buf, buflen);
HMAC_SHA256_CTX_Y ctx;
HMAC_SHA256_Init_Y(&ctx, buf, buflen);
if ( yescrypt_client_key )
HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
HMAC_SHA256_Update_Y( &ctx, (uint8_t*)yescrypt_client_key,
yescrypt_client_key_len );
else
HMAC_SHA256_Update( &ctx, salt, saltlen );
HMAC_SHA256_Final(sha256, &ctx);
HMAC_SHA256_Update_Y( &ctx, salt, saltlen );
HMAC_SHA256_Final_Y(sha256, &ctx);
}
/* Compute StoredKey */
{

View File

@@ -25,7 +25,7 @@
#include "compat.h"
#include "yescrypt.h"
#include "algo/sha/hmac-sha256-hash.h"
#include "sha256_Y.h"
#include "algo-gate-api.h"
#define BYTES2CHARS(bytes) \
@@ -385,30 +385,35 @@ void yescrypthash(void *output, const void *input)
int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) vhash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce;
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t _ALIGN(64) vhash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
for ( int k = 0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
endiandata[19] = n;
do {
yescrypt_hash((char*) endiandata, (char*) vhash, 80);
if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
{
be32enc( pdata+19, n );
submit_solution( work, vhash, mythr );
}
endiandata[19] = ++n;
} while ( n < last_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
for (int k = 0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
do {
be32enc(&endiandata[19], n);
yescrypt_hash((char*) endiandata, (char*) vhash, 80);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget )
&& !opt_benchmark )
{
pdata[19] = n;
submit_solution( work, vhash, mythr );
}
n++;
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
void yescrypt_gate_base(algo_gate_t *gate )

View File

@@ -30,8 +30,9 @@
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "simd-utils.h"
#include <algo/yespower/crypto/sph_types.h>
#include <algo/yespower/utils/sysendian.h>
#include "blake2b-yp.h"
// Cyclic right rotation.
@@ -271,7 +272,7 @@ void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t *
{
hmac_yp_ctx PShctx, hctx;
size_t i;
uint32_t ivec;
uint8_t ivec[4];
uint8_t U[32];
uint8_t T[32];
uint64_t j;
@@ -285,11 +286,11 @@ void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t *
/* Iterate through the blocks. */
for (i = 0; i * 32 < dkLen; i++) {
/* Generate INT(i + 1). */
ivec = bswap_32( i+1 );
be32enc(ivec, (uint32_t)(i + 1));
/* Compute U_1 = PRF(P, S || INT(i)). */
memcpy(&hctx, &PShctx, sizeof(hmac_yp_ctx));
hmac_blake2b_yp_update(&hctx, &ivec, 4);
hmac_blake2b_yp_update(&hctx, ivec, 4);
hmac_blake2b_yp_final(&hctx, U);
/* T_i = U_1 ... */

View File

@@ -0,0 +1 @@
#define insecure_memzero(buf, len) /* empty */

View File

@@ -28,10 +28,46 @@
#include <stdint.h>
#include <string.h>
#include "simd-utils.h"
#include "hmac-sha256-hash.h"
#include "sysendian.h"
#include "sha256_p.h"
#include "compat.h"
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define SHR(x, n) (x >> n)
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
/* SHA256 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
t0 = h + S1(e) + Ch(e, f, g) + k; \
t1 = S0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1;
/* Adjusted round function for rotating state */
#define RNDr(S, W, i, k) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
W[i] + k)
/*
static unsigned char PAD[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
*/
/**
* SHA256_Buf(in, len, digest):
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
@@ -40,9 +76,9 @@ void
SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
{
SHA256_CTX ctx;
SHA256_Init( &ctx );
SHA256_Update( &ctx, in, len );
SHA256_Final( digest, &ctx );
SHA256_Init( &ctx );
SHA256_Update( &ctx, in, len );
SHA256_Final( digest, &ctx );
}
/**
@@ -51,18 +87,19 @@ SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
* length ${Klen}, and write the result to ${digest}.
*/
void
HMAC_SHA256_Buf( const void *K, size_t Klen, const void *in, size_t len,
uint8_t digest[32])
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
uint8_t digest[32])
{
HMAC_SHA256_CTX ctx;
HMAC_SHA256_Init( &ctx, K, Klen );
HMAC_SHA256_Update( &ctx, in, len );
HMAC_SHA256_Final( digest, &ctx );
HMAC_SHA256_CTX ctx;
HMAC_SHA256_Init( &ctx, K, Klen );
HMAC_SHA256_Update( &ctx, in, len );
HMAC_SHA256_Final( digest, &ctx );
}
/* Initialize an HMAC-SHA256 operation with the given key. */
void
HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen )
{
unsigned char pad[64];
unsigned char khash[32];
@@ -70,8 +107,7 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
size_t i;
/* If Klen > 64, the key is really SHA256(K). */
if ( Klen > 64 )
{
if (Klen > 64) {
SHA256_Init( &ctx->ictx );
SHA256_Update( &ctx->ictx, K, Klen );
SHA256_Final( khash, &ctx->ictx );
@@ -80,7 +116,7 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
}
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
SHA256_Init( &ctx->ictx );
SHA256_Init( &ctx->ictx );
memset( pad, 0x36, 64 );
for ( i = 0; i < Klen; i++ )
pad[i] ^= K[i];
@@ -92,19 +128,23 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
for ( i = 0; i < Klen; i++ )
pad[i] ^= K[i];
SHA256_Update( &ctx->octx, pad, 64 );
/* Clean the stack. */
//memset(khash, 0, 32);
}
/* Add bytes to the HMAC-SHA256 operation. */
void
HMAC_SHA256_Update( HMAC_SHA256_CTX *ctx, const void *in, size_t len )
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
{
/* Feed data to the inner SHA256 operation. */
SHA256_Update( &ctx->ictx, in, len );
}
/* Finish an HMAC-SHA256 operation. */
void
HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx )
{
unsigned char ihash[32];
@@ -116,6 +156,9 @@ HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
/* Finish the outer SHA256 operation. */
SHA256_Final( digest, &ctx->octx );
/* Clean the stack. */
//memset(ihash, 0, 32);
}
/**
@@ -124,51 +167,52 @@ HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
*/
void
PBKDF2_SHA256( const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen )
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
{
HMAC_SHA256_CTX PShctx, hctx;
uint8_t _ALIGN(128) T[32];
uint8_t _ALIGN(128) U[32];
uint32_t ivec;
uint8_t ivec[4];
size_t i, clen;
uint64_t j;
int k;
/* Compute HMAC state after processing P and S. */
HMAC_SHA256_Init( &PShctx, passwd, passwdlen );
HMAC_SHA256_Update( &PShctx, salt, saltlen );
HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
HMAC_SHA256_Update(&PShctx, salt, saltlen);
/* Iterate through the blocks. */
for ( i = 0; i * 32 < dkLen; i++ )
{
for (i = 0; i * 32 < dkLen; i++) {
/* Generate INT(i + 1). */
ivec = bswap_32( i+1 );
be32enc(ivec, (uint32_t)(i + 1));
/* Compute U_1 = PRF(P, S || INT(i)). */
memcpy( &hctx, &PShctx, sizeof(HMAC_SHA256_CTX) );
HMAC_SHA256_Update( &hctx, &ivec, 4 );
HMAC_SHA256_Final( U, &hctx );
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
HMAC_SHA256_Update(&hctx, ivec, 4);
HMAC_SHA256_Final(U, &hctx);
/* T_i = U_1 ... */
memcpy( T, U, 32 );
memcpy(T, U, 32);
for ( j = 2; j <= c; j++ )
{
for (j = 2; j <= c; j++) {
/* Compute U_j. */
HMAC_SHA256_Init( &hctx, passwd, passwdlen );
HMAC_SHA256_Update( &hctx, U, 32 );
HMAC_SHA256_Final( U, &hctx );
HMAC_SHA256_Init(&hctx, passwd, passwdlen);
HMAC_SHA256_Update(&hctx, U, 32);
HMAC_SHA256_Final(U, &hctx);
/* ... xor U_j ... */
for ( k = 0; k < 32; k++ )
for (k = 0; k < 32; k++)
T[k] ^= U[k];
}
/* Copy as many bytes as necessary into buf. */
clen = dkLen - i * 32;
if ( clen > 32 )
if (clen > 32)
clen = 32;
memcpy( &buf[i * 32], T, clen );
memcpy(&buf[i * 32], T, clen);
}
/* Clean PShctx, since we never called _Final on it. */
//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
}

View File

@@ -26,24 +26,23 @@
* $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
*/
#ifndef HMAC_SHA256_H__
#define HMAC_SHA256_H__
#ifndef _SHA256_H_
#define _SHA256_H_
#include <sys/types.h>
#include <stdint.h>
#include <openssl/sha.h>
typedef struct HMAC_SHA256Context
{
SHA256_CTX ictx;
SHA256_CTX octx;
typedef struct HMAC_SHA256Context {
SHA256_CTX ictx;
SHA256_CTX octx;
} HMAC_SHA256_CTX;
void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
void SHA256_Buf( const void * in, size_t len, uint8_t digest[32] );
void HMAC_SHA256_Init( HMAC_SHA256_CTX *, const void *, size_t );
void HMAC_SHA256_Update( HMAC_SHA256_CTX *, const void *, size_t );
void HMAC_SHA256_Final( unsigned char [32], HMAC_SHA256_CTX * );
void HMAC_SHA256_Buf( const void *, size_t Klen, const void *,
void HMAC_SHA256_Buf( const void * K, size_t Klen, const void * in,
size_t len, uint8_t digest[32] );
/**
@@ -54,4 +53,4 @@ void HMAC_SHA256_Buf( const void *, size_t Klen, const void *,
void PBKDF2_SHA256( const uint8_t *, size_t, const uint8_t *, size_t,
uint64_t, uint8_t *, size_t);
#endif // HMAC_SHA256_H__
#endif /* !_SHA256_H_ */

94
algo/yespower/sysendian.h Normal file
View File

@@ -0,0 +1,94 @@
/*-
* Copyright 2007-2014 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _SYSENDIAN_H_
#define _SYSENDIAN_H_
#include <stdint.h>
/* Avoid namespace collisions with BSD <sys/endian.h>. */
#define be32dec libcperciva_be32dec
#define be32enc libcperciva_be32enc
#define be64enc libcperciva_be64enc
#define le32dec libcperciva_le32dec
#define le32enc libcperciva_le32enc
/* Read a 32-bit big-endian value from the 4 bytes at ${pp}. */
static inline uint32_t
be32dec(const void * pp)
{
	const uint8_t *src = (const uint8_t *)pp;
	uint32_t acc = 0;

	/* MSB-first accumulation is endian-neutral on the host. */
	for (int i = 0; i < 4; i++)
		acc = (acc << 8) | src[i];
	return acc;
}
/* Store ${x} into the 4 bytes at ${pp} in big-endian order. */
static inline void
be32enc(void * pp, uint32_t x)
{
	uint8_t *dst = (uint8_t *)pp;

	dst[0] = (uint8_t)(x >> 24);	/* most significant byte first */
	dst[1] = (uint8_t)(x >> 16);
	dst[2] = (uint8_t)(x >> 8);
	dst[3] = (uint8_t)x;
}
/* Store the 64-bit value ${x} into the 8 bytes at ${pp}, big-endian. */
static inline void
be64enc(void * pp, uint64_t x)
{
	uint8_t *dst = (uint8_t *)pp;

	/* Walk backwards, peeling one low byte off per step. */
	for (int i = 7; i >= 0; i--) {
		dst[i] = (uint8_t)(x & 0xff);
		x >>= 8;
	}
}
/* Read a 32-bit little-endian value from the 4 bytes at ${pp}. */
static inline uint32_t
le32dec(const void * pp)
{
	const uint8_t *src = (const uint8_t *)pp;
	uint32_t acc = 0;

	/* Start at the highest-order byte and shift downward. */
	for (int i = 3; i >= 0; i--)
		acc = (acc << 8) | src[i];
	return acc;
}
/* Store ${x} into the 4 bytes at ${pp} in little-endian order. */
static inline void
le32enc(void * pp, uint32_t x)
{
	uint8_t *dst = (uint8_t *)pp;

	/* Least significant byte goes first. */
	for (int i = 0; i < 4; i++) {
		dst[i] = (uint8_t)(x & 0xff);
		x >>= 8;
	}
}
#endif /* !_SYSENDIAN_H_ */

View File

@@ -0,0 +1 @@
#define insecure_memzero(buf, len) /* empty */

View File

@@ -0,0 +1,94 @@
/*-
* Copyright 2007-2014 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _SYSENDIAN_H_
#define _SYSENDIAN_H_
#include <stdint.h>
/* Avoid namespace collisions with BSD <sys/endian.h>. */
#define be32dec libcperciva_be32dec
#define be32enc libcperciva_be32enc
#define be64enc libcperciva_be64enc
#define le32dec libcperciva_le32dec
#define le32enc libcperciva_le32enc
/* Decode 4 bytes at ${pp} as a big-endian 32-bit integer. */
static inline uint32_t
be32dec(const void * pp)
{
	const uint8_t *b = (const uint8_t *)pp;

	/* OR of disjoint byte lanes; equivalent to summing them. */
	return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
	    ((uint32_t)b[2] << 8) | (uint32_t)b[3];
}
/* Encode ${x} into 4 bytes at ${pp}, most significant byte first. */
static inline void
be32enc(void * pp, uint32_t x)
{
	uint8_t *b = (uint8_t *)pp;

	for (int shift = 24, i = 0; i < 4; i++, shift -= 8)
		b[i] = (uint8_t)(x >> shift);
}
/* Encode the 64-bit value ${x} into 8 bytes at ${pp}, big-endian. */
static inline void
be64enc(void * pp, uint64_t x)
{
	uint8_t *b = (uint8_t *)pp;

	/* Byte i carries bits [56 - 8*i .. 63 - 8*i]. */
	for (int i = 0; i < 8; i++)
		b[i] = (uint8_t)(x >> (56 - 8 * i));
}
/* Decode 4 bytes at ${pp} as a little-endian 32-bit integer. */
static inline uint32_t
le32dec(const void * pp)
{
	const uint8_t *b = (const uint8_t *)pp;
	uint32_t v = b[0];

	v |= (uint32_t)b[1] << 8;
	v |= (uint32_t)b[2] << 16;
	v |= (uint32_t)b[3] << 24;
	return v;
}
/* Encode ${x} into 4 bytes at ${pp}, least significant byte first. */
static inline void
le32enc(void * pp, uint32_t x)
{
	uint8_t *b = (uint8_t *)pp;

	b[0] = (uint8_t)x;
	b[1] = (uint8_t)(x >> 8);
	b[2] = (uint8_t)(x >> 16);
	b[3] = (uint8_t)(x >> 24);
}
#endif /* !_SYSENDIAN_H_ */

View File

@@ -1,80 +0,0 @@
/*-
* Copyright 2013-2018 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "cpuminer-config.h"
#include "miner.h"
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include "yescrypt-r8g.h"
/* Scan nonces for the yescryptr8g (Koto) algorithm using yespower with
 * YESPOWER_0_5, N=2048, r=8 and the big-endian block header as the
 * personalization string. Iterates the nonce at pdata[19] until a share
 * is found, max_nonce is reached, or a work restart is requested.
 * Returns 0; found shares are reported via submit_solution().
 * NOTE(review): perslen of 112 vs 80 presumably matches sapling vs
 * legacy header sizes — confirm against the caller's work layout. */
int scanhash_yespower_r8g( struct work *work, uint32_t max_nonce,
 uint64_t *hashes_done, struct thr_info *mythr )
{
 uint64_t hash[4] __attribute__((aligned(64)));
 uint32_t endiandata[32];
 uint32_t *pdata = work->data;
 const uint64_t *ptarget = (const uint64_t*)work->target;
 uint32_t n = pdata[19];
 const uint32_t first_nonce = pdata[19];
 const uint32_t last_nonce = max_nonce;
 const int thr_id = mythr->id;
 yespower_params_t params =
 {
 .version = YESPOWER_0_5,
 .N = 2048,
 .r = 8,
 .pers = (const uint8_t *)endiandata,
 .perslen = work->sapling ? 112 : 80,
 };
 /* yespower expects the header in big-endian byte order. */
 for ( int i = 0 ; i < 32; i++ )
 be32enc( &endiandata[ i], pdata[ i ]);
 /* Nonce word is kept in host order; only stored big-endian on submit. */
 endiandata[19] = n;
 do {
 yespower_tls( (unsigned char *)endiandata, params.perslen,
 &params, (yespower_binary_t*)hash );
 if unlikely( valid_hash( hash, ptarget ) && !opt_benchmark )
 {
 /* Record the winning nonce big-endian before submitting. */
 be32enc( pdata+19, n );
 submit_solution( work, hash, mythr );
 }
 endiandata[19] = ++n;
 } while (n < last_nonce && !work_restart[thr_id].restart);
 *hashes_done = n - first_nonce + 1;
 pdata[19] = n;
 return 0;
}
/* Register the yescryptr8g algorithm: wire the gate's scanhash and hash
 * entry points, declare supported CPU optimizations, and set the share
 * difficulty scaling factor. Always succeeds. */
bool register_yescryptr8g_algo( algo_gate_t* gate )
{
  /* Difficulty targets for this algo are scaled by 2^16. */
  opt_target_factor = 65536.0;
  gate->scanhash = (void*)&scanhash_yespower_r8g;
  gate->hash = (void*)&yespower_tls;
  gate->optimizations = SSE2_OPT | SHA_OPT;
  return true;
};

View File

@@ -95,7 +95,11 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "utils/insecure_memzero.h"
#include "utils/sysendian.h"
#include "crypto/blake2b-yp.h"
#include "yespower.h"
#ifdef __unix__
@@ -948,7 +952,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
salsa20_blk_t *dst = &X[i];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
tmp->w[k] = le32dec(&src->w[k]);
salsa20_simd_shuffle(tmp, dst);
}
@@ -995,7 +999,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
le32enc(&tmp->w[k], src->w[k]);
salsa20_simd_unshuffle(tmp, dst);
}
}
@@ -1021,7 +1025,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
salsa20_blk_t *dst = &X[i];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
tmp->w[k] = le32dec(&src->w[k]);
salsa20_simd_shuffle(tmp, dst);
}
@@ -1051,7 +1055,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
le32enc(&tmp->w[k], src->w[k]);
salsa20_simd_unshuffle(tmp, dst);
}
}

View File

@@ -32,8 +32,6 @@
static yespower_params_t yespower_params;
// YESPOWER
void yespower_hash( const char *input, char *output, uint32_t len )
{
yespower_tls( input, len, &yespower_params, (yespower_binary_t*)output );
@@ -42,33 +40,36 @@ void yespower_hash( const char *input, char *output, uint32_t len )
int scanhash_yespower( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) vhash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
uint32_t _ALIGN(64) vhash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
for ( int k = 0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
endiandata[19] = n;
do {
yespower_hash( (char*)endiandata, (char*)vhash, 80 );
if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
{
be32enc( pdata+19, n );
submit_solution( work, vhash, mythr );
}
endiandata[19] = ++n;
} while ( n < last_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
for (int k = 0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
do {
be32enc(&endiandata[19], n);
yespower_hash((char*) endiandata, (char*) vhash, 80);
if ( vhash[7] <= Htarg && fulltest( vhash, ptarget )
&& !opt_benchmark )
{
pdata[19] = n;
submit_solution( work, vhash, mythr );
}
n++;
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
// YESPOWER-B2B
void yespower_b2b_hash( const char *input, char *output, uint32_t len )
{
@@ -78,30 +79,34 @@ void yespower_b2b_hash( const char *input, char *output, uint32_t len )
int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) vhash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce;
const int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t _ALIGN(64) vhash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
for ( int k = 0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
endiandata[19] = n;
do {
yespower_b2b_hash( (char*) endiandata, (char*) vhash, 80 );
if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
{
be32enc( pdata+19, n );
submit_solution( work, vhash, mythr );
}
endiandata[19] = ++n;
} while ( n < last_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
for (int k = 0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
do {
be32enc(&endiandata[19], n);
yespower_b2b_hash((char*) endiandata, (char*) vhash, 80);
if ( vhash[7] < Htarg && fulltest( vhash, ptarget )
&& !opt_benchmark )
{
pdata[19] = n;
submit_solution( work, vhash, mythr );
}
n++;
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
bool register_yespower_algo( algo_gate_t* gate )
@@ -130,7 +135,7 @@ bool register_yespower_algo( algo_gate_t* gate )
if ( yespower_params.pers )
applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers );
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT;
gate->scanhash = (void*)&scanhash_yespower;
gate->hash = (void*)&yespower_hash;
opt_target_factor = 65536.0;
@@ -144,14 +149,14 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
yespower_params.r = 16;
yespower_params.pers = NULL;
yespower_params.perslen = 0;
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT;
gate->scanhash = (void*)&scanhash_yespower;
gate->hash = (void*)&yespower_hash;
opt_target_factor = 65536.0;
return true;
};
/* not used
bool register_yescrypt_05_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
@@ -203,9 +208,6 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate )
opt_target_factor = 65536.0;
return true;
}
*/
// POWER2B
bool register_power2b_algo( algo_gate_t* gate )
{
@@ -221,7 +223,7 @@ bool register_power2b_algo( algo_gate_t* gate )
applog( LOG_NOTICE,"Key= \"%s\"", yespower_params.pers );
applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen );
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT;
gate->scanhash = (void*)&scanhash_yespower_b2b;
gate->hash = (void*)&yespower_b2b_hash;
opt_target_factor = 65536.0;

View File

@@ -95,8 +95,13 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "algo/sha/hmac-sha256-hash.h"
#include "insecure_memzero.h"
#include "sha256_p.h"
#include "sysendian.h"
#include "yespower.h"
#include "yespower-platform.c"
#if __STDC_VERSION__ >= 199901L
@@ -524,7 +529,7 @@ static volatile uint64_t Smask2var = Smask2;
/* 64-bit without AVX. This relies on out-of-order execution and register
* renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g.,
* it runs great on Haswell. */
//#warning "Note: using x86-64 inline assembly for pwxform. That's great."
#warning "Note: using x86-64 inline assembly for pwxform. That's great."
#undef MAYBE_MEMORY_BARRIER
#define MAYBE_MEMORY_BARRIER \
__asm__("" : : : "memory");
@@ -856,7 +861,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
salsa20_blk_t *dst = &X[i];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
tmp->w[k] = le32dec(&src->w[k]);
salsa20_simd_shuffle(tmp, dst);
}
@@ -903,7 +908,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
le32enc(&tmp->w[k], src->w[k]);
salsa20_simd_unshuffle(tmp, dst);
}
}
@@ -929,7 +934,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
salsa20_blk_t *dst = &X[i];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
tmp->w[k] = le32dec(&src->w[k]);
salsa20_simd_shuffle(tmp, dst);
}
@@ -961,7 +966,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
size_t k;
for (k = 0; k < 16; k++)
tmp->w[k] = src->w[k];
le32enc(&tmp->w[k], src->w[k]);
salsa20_simd_unshuffle(tmp, dst);
}
}

View File

@@ -51,8 +51,8 @@
#include <stdlib.h>
#include <string.h>
#include "algo/sha/hmac-sha256-hash.h"
//#include "sysendian.h"
#include "sha256_p.h"
#include "sysendian.h"
#include "yespower.h"
@@ -346,7 +346,7 @@ static void smix1(uint32_t *B, size_t r, uint32_t N,
/* 1: X <-- B */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
X[k * 16 + i] = B[k * 16 + (i * 5 % 16)];
X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
if (ctx->version != YESPOWER_0_5) {
for (k = 1; k < r; k++) {
@@ -378,7 +378,7 @@ static void smix1(uint32_t *B, size_t r, uint32_t N,
/* B' <-- X */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
B[k * 16 + (i * 5 % 16)] = X[k * 16 + i];
le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
}
/**
@@ -398,7 +398,7 @@ static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop,
/* X <-- B */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
X[k * 16 + i] = B[k * 16 + (i * 5 % 16)];
X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
/* 6: for i = 0 to N - 1 do */
for (i = 0; i < Nloop; i++) {
@@ -418,7 +418,7 @@ static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop,
/* 10: B' <-- X */
for (k = 0; k < 2 * r; k++)
for (i = 0; i < 16; i++)
B[k * 16 + (i * 5 % 16)] = X[k * 16 + i];
le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
}
/**

View File

@@ -71,7 +71,7 @@ typedef struct {
*/
typedef struct {
unsigned char uc[32];
} yespower_binary_t __attribute__ ((aligned (64)));
} yespower_binary_t;
/**
* yespower_init_local(local):

View File

@@ -44,23 +44,23 @@ mv cpuminer.exe cpuminer-aes-sse42.exe
strip -s cpuminer
mv cpuminer cpuminer-aes-sse42
#make clean || echo clean
#rm -f config.status
#CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
#make -j 16
#strip -s cpuminer.exe
#mv cpuminer.exe cpuminer-sse42.exe
#strip -s cpuminer
#mv cpuminer cpuminer-sse42
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
make -j 16
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-sse42.exe
strip -s cpuminer
mv cpuminer cpuminer-sse42
#make clean || echo clean
#rm -f config.status
#CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
#make -j 16
#strip -s cpuminer.exe
#mv cpuminer.exe cpuminer-ssse3.exe
#strip -s cpuminer
#mv cpuminer cpuminer-ssse3
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
make -j 16
strip -s cpuminer.exe
mv cpuminer.exe cpuminer-ssse3.exe
strip -s cpuminer
mv cpuminer cpuminer-ssse3
make clean || echo clean
rm -f config.status

View File

@@ -3,8 +3,8 @@
# imake clean and rm all the targetted executables.
# tips to users.
rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen > /dev/null
rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null
rm cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-aes-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null
rm cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-aes-avx.exe cpuminer-aes-sse42.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null
make distclean > /dev/null

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.7.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.3.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.11.7'
PACKAGE_STRING='cpuminer-opt 3.11.7'
PACKAGE_VERSION='3.11.3'
PACKAGE_STRING='cpuminer-opt 3.11.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.11.7 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.11.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.11.7:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.11.3:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.11.7
cpuminer-opt configure 3.11.3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.11.7, which was
It was created by cpuminer-opt $as_me 3.11.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.11.7'
VERSION='3.11.3'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.11.7, which was
This file was extended by cpuminer-opt $as_me 3.11.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.11.7
cpuminer-opt config.status 3.11.3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.11.7])
AC_INIT([cpuminer-opt], [3.11.3])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -157,7 +157,6 @@ bool opt_hash_meter = false;
uint32_t submitted_share_count= 0;
uint32_t accepted_share_count = 0;
uint32_t rejected_share_count = 0;
uint32_t stale_share_count = 0;
uint32_t solved_block_count = 0;
double *thr_hashrates;
double global_hashrate = 0;
@@ -506,7 +505,6 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
uint32_t version, curtime, bits;
uint32_t prevhash[8];
uint32_t target[8];
unsigned char final_sapling_hash[32];
int cbtx_size;
uchar *cbtx = NULL;
int tx_count, tx_size;
@@ -530,8 +528,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
continue;
if ( !strcmp( s, "coinbase/append" ) ) coinbase_append = true;
else if ( !strcmp( s, "submit/coinbase" ) ) submit_coinbase = true;
else if ( !strcmp( s, "version/force" ) ) version_force = true;
else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true;
else if ( !strcmp( s, "version/force" ) ) version_force = true;
else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true;
}
}
@@ -551,13 +549,12 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
goto out;
}
version = (uint32_t) json_integer_value( tmp );
if ( version == 5 )
work->sapling = true;
else if ( version > 4 )
// if ( (version & 0xffU) > BLOCK_VERSION_CURRENT )
if ( (version & 0xffU) > BLOCK_VERSION_CURRENT )
{
if ( version_reduce )
{
version = ( version & ~0xffU ) | BLOCK_VERSION_CURRENT;
}
else if ( have_gbt && allow_getwork && !version_force )
{
applog( LOG_DEBUG, "Switching to getwork, gbt version %d", version );
@@ -592,16 +589,6 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
goto out;
}
if ( work->sapling )
{
if ( unlikely( !jobj_binary( val, "finalsaplingroothash",
final_sapling_hash, sizeof(final_sapling_hash) ) ) )
{
applog( LOG_ERR, "JSON invalid finalsaplingroothash" );
goto out;
}
}
/* find count and size of transactions */
txa = json_object_get(val, "transactions" );
if ( !txa || !json_is_array( txa ) )
@@ -784,8 +771,7 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
/* assemble block header */
algo_gate.build_block_header( work, swab32( version ),
(uint32_t*) prevhash, (uint32_t*) merkle_tree,
swab32( curtime ), le32dec( &bits ),
final_sapling_hash );
swab32( curtime ), le32dec( &bits ) );
if ( unlikely( !jobj_binary(val, "target", target, sizeof(target)) ) )
{
@@ -833,15 +819,20 @@ out:
// returns the unit prefix and the hashrate appropriately scaled.
void scale_hash_for_display ( double* hashrate, char* prefix )
{
if ( *hashrate < 1e4 ) *prefix = 0;
else if ( *hashrate < 1e7 ) { *prefix = 'k'; *hashrate /= 1e3; }
else if ( *hashrate < 1e10 ) { *prefix = 'M'; *hashrate /= 1e6; }
else if ( *hashrate < 1e13 ) { *prefix = 'G'; *hashrate /= 1e9; }
else if ( *hashrate < 1e16 ) { *prefix = 'T'; *hashrate /= 1e12; }
else if ( *hashrate < 1e19 ) { *prefix = 'P'; *hashrate /= 1e15; }
else if ( *hashrate < 1e22 ) { *prefix = 'E'; *hashrate /= 1e18; }
else if ( *hashrate < 1e25 ) { *prefix = 'Z'; *hashrate /= 1e21; }
else { *prefix = 'Y'; *hashrate /= 1e24; }
if ( *hashrate < 1e4 ) // 0 H/s to 9999 h/s
*prefix = 0;
else if ( *hashrate < 1e7 ) // 10 kH/s to 9999 kh/s
{ *prefix = 'k'; *hashrate /= 1e3; }
else if ( *hashrate < 1e10 ) // 10 Mh/s to 9999 Mh/s
{ *prefix = 'M'; *hashrate /= 1e6; }
else if ( *hashrate < 1e13 ) // 10 Gh/s to 9999 Gh/s
{ *prefix = 'G'; *hashrate /= 1e9; }
else if ( *hashrate < 1e16 ) // 10 Th/s to 9999 Th/s
{ *prefix = 'T'; *hashrate /= 1e12; }
else if ( *hashrate < 1e19 ) // 10 Ph/s to 9999 Ph
{ *prefix = 'P'; *hashrate /= 1e15; }
else // 10 Eh/s and higher
{ *prefix = 'E'; *hashrate /= 1e18; }
}
static inline void sprintf_et( char *str, int seconds )
@@ -850,13 +841,10 @@ static inline void sprintf_et( char *str, int seconds )
unsigned int min = seconds / 60;
unsigned int sec = seconds % 60;
unsigned int hrs = min / 60;
if ( unlikely( hrs ) )
if ( hrs )
{
unsigned int years = hrs / (24*365);
unsigned int days = hrs / 24;
if ( years )
sprintf( str, "%uy%ud", years, years % 365 );
else if ( days ) //0d00h
if ( days ) //0d00h
sprintf( str, "%ud%02uh", days, hrs % 24 );
else // 0h00m
sprintf( str, "%uh%02um", hrs, min % 60 );
@@ -878,15 +866,13 @@ const double diff_to_hash = 4294967296.;
static struct timeval session_start;
static struct timeval five_min_start;
static uint64_t session_first_block = 0;
static double latency_sum = 0.;
static uint64_t submit_sum = 0;
static uint64_t accept_sum = 0;
static uint64_t stale_sum = 0;
static uint64_t reject_sum = 0;
static double norm_diff_sum = 0.;
static uint32_t last_block_height = 0;
//static bool new_job = false;
static bool new_job = false;
static double last_targetdiff = 0.;
static double ref_rate_hi = 0.;
static double ref_rate_lo = 1e100;
@@ -897,7 +883,6 @@ static uint32_t hi_temp = 0;
struct share_stats_t
{
int share_count;
struct timeval submit_time;
double net_diff;
double share_diff;
@@ -907,7 +892,7 @@ struct share_stats_t
};
#define s_stats_size 8
static struct share_stats_t share_stats[ s_stats_size ] = {{0}};
static struct share_stats_t share_stats[ s_stats_size ] = {0};
static int s_get_ptr = 0, s_put_ptr = 0;
static struct timeval last_submit_time = {0};
@@ -936,7 +921,6 @@ void report_summary_log( bool force )
uint64_t submits = submit_sum; submit_sum = 0;
uint64_t accepts = accept_sum; accept_sum = 0;
uint64_t rejects = reject_sum; reject_sum = 0;
uint64_t stales = stale_sum; stale_sum = 0;
// int latency = latency_sum; latency_sum = 0;
memcpy( &start_time, &five_min_start, sizeof start_time );
memcpy( &five_min_start, &now, sizeof now );
@@ -992,11 +976,7 @@ void report_summary_log( bool force )
submits, submitted_share_count );
applog2( LOG_INFO,"Accepted %6d %6d",
accepts, accepted_share_count );
if ( stale_share_count )
applog2( LOG_INFO,"Stale %6d %6d",
stales, stale_share_count );
if ( rejected_share_count )
applog2( LOG_INFO,"Rejected %6d %6d",
applog2( LOG_INFO,"Rejected %6d %6d",
rejects, rejected_share_count );
if ( solved_block_count )
applog2( LOG_INFO,"Blocks solved %6d",
@@ -1031,18 +1011,13 @@ static int share_result( int result, struct work *null_work,
int latency = 0;
struct share_stats_t my_stats = {0};
struct timeval ack_time, latency_tv, et;
char ares[48];
char sres[48];
char rres[48];
char bres[48];
// char job_id[48];
const char *sres = NULL;
bool solved = false;
bool stale = false;
char *acol = NULL, *bcol = NULL, *scol = NULL, *rcol = NULL;
// Mutex while we grab a snapshot of the stats.
pthread_mutex_lock( &stats_lock );
if ( likely( share_stats[ s_get_ptr ].submit_time.tv_sec ) )
if ( share_stats[ s_get_ptr ].submit_time.tv_sec )
{
memcpy( &my_stats, &share_stats[ s_get_ptr], sizeof my_stats );
memset( &share_stats[ s_get_ptr ], 0, sizeof my_stats );
@@ -1072,7 +1047,7 @@ static int share_result( int result, struct work *null_work,
my_stats.net_diff * 100.;
// check result
if ( likely( result ) )
if ( result )
{
accepted_share_count++;
if ( ( my_stats.net_diff > 0. ) && ( my_stats.share_diff >= net_diff ) )
@@ -1082,16 +1057,13 @@ static int share_result( int result, struct work *null_work,
}
}
else
{
if ( reason && strstr( reason, "Invalid job id" ) )
{
stale = true;
stale_share_count++;
}
else
rejected_share_count++;
}
rejected_share_count++;
/*
result ? accepted_share_count++ : rejected_share_count++;
solved = result && (my_stats.net_diff > 0.0 )
&& ( my_stats.share_diff >= net_diff );
solved_block_count += solved ? 1 : 0 ;
*/
// update global counters for summary report
pthread_mutex_lock( &stats_lock );
@@ -1099,88 +1071,37 @@ static int share_result( int result, struct work *null_work,
hashrate += thr_hashrates[i];
global_hashrate = hashrate;
if ( likely( result ) )
if ( result )
{
accept_sum++;
norm_diff_sum += my_stats.target_diff;
}
else
{
if ( stale )
stale_sum++;
else
reject_sum++;
}
reject_sum++;
submit_sum++;
latency_sum += latency;
pthread_mutex_unlock( &stats_lock );
bcol = acol = scol = rcol = "\0";
if ( likely( result ) )
{
if ( unlikely( solved ) )
{
sprintf( bres, "BLOCK SOLVED %d", solved_block_count );
sprintf( ares, "A%d", accepted_share_count );
}
else
{
sprintf( bres, "B%d", solved_block_count );
sprintf( ares, "Accepted %d", accepted_share_count );
}
sprintf( sres, "S%d", stale_share_count );
sprintf( rres, "R%d", rejected_share_count );
}
else
{
sprintf( ares, "A%d", accepted_share_count );
sprintf( bres, "B%d", solved_block_count );
if ( stale )
{
sprintf( sres, "Stale job %d", stale_share_count );
sprintf( rres, "R%d", rejected_share_count );
}
else
{
sprintf( sres, "S%d", stale_share_count );
sprintf( rres, "Rejected %d" , rejected_share_count );
}
}
bcol = acol = scol = rcol = CL_WHT;
if ( use_colors )
{
if ( likely( result ) )
{
if ( unlikely( solved ) )
{
bcol = CL_MAG;
acol = CL_GRN;
}
else
acol = CL_GRN;
}
else if ( stale )
scol = CL_YL2;
else
rcol = CL_RED;
}
sres = solved ? ( CL_MAG "BLOCK SOLVED" CL_WHT )
: ( result ? ( CL_GRN "Accepted" CL_WHT )
: ( CL_RED "Rejected" CL_WHT ) );
else // monochrome
sres = solved ? "BLOCK SOLVED" : ( result ? "Accepted" : "Rejected" );
applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)",
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
bres, share_time, latency );
applog( LOG_NOTICE, "%s, %.3f secs (%dms), A/R/B: %d/%d/%d",
sres, share_time, latency, accepted_share_count,
rejected_share_count, solved_block_count );
if ( have_stratum && !opt_quiet )
applog2( LOG_NOTICE, "Diff %.3g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
my_stats.share_diff, share_ratio, bcol, stratum.block_height,
scol, my_stats.job_id );
applog2( LOG_INFO, "Share diff %.3g (%5f%%), block %d, job %s",
my_stats.share_diff, share_ratio, stratum.block_height,
my_stats.job_id );
if ( unlikely( reason && !result ) )
if ( reason )
{
if ( !( opt_quiet || stale ) )
applog( LOG_WARNING, "Reject reason: %s", reason );
applog( LOG_WARNING, "Reject reason: %s", reason );
if ( opt_debug )
{
@@ -1188,20 +1109,20 @@ static int share_result( int result, struct work *null_work,
char str3[65];
// display share hash and target for troubleshooting
diff_to_target( (uint64_t*)str1, my_stats.share_diff );
diff_to_target( str1, my_stats.share_diff );
for ( int i = 0; i < 8; i++ )
be32enc( str2 + i, str1[7 - i] );
bin2hex( str3, (unsigned char*)str2, 12 );
applog2( LOG_INFO, "Hash: %s...", str3 );
diff_to_target( (uint64_t*)str1, my_stats.target_diff );
diff_to_target( str1, my_stats.target_diff );
for ( int i = 0; i < 8; i++ )
be32enc( str2 + i, str1[7 - i] );
bin2hex( str3, (unsigned char*)str2, 12 );
applog2( LOG_INFO, "Target: %s...", str3 );
}
if ( unlikely( opt_reset_on_stale && stale ) )
if ( opt_reset_on_stale && strstr( reason, "Invalid job id" ) )
stratum_need_reset = true;
}
@@ -1377,11 +1298,9 @@ char* std_malloc_txs_request( struct work *work )
char data_str[2 * sizeof(work->data) + 1];
int i;
int datasize = work->sapling ? 112 : 80;
for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
be32enc( work->data + i, work->data[i] );
bin2hex( data_str, (unsigned char *)work->data, datasize );
bin2hex( data_str, (unsigned char *)work->data, 80 );
if ( work->workid )
{
char *params;
@@ -1389,7 +1308,7 @@ char* std_malloc_txs_request( struct work *work )
json_object_set_new( val, "workid", json_string( work->workid ) );
params = json_dumps( val, 0 );
json_decref( val );
req = (char*) malloc( 128 + 2 * datasize + strlen( work->txs )
req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs )
+ strlen( params ) );
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":4}\r\n",
@@ -1398,7 +1317,7 @@ char* std_malloc_txs_request( struct work *work )
}
else
{
req = (char*) malloc( 128 + 2 * datasize + strlen( work->txs ) );
req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs ) );
sprintf( req,
"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":4}\r\n",
data_str, work->txs);
@@ -1716,7 +1635,7 @@ static void *workio_thread(void *userdata)
if ( jsonrpc_2 && !have_stratum )
ok = rpc2_workio_login( curl );
while ( likely(ok) )
while (ok)
{
struct workio_cmd *wc;
@@ -1792,8 +1711,7 @@ static bool get_work(struct thr_info *thr, struct work *work)
return true;
}
static bool submit_work( struct thr_info *thr,
const struct work *work_in )
static bool submit_work( struct thr_info *thr, const struct work *work_in )
{
struct workio_cmd *wc;
@@ -1817,22 +1735,20 @@ err_out:
return false;
}
// __float128?
// Convert little endian 256 bit (38 decimal digits) unsigned integer to
// double precision floating point with 15 decimal digits precision.
// returns u * ( 2**256 )
static inline double u256_to_double( const uint64_t *u )
// Convert little endian 256 bit unsigned integer to
// double precision floating point.
static inline double u256_to_double( const uint64_t* u )
{
const double exp64 = 4294967296.0 * 4294967296.0; // 2**64
return ( ( u[3] * exp64 + u[2] ) * exp64 + u[1] ) * exp64 + u[0];
const double f = 4294967296.0 * 4294967296.0; // 2**64
return ( ( u[3] * f + u[2] ) * f + u[1] ) * f + u[0];
}
void work_set_target_ratio( struct work* work, const void *hash )
void work_set_target_ratio( struct work* work, uint32_t* hash )
{
double dhash;
dhash = u256_to_double( (const uint64_t*)hash );
if ( likely( dhash > 0. ) )
if ( dhash > 0. )
work->sharediff = work->targetdiff *
u256_to_double( (const uint64_t*)( work->target ) ) / dhash;
else
@@ -1844,7 +1760,6 @@ void work_set_target_ratio( struct work* work, const void *hash )
// it can overflow the queue and overwrite stats for a share.
pthread_mutex_lock( &stats_lock );
share_stats[ s_put_ptr ].share_count = submitted_share_count;
gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL );
share_stats[ s_put_ptr ].share_diff = work->sharediff;
share_stats[ s_put_ptr ].net_diff = net_diff;
@@ -1857,39 +1772,37 @@ void work_set_target_ratio( struct work* work, const void *hash )
pthread_mutex_unlock( &stats_lock );
}
bool submit_solution( struct work *work, const void *hash,
bool submit_solution( struct work *work, void *hash,
struct thr_info *thr )
{
if ( likely( submit_work( thr, work ) ) )
if ( submit_work( thr, work ) )
{
submitted_share_count++;
work_set_target_ratio( work, hash );
if ( !opt_quiet )
applog( LOG_NOTICE, "%d submitted by thread %d, job %s",
submitted_share_count, thr->id, work->job_id );
applog( LOG_BLUE, "Share %d submitted by thread %d",
submitted_share_count, thr->id );
return true;
}
else
applog( LOG_WARNING, "%d failed to submit share.",
submitted_share_count );
applog( LOG_WARNING, "Failed to submit share." );
return false;
}
bool submit_lane_solution( struct work *work, const void *hash,
struct thr_info *thr, const int lane )
bool submit_lane_solution( struct work *work, void *hash,
struct thr_info *thr, int lane )
{
if ( likely( submit_work( thr, work ) ) )
if ( submit_work( thr, work ) )
{
submitted_share_count++;
work_set_target_ratio( work, hash );
if ( !opt_quiet )
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
submitted_share_count, thr->id, lane, work->job_id );
applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d",
submitted_share_count, thr->id, lane );
return true;
}
else
applog( LOG_WARNING, "%d failed to submit share.",
submitted_share_count );
applog( LOG_WARNING, "Failed to submit share." );
return false;
}
@@ -1995,27 +1908,35 @@ double std_calc_network_diff( struct work* work )
return d;
}
uint32_t *std_get_nonceptr( uint32_t *work_data )
uint32_t* std_get_nonceptr( uint32_t *work_data )
{
return work_data + algo_gate.nonce_index;
}
uint32_t *jr2_get_nonceptr( uint32_t *work_data )
uint32_t* jr2_get_nonceptr( uint32_t *work_data )
{
// nonce is misaligned, use byte offset
return (uint32_t*) ( ((uint8_t*) work_data) + algo_gate.nonce_index );
}
void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *end_nonce_ptr )
uint32_t *end_nonce_ptr, bool clean_job )
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
bool force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) !=
strtoul( g_work->job_id, NULL, 16 )
: true;
if ( force_new_work || *nonceptr >= *end_nonce_ptr )
// the job_id check doesn't work as intended, it's a char pointer!
// For stratum the pointers can be dereferenced and the strings compared,
// benchmark not, getwork & gbt unsure.
// || ( have_straum && strcmp( work->job_id, g_work->job_id ) ) ) )
// or
// || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) )
// For now leave it as is, it seems stable.
// strtoul seems to work.
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| strtoul( work->job_id, NULL, 16 )
!= strtoul( g_work->job_id, NULL, 16 ) ) )
{
work_free( work );
work_copy( work, g_work );
@@ -2037,7 +1958,7 @@ void jr2_get_new_work( struct work* work, struct work* g_work, int thr_id,
if ( memcmp( work->data, g_work->data, algo_gate.nonce_index )
|| memcmp( ((uint8_t*) work->data) + JR2_WORK_CMP_INDEX_2,
((uint8_t*) g_work->data) + JR2_WORK_CMP_INDEX_2,
JR2_WORK_CMP_SIZE_2 ) )
JR2_WORK_CMP_SIZE_2 ) )
{
work_free( work );
work_copy( work, g_work );
@@ -2086,30 +2007,39 @@ static void *miner_thread( void *userdata )
* error if it fails */
if (!opt_benchmark && opt_priority == 0)
{
setpriority(PRIO_PROCESS, 0, 19);
drop_policy();
setpriority(PRIO_PROCESS, 0, 19);
drop_policy();
}
else
{
int prio = 0;
int prio = 0;
#ifndef WIN32
prio = 18;
// note: different behavior on linux (-19 to 19)
switch ( opt_priority )
{
case 1: prio = 5; break;
case 2: prio = 0; break;
case 3: prio = -5; break;
case 4: prio = -10; break;
case 5: prio = -15;
}
if (opt_debug)
applog(LOG_DEBUG, "Thread %d priority %d (nice %d)", thr_id,
prio = 18;
// note: different behavior on linux (-19 to 19)
switch (opt_priority)
{
case 1:
prio = 5;
break;
case 2:
prio = 0;
break;
case 3:
prio = -5;
break;
case 4:
prio = -10;
break;
case 5:
prio = -15;
}
if (opt_debug)
applog(LOG_DEBUG, "Thread %d priority %d (nice %d)", thr_id,
opt_priority, prio );
#endif
setpriority(PRIO_PROCESS, 0, prio);
if ( opt_priority == 0 )
drop_policy();
setpriority(PRIO_PROCESS, 0, prio);
if (opt_priority == 0)
drop_policy();
}
// CPU thread affinity
if ( num_cpus > 1 )
@@ -2162,7 +2092,7 @@ static void *miner_thread( void *userdata )
}
// wait for stratum to send first job
if ( have_stratum ) while ( unlikely( !g_work.job_id ) ) sleep(1);
if ( have_stratum ) while ( !stratum.job.job_id ) sleep(1);
while (1)
{
@@ -2171,14 +2101,15 @@ static void *miner_thread( void *userdata )
int64_t max64 = 1000;
int nonce_found = 0;
if ( likely( algo_gate.do_this_thread( thr_id ) ) )
if ( algo_gate.do_this_thread( thr_id ) )
{
if ( have_stratum )
{
pthread_mutex_lock( &g_work_lock );
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
algo_gate.stratum_gen_work( &stratum, &g_work );
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce,
stratum.job.clean );
pthread_mutex_unlock( &g_work_lock );
}
else
@@ -2198,17 +2129,17 @@ static void *miner_thread( void *userdata )
}
g_work_time = time(NULL);
}
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce, true );
pthread_mutex_unlock( &g_work_lock );
}
} // do_this_thread
algo_gate.resync_threads( &work );
if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) )
if ( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) )
continue;
// conditional mining
if ( unlikely( !wanna_mine( thr_id ) ) )
if (!wanna_mine(thr_id))
{
sleep(5);
continue;
@@ -2220,7 +2151,7 @@ static void *miner_thread( void *userdata )
max64 = g_work_time + ( have_longpoll ? LP_SCANTIME : opt_scantime )
- time(NULL);
// time limit
if ( unlikely( opt_time_limit && firstwork_time ) )
if ( opt_time_limit && firstwork_time )
{
int passed = (int)( time(NULL) - firstwork_time );
int remain = (int)( opt_time_limit - passed );
@@ -2278,17 +2209,15 @@ static void *miner_thread( void *userdata )
pthread_mutex_unlock( &stats_lock );
}
// If unsubmiited nonce(s) found, submit now.
if ( unlikely( nonce_found && !opt_benchmark ) )
if ( nonce_found && !opt_benchmark )
{
applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.",
algo_names[ opt_algo ] );
if ( !submit_work( mythr, &work ) )
{
applog( LOG_WARNING, "Failed to submit share." );
break;
}
if ( !opt_quiet )
applog( LOG_NOTICE, "%d: submitted by thread %d.",
applog( LOG_BLUE, "Share %d submitted by thread %d.",
accepted_share_count + rejected_share_count + 1,
mythr->id );
@@ -2303,7 +2232,7 @@ static void *miner_thread( void *userdata )
}
}
// display hashrate
if ( unlikely( opt_hash_meter ) )
if ( opt_hash_meter )
{
char hr[16];
char hr_units[2] = {0,0};
@@ -2321,8 +2250,8 @@ static void *miner_thread( void *userdata )
// Display benchmark total
// Update hashrate for API if no shares accepted yet.
if ( unlikely( ( opt_benchmark || !accepted_share_count )
&& thr_id == opt_n_threads - 1 ) )
if ( ( opt_benchmark || !accepted_share_count )
&& thr_id == opt_n_threads - 1 )
{
double hashrate = 0.;
for ( i = 0; i < opt_n_threads; i++ )
@@ -2591,14 +2520,13 @@ out:
// used by stratum and gbt
void std_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_tree, uint32_t ntime,
uint32_t nbits, unsigned char *final_sapling_hash )
uint32_t *prevhash, uint32_t *merkle_tree,
uint32_t ntime, uint32_t nbits )
{
int i;
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = version;
g_work->sapling = be32dec( &version ) == 5 ? true : false;
if ( have_stratum )
for ( i = 0; i < 8; i++ )
@@ -2612,27 +2540,8 @@ void std_build_block_header( struct work* g_work, uint32_t version,
g_work->data[ algo_gate.ntime_index ] = ntime;
g_work->data[ algo_gate.nbits_index ] = nbits;
if ( g_work->sapling )
{
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[20 + i] = le32dec( (uint32_t*)final_sapling_hash + i );
else
{
for ( i = 0; i < 8; i++ )
g_work->data[27 - i] = le32dec( (uint32_t*)final_sapling_hash + i );
g_work->data[19] = 0;
}
g_work->data[28] = 0x80000000;
g_work->data[29] = 0x00000000;
g_work->data[30] = 0x00000000;
g_work->data[31] = 0x00000380;
}
else
{
g_work->data[20] = 0x80000000;
g_work->data[31] = 0x00000280;
}
g_work->data[20] = 0x80000000;
g_work->data[31] = 0x00000280;
}
void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
@@ -2646,8 +2555,7 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// Assemble block header
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits),
sctx->job.final_sapling_hash );
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
}
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
@@ -2662,11 +2570,12 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
algo_gate.build_extraheader( g_work, sctx );
net_diff = algo_gate.calc_network_diff( g_work );
algo_gate.set_work_data_endian( g_work );
work_set_target( g_work, sctx->job.diff
/ ( opt_target_factor * opt_diff_factor ) );
pthread_mutex_unlock( &sctx->work_lock );
work_set_target( g_work, sctx->job.diff
/ ( opt_target_factor * opt_diff_factor ) );
if ( opt_debug )
{
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
@@ -2676,72 +2585,60 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
free( xnonce2str );
}
double hr = 0.;
pthread_mutex_lock( &stats_lock );
for ( int i = 0; i < opt_n_threads; i++ )
hr += thr_hashrates[i];
global_hashrate = hr;
pthread_mutex_unlock( &stats_lock );
if ( stratum_diff != sctx->job.diff )
applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
sctx->job.diff, sctx->block_height, g_work->job_id );
else if ( last_block_height != sctx->block_height )
applog( LOG_BLUE, "New block %d, job %s",
sctx->block_height, g_work->job_id );
else
applog( LOG_BLUE,"New job %s", g_work->job_id );
// Update data and calculate new estimates.
// Log new block and/or stratum difficulty change.
if ( ( stratum_diff != sctx->job.diff )
|| ( last_block_height != sctx->block_height ) )
|| ( last_block_height != sctx->block_height ) )
{
static bool multipool = false;
if ( stratum.block_height < last_block_height ) multipool = true;
if ( unlikely( !session_first_block ) )
session_first_block = stratum.block_height;
last_block_height = stratum.block_height;
stratum_diff = sctx->job.diff;
last_targetdiff = g_work->targetdiff;
double hr = 0.;
new_job = false;
pthread_mutex_lock( &stats_lock );
if ( !opt_quiet )
{
applog2( LOG_INFO, "%s: %s", algo_names[opt_algo], short_url );
applog2( LOG_INFO, "Diff: Net %.3g, Stratum %.3g, Target %.3g",
net_diff, stratum_diff, last_targetdiff );
if ( likely( hr > 0. ) )
{
char hr_units[4] = {0};
char block_ttf[32];
char share_ttf[32];
for ( int i = 0; i < opt_n_threads; i++ )
hr += thr_hashrates[i];
global_hashrate = hr;
pthread_mutex_unlock( &stats_lock );
sprintf_et( block_ttf, net_diff * diff_to_hash / hr );
sprintf_et( share_ttf, last_targetdiff * diff_to_hash / hr );
scale_hash_for_display ( &hr, hr_units );
applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
hr, hr_units, block_ttf, share_ttf );
if ( !multipool && net_diff > 0. )
{
struct timeval now, et;
gettimeofday( &now, NULL );
timeval_subtract( &et, &now, &session_start );
double net_hr = net_diff * diff_to_hash;
char net_ttf[32];
char net_hr_units[4] = {0};
if ( !opt_quiet )
{
if ( stratum_diff != sctx->job.diff )
applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
sctx->job.diff, sctx->block_height, g_work->job_id );
else if ( last_block_height != sctx->block_height )
applog( LOG_BLUE, "New block %d, job %s", sctx->block_height,
g_work->job_id );
else
applog( LOG_BLUE,"New job %s.", g_work->job_id );
}
sprintf_et( net_ttf,
( last_block_height - session_first_block ) == 0 ? 0 :
et.tv_sec / ( last_block_height - session_first_block ) );
// Update data and calculate new estimates.
stratum_diff = sctx->job.diff;
last_block_height = stratum.block_height;
last_targetdiff = g_work->targetdiff;
scale_hash_for_display ( &net_hr, net_hr_units );
if ( !opt_quiet )
{
applog2( LOG_INFO, "%s %s block %d", short_url,
algo_names[opt_algo], stratum.block_height );
applog2( LOG_INFO, "Diff: net %g, stratum %g, target %g",
net_diff, stratum_diff, last_targetdiff );
}
applog2( LOG_INFO, "TTF @ %.2f %sh/s: %s",
net_hr, net_hr_units, net_ttf );
}
} // hr > 0
} // !quiet
if ( hr > 0. )
{
char hr_units[4] = {0};
char block_ttf[32];
char share_ttf[32];
sprintf_et( block_ttf, net_diff * diff_to_hash / hr );
sprintf_et( share_ttf, last_targetdiff * diff_to_hash / hr );
scale_hash_for_display ( &hr, hr_units );
if ( !opt_quiet )
{
applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
hr, hr_units, block_ttf, share_ttf );
}
}
} // new diff/block
}
@@ -2769,7 +2666,7 @@ static void *stratum_thread(void *userdata )
{
int failures = 0;
if ( unlikely( stratum_need_reset ) )
if ( stratum_need_reset )
{
stratum_need_reset = false;
stratum_disconnect( &stratum );
@@ -2806,7 +2703,8 @@ static void *stratum_thread(void *userdata )
applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
sleep(opt_fail_pause);
}
if ( unlikely( jsonrpc_2 ) )
if (jsonrpc_2)
{
work_free(&g_work);
work_copy(&g_work, &stratum.work);
@@ -2819,12 +2717,28 @@ static void *stratum_thread(void *userdata )
if ( stratum.job.job_id
&& ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
{
new_job = true;
pthread_mutex_lock(&g_work_lock);
algo_gate.stratum_gen_work( &stratum, &g_work );
time(&g_work_time);
pthread_mutex_unlock(&g_work_lock);
restart_threads();
}
if ( stratum.job.clean || jsonrpc_2 )
{
if ( !opt_quiet && last_block_height && new_job
&& ( last_block_height == stratum.block_height ) )
{
new_job = false;
applog( LOG_BLUE,"New job %s", g_work.job_id );
}
}
else if (opt_debug && !opt_quiet)
{
applog( LOG_BLUE, "%s asks job %d for block %d", short_url,
strtoul( stratum.job.job_id, NULL, 16 ), stratum.block_height );
}
} // stratum.job.job_id
if ( stratum_socket_full( &stratum, opt_timeout ) )
{
@@ -2850,6 +2764,25 @@ static void *stratum_thread(void *userdata )
// check if this redundant
stratum_disconnect( &stratum );
}
/*
if ( !stratum_socket_full( &stratum, opt_timeout ) )
{
stratum_errors++;
applog(LOG_ERR, "Stratum connection timeout");
s = NULL;
}
else
s = stratum_recv_line(&stratum);
if ( !s )
{
stratum_disconnect(&stratum);
applog(LOG_WARNING, "Stratum connection interrupted");
continue;
}
if (!stratum_handle_method(&stratum, s))
stratum_handle_response(s);
free(s);
*/
} // loop
out:
return NULL;
@@ -3777,7 +3710,6 @@ int main(int argc, char *argv[])
applog(LOG_WARNING,"available on Linux. Using default affinity.");
opt_affinity = -1;
}
/*
else
{
affine_to_cpu_mask( -1, opt_affinity );
@@ -3796,12 +3728,8 @@ int main(int argc, char *argv[])
#endif
}
}
*/
}
applog( LOG_INFO, "Extranonce subscribe: %s",
opt_extranonce ? "YES" : "NO" );
#ifdef HAVE_SYSLOG_H
if (use_syslog)
openlog("cpuminer", LOG_PID, LOG_USER);
@@ -3831,7 +3759,7 @@ int main(int argc, char *argv[])
/* start work I/O thread */
if (thread_create(thr, workio_thread))
{
{
applog(LOG_ERR, "work thread create failed");
return 1;
}
@@ -3897,7 +3825,7 @@ int main(int argc, char *argv[])
thr->q = tq_new();
if (!thr->q)
return 1;
err = thread_create(thr, miner_thread);
err = thread_create(thr, miner_thread);
if (err) {
applog(LOG_ERR, "thread %d create failed", i);
return 1;

27
miner.h
View File

@@ -313,14 +313,12 @@ size_t address_to_script( unsigned char *out, size_t outsz, const char *addr );
int timeval_subtract( struct timeval *result, struct timeval *x,
struct timeval *y);
bool fulltest( const uint32_t *hash, const uint32_t *target );
bool valid_hash( const void*, const void* );
void work_set_target( struct work* work, double diff );
double target_to_diff( uint32_t* target );
extern void diff_to_target( uint64_t *target, double diff );
extern void diff_to_target(uint32_t *target, double diff);
double hash_target_ratio( uint32_t* hash, uint32_t* target );
void work_set_target_ratio( struct work* work, const void *hash );
void work_set_target_ratio( struct work* work, uint32_t* hash );
struct thr_info {
int id;
@@ -332,10 +330,10 @@ struct thr_info {
//struct thr_info *thr_info;
bool submit_solution( struct work *work, const void *hash,
bool submit_solution( struct work *work, void *hash,
struct thr_info *thr );
bool submit_lane_solution( struct work *work, const void *hash,
struct thr_info *thr, const int lane );
bool submit_lane_solution( struct work *work, void *hash,
struct thr_info *thr, int lane );
//bool submit_work( struct thr_info *thr, const struct work *work_in );
@@ -363,7 +361,7 @@ float cpu_temp( int core );
struct work {
uint32_t data[48] __attribute__ ((aligned (64)));
uint32_t target[8] __attribute__ ((aligned (64)));
uint32_t target[8];
double targetdiff;
// double shareratio;
@@ -376,8 +374,6 @@ struct work {
char *job_id;
size_t xnonce2_len;
unsigned char *xnonce2;
bool sapling;
// x16rt
uint32_t merkleroothash[8];
uint32_t witmerkleroothash[8];
@@ -389,9 +385,8 @@ struct work {
} __attribute__ ((aligned (64)));
struct stratum_job {
char *job_id;
unsigned char prevhash[32];
unsigned char final_sapling_hash[32];
char *job_id;
size_t coinbase_size;
unsigned char *coinbase;
unsigned char *xnonce2;
@@ -574,7 +569,6 @@ enum algos {
ALGO_SHA256D,
ALGO_SHA256Q,
ALGO_SHA256T,
ALGO_SHA3D,
ALGO_SHAVITE3,
ALGO_SKEIN,
ALGO_SKEIN2,
@@ -608,7 +602,6 @@ enum algos {
ALGO_XEVAN,
ALGO_YESCRYPT,
ALGO_YESCRYPTR8,
ALGO_YESCRYPTR8G,
ALGO_YESCRYPTR16,
ALGO_YESCRYPTR32,
ALGO_YESPOWER,
@@ -674,7 +667,6 @@ static const char* const algo_names[] = {
"sha256d",
"sha256q",
"sha256t",
"sha3d",
"shavite3",
"skein",
"skein2",
@@ -708,7 +700,6 @@ static const char* const algo_names[] = {
"xevan",
"yescrypt",
"yescryptr8",
"yescryptr8g",
"yescryptr16",
"yescryptr32",
"yespower",
@@ -841,8 +832,7 @@ Options:\n\
sha256d Double SHA-256\n\
sha256q Quad SHA-256, Pyrite (PYE)\n\
sha256t Triple SHA-256, Onecoin (OC)\n\
sha3d Double Keccak256 (BSHA3)\n\
shavite3 Shavite3\n\
shavite3 Shavite3\n\
skein Skein+Sha (Skeincoin)\n\
skein2 Double Skein (Woodcoin)\n\
skunk Signatum (SIGT)\n\
@@ -875,7 +865,6 @@ Options:\n\
xevan Bitsend (BSD)\n\
yescrypt Globalboost-Y (BSTY)\n\
yescryptr8 BitZeny (ZNY)\n\
yescryptr8g Koto (KOTO)\n\
yescryptr16 Eli\n\
yescryptr32 WAVI\n\
yespower Cryply\n\

View File

@@ -129,8 +129,8 @@ static inline __m512i m512_const_64( const uint64_t i7, const uint64_t i6,
}
// Equivalent of set1, broadcast 64 bit constant to all 64 bit elements.
#define m512_const1_256( v ) _mm512_broadcast_i64x4( v )
#define m512_const1_128( v ) _mm512_broadcast_i64x2( v )
#define m512_const1_256( i ) _mm512_broadcast_i64x4( i )
#define m512_const1_128( i ) _mm512_broadcast_i64x2( i )
#define m512_const1_64( i ) _mm512_broadcastq_epi64( mm128_mov64_128( i ) )
#define m512_const1_32( i ) _mm512_broadcastd_epi32( mm128_mov32_128( i ) )
#define m512_const1_16( i ) _mm512_broadcastw_epi16( mm128_mov32_128( i ) )
@@ -547,6 +547,8 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
//
// Rotate elements from 2 512 bit vectors in place, source arguments
// are overwritten.
// These can all be done with 2 permutex2var instructions but they are
// slower than either xor or alignr and require AVX512VBMI.
#define mm512_swap1024_512(v1, v2) \
v1 = _mm512_xor_si512(v1, v2); \

View File

@@ -41,16 +41,15 @@
"/sys/devices/platform/coretemp.0/hwmon/hwmon1/temp1_input"
#define HWMON_PATH3 \
"/sys/devices/platform/coretemp.0/hwmon/hwmon2/temp1_input"
"/sys/class/hwmon/hwmon0/temp1_input"
#define HWMON_PATH \
"/sys/class/hwmon/hwmon2/temp1_input"
// need this for Ryzen
/*
#define HWMON_ALT \
"/sys/class/hwmon/hwmon0/temp1_input"
/*
#define HWMON_ALT1 \
"/sys/devices/platform/coretemp.0/hwmon/hwmon1/temp1_input"
*/
@@ -85,9 +84,21 @@ static inline float linux_cputemp(int core)
if (!fd)
fd = fopen(HWMON_PATH, "r");
if (!fd)
fd = fopen(HWMON_ALT, "r");
if (!fd)
// fd = fopen(HWMON_ALT1, "r");
// if (!fd)
fd = fopen(HWMON_ALT2, "r");
if (!fd)
fd = fopen(HWMON_ALT3, "r");
if (!fd)
fd = fopen(HWMON_ALT4, "r");
if (!fd)
fd = fopen(HWMON_ALT5, "r");
if (!fd)
return tc;

167
util.c
View File

@@ -923,7 +923,7 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
{
unsigned char addrbin[26];
unsigned char addrbin[25];
int addrver;
size_t rv;
@@ -982,89 +982,64 @@ int timeval_subtract(struct timeval *result, struct timeval *x,
return x->tv_sec < y->tv_sec;
}
// Use this when deinterleaved
// do 64 bit test 4 iterations
inline bool valid_hash( const void *hash, const void *target )
{
const uint64_t *h = (const uint64_t*)hash;
const uint64_t *t = (const uint64_t*)target;
if ( h[3] > t[3] ) return false;
if ( h[3] < t[3] ) return true;
if ( h[2] > t[2] ) return false;
if ( h[2] < t[2] ) return true;
if ( h[1] > t[1] ) return false;
if ( h[1] < t[1] ) return true;
if ( h[0] > t[0] ) return false;
return true;
}
bool fulltest( const uint32_t *hash, const uint32_t *target )
bool fulltest(const uint32_t *hash, const uint32_t *target)
{
int i;
bool rc = true;
for ( i = 7; i >= 0; i-- )
{
if ( hash[i] > target[i] )
{
for (i = 7; i >= 0; i--) {
if (hash[i] > target[i]) {
rc = false;
break;
}
if ( hash[i] < target[i] )
{
if (hash[i] < target[i]) {
rc = true;
break;
}
}
if ( opt_debug )
{
if (opt_debug) {
uint32_t hash_be[8], target_be[8];
char hash_str[65], target_str[65];
for ( i = 0; i < 8; i++ )
{
be32enc( hash_be + i, hash[7 - i] );
be32enc( target_be + i, target[7 - i] );
for (i = 0; i < 8; i++) {
be32enc(hash_be + i, hash[7 - i]);
be32enc(target_be + i, target[7 - i]);
}
bin2hex( hash_str, (unsigned char *)hash_be, 32 );
bin2hex( target_str, (unsigned char *)target_be, 32 );
bin2hex(hash_str, (unsigned char *)hash_be, 32);
bin2hex(target_str, (unsigned char *)target_be, 32);
applog( LOG_DEBUG, "DEBUG: %s\nHash: %s\nTarget: %s",
rc ? "hash <= target"
: "hash > target (false positive)",
hash_str, target_str );
applog(LOG_DEBUG, "DEBUG: %s\nHash: %s\nTarget: %s",
rc ? "hash <= target"
: "hash > target (false positive)",
hash_str,
target_str);
}
return rc;
}
void diff_to_target(uint64_t *target, double diff)
void diff_to_target(uint32_t *target, double diff)
{
uint64_t m;
int k;
const double exp64 = (double)0xffffffffffffffff + 1.;
for ( k = 3; k > 0 && diff > 1.0; k-- )
diff /= exp64;
// for (k = 6; k > 0 && diff > 1.0; k--)
// diff /= 4294967296.0;
m = (uint64_t)( 0xffff0000 / diff );
if unlikely( m == 0 && k == 3 )
memset( target, 0xff, 32 );
else
{
memset( target, 0, 32 );
target[k] = m;
// target[k] = (uint32_t)m;
// target[k + 1] = (uint32_t)(m >> 32);
for (k = 6; k > 0 && diff > 1.0; k--)
diff /= 4294967296.0;
m = (uint64_t)(4294901760.0 / diff);
if (m == 0 && k == 6)
memset(target, 0xff, 32);
else {
memset(target, 0, 32);
target[k] = (uint32_t)m;
target[k + 1] = (uint32_t)(m >> 32);
}
}
// Only used by stratum pools
void work_set_target(struct work* work, double diff)
{
diff_to_target( (uint64_t*)work->target, diff );
diff_to_target(work->target, diff);
work->targetdiff = diff;
}
@@ -1836,7 +1811,6 @@ static uint32_t getblocheight(struct stratum_ctx *sctx)
static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
{
const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime;
const char *finalsaplinghash = NULL;
const char *denom10 = NULL, *denom100 = NULL, *denom1000 = NULL,
*denom10000 = NULL, *prooffullnode = NULL;
const char *extradata = NULL;
@@ -1897,18 +1871,6 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
goto out;
}
hex2bin( sctx->job.version, version, 4 );
int ver = be32dec( sctx->job.version );
if ( ver == 5 )
{
finalsaplinghash = json_string_value( json_array_get( params, 9 ) );
if ( !finalsaplinghash || strlen(finalsaplinghash) != 64 )
{
applog( LOG_ERR, "Stratum notify: invalid version 5 parameters" );
goto out;
}
}
if ( is_veil )
{
if ( !denom10 || !denom100 || !denom1000 || !denom10000
@@ -1922,69 +1884,66 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
}
if ( merkle_count )
merkle = (uchar**) malloc( merkle_count * sizeof(char *) );
merkle = (uchar**) malloc(merkle_count * sizeof(char *));
for ( i = 0; i < merkle_count; i++ )
{
const char *s = json_string_value( json_array_get( merkle_arr, i ) );
if ( !s || strlen(s) != 64 )
{
while ( i-- ) free( merkle[i] );
free( merkle );
applog( LOG_ERR, "Stratum notify: invalid Merkle branch" );
const char *s = json_string_value(json_array_get(merkle_arr, i));
if (!s || strlen(s) != 64) {
while (i--)
free(merkle[i]);
free(merkle);
applog(LOG_ERR, "Stratum notify: invalid Merkle branch");
goto out;
}
merkle[i] = (uchar*) malloc( 32 );
hex2bin( merkle[i], s, 32 );
merkle[i] = (uchar*) malloc(32);
hex2bin(merkle[i], s, 32);
}
pthread_mutex_lock( &sctx->work_lock );
pthread_mutex_lock(&sctx->work_lock);
coinb1_size = strlen( coinb1 ) / 2;
coinb2_size = strlen( coinb2 ) / 2;
coinb1_size = strlen(coinb1) / 2;
coinb2_size = strlen(coinb2) / 2;
sctx->job.coinbase_size = coinb1_size + sctx->xnonce1_size +
sctx->xnonce2_size + coinb2_size;
sctx->job.coinbase = (uchar*) realloc( sctx->job.coinbase,
sctx->job.coinbase_size );
sctx->job.coinbase = (uchar*) realloc(sctx->job.coinbase, sctx->job.coinbase_size);
sctx->job.xnonce2 = sctx->job.coinbase + coinb1_size + sctx->xnonce1_size;
hex2bin( sctx->job.coinbase, coinb1, coinb1_size );
memcpy( sctx->job.coinbase + coinb1_size,
sctx->xnonce1, sctx->xnonce1_size );
if ( !sctx->job.job_id || strcmp( sctx->job.job_id, job_id ) )
hex2bin(sctx->job.coinbase, coinb1, coinb1_size);
memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size);
if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id))
memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
hex2bin( sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size );
free( sctx->job.job_id );
sctx->job.job_id = strdup( job_id );
hex2bin( sctx->job.prevhash, prevhash, 32 );
if ( has_claim ) hex2bin( sctx->job.extra, extradata, 32 );
if ( has_roots ) hex2bin( sctx->job.extra, extradata, 64 );
if ( ver == 5 )
hex2bin( sctx->job.final_sapling_hash, finalsaplinghash, 32 );
hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size);
free(sctx->job.job_id);
sctx->job.job_id = strdup(job_id);
hex2bin(sctx->job.prevhash, prevhash, 32);
if (has_claim) hex2bin(sctx->job.extra, extradata, 32);
if (has_roots) hex2bin(sctx->job.extra, extradata, 64);
if ( is_veil )
{
hex2bin( sctx->job.denom10, denom10, 32 );
hex2bin( sctx->job.denom100, denom100, 32 );
hex2bin( sctx->job.denom1000, denom1000, 32 );
hex2bin( sctx->job.denom10000, denom10000, 32 );
hex2bin( sctx->job.proofoffullnode, prooffullnode, 32 );
hex2bin(sctx->job.denom10, denom10, 32);
hex2bin(sctx->job.denom100, denom100, 32);
hex2bin(sctx->job.denom1000, denom1000, 32);
hex2bin(sctx->job.denom10000, denom10000, 32);
hex2bin(sctx->job.proofoffullnode, prooffullnode, 32);
}
sctx->block_height = getblocheight( sctx );
sctx->block_height = getblocheight(sctx);
for ( i = 0; i < sctx->job.merkle_count; i++ )
free( sctx->job.merkle[i] );
for (i = 0; i < sctx->job.merkle_count; i++)
free(sctx->job.merkle[i]);
free( sctx->job.merkle );
free(sctx->job.merkle);
sctx->job.merkle = merkle;
sctx->job.merkle_count = merkle_count;
hex2bin( sctx->job.nbits, nbits, 4 );
hex2bin( sctx->job.ntime, stime, 4 );
hex2bin(sctx->job.version, version, 4);
hex2bin(sctx->job.nbits, nbits, 4);
hex2bin(sctx->job.ntime, stime, 4);
sctx->job.clean = clean;
sctx->job.diff = sctx->next_diff;
pthread_mutex_unlock( &sctx->work_lock );
pthread_mutex_unlock(&sctx->work_lock);
ret = true;