mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
f990f6a702 |
3
AUTHORS
3
AUTHORS
@@ -33,6 +33,3 @@ Jay D Dee
|
||||
xcouiz@gmail.com
|
||||
|
||||
Cryply
|
||||
|
||||
Colin Percival
|
||||
Alexander Peslyak
|
||||
|
@@ -80,6 +80,7 @@ cpuminer_SOURCES = \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
algo/cryptonight/cryptonight-aesni.c\
|
||||
algo/cryptonight/cryptonight.c\
|
||||
algo/cubehash/sph_cubehash.c \
|
||||
algo/cubehash/cubehash_sse2.c\
|
||||
algo/cubehash/cube-hash-2way.c \
|
||||
algo/echo/sph_echo.c \
|
||||
@@ -120,8 +121,6 @@ cpuminer_SOURCES = \
|
||||
algo/keccak/keccak-hash-4way.c \
|
||||
algo/keccak/keccak-4way.c\
|
||||
algo/keccak/keccak-gate.c \
|
||||
algo/keccak/sha3d-4way.c \
|
||||
algo/keccak/sha3d.c \
|
||||
algo/lanehash/lane.c \
|
||||
algo/luffa/sph_luffa.c \
|
||||
algo/luffa/luffa.c \
|
||||
@@ -181,7 +180,6 @@ cpuminer_SOURCES = \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha256-hash-4way.c \
|
||||
algo/sha/sha512-hash-4way.c \
|
||||
algo/sha/hmac-sha256-hash.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t-gate.c \
|
||||
algo/sha/sha256t-4way.c \
|
||||
@@ -294,11 +292,12 @@ cpuminer_SOURCES = \
|
||||
algo/x22/x25x.c \
|
||||
algo/x22/x25x-4way.c \
|
||||
algo/yescrypt/yescrypt.c \
|
||||
algo/yescrypt/sha256_Y.c \
|
||||
algo/yescrypt/yescrypt-best.c \
|
||||
algo/yespower/yespower-gate.c \
|
||||
algo/yespower/yespower-blake2b.c \
|
||||
algo/yespower/crypto/blake2b-yp.c \
|
||||
algo/yespower/yescrypt-r8g.c \
|
||||
algo/yespower/sha256_p.c \
|
||||
algo/yespower/yespower-opt.c
|
||||
|
||||
disable_flags =
|
||||
|
@@ -97,10 +97,10 @@ Supported Algorithms
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
scryptjane:nf
|
||||
sha256d Double SHA-256
|
||||
sha256q Quad SHA-256, Pyrite (PYE)
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
sha3d Double keccak256 (BSHA3)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
@@ -134,7 +134,6 @@ Supported Algorithms
|
||||
xevan Bitsend (BSD)
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr8g Koto (KOTO)
|
||||
yescryptr16 Eli
|
||||
yescryptr32 WAVI
|
||||
yespower Cryply
|
||||
|
@@ -33,80 +33,9 @@ supported.
|
||||
64 bit Linux or Windows operating system. Apple, Android and Raspberry Pi
|
||||
are not supported. FreeBSD YMMV.
|
||||
|
||||
Reporting bugs
|
||||
--------------
|
||||
|
||||
Bugs can be reported by sending an email to JayDDee246@gmail.com or opening
|
||||
an issue in git: https://github.com/JayDDee/cpuminer-opt/issues
|
||||
|
||||
Please include the following information:
|
||||
|
||||
1. CPU model, operating system, cpuminer-opt version (must be latest),
|
||||
binary file for Windows, changes to default build procedure for Linux.
|
||||
|
||||
2. Exact command line (except user and pw) and initial output showing
|
||||
the above requested info.
|
||||
|
||||
3. Additional program output showing any error messages or other
|
||||
pertinent data.
|
||||
|
||||
4. A clear description of the problem including history, scope,
|
||||
persistence or intermittence, and reproducibility.
|
||||
|
||||
In simpler terms:
|
||||
|
||||
What is it doing?
|
||||
What should it be doing instead?
|
||||
Did it work in a previous release?
|
||||
Does it happen for all algos? All pools? All options? Solo?
|
||||
Does it happen all the time?
|
||||
If not, what makes it happen or not happen?
|
||||
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.11.7
|
||||
|
||||
Added yescryptr8g algo for KOTO, including support for block version 5.
|
||||
|
||||
Added sha3d algo for BSHA3.
|
||||
|
||||
Removed memcmp and clean_job checks from get_new_work, now only check job_id.
|
||||
|
||||
Small improvement to sha512 and sha256 parallel implementations that don't
|
||||
use SHA.
|
||||
|
||||
v3.11.6
|
||||
|
||||
Fixed CPU temperature regression from v3.11.5.
|
||||
|
||||
More improvements to share log. More compact, highlight incremented counter,
|
||||
block height when solved, job id when stale.
|
||||
|
||||
v3.11.5
|
||||
|
||||
Fixed AVX512 detection that could cause compilation errors on CPUs
|
||||
without AVX512.
|
||||
|
||||
Fixed "BLOCK SOLVED" log incorrectly displaying "Accepted" when a block
|
||||
is solved.
|
||||
Added share counter to share submitted & accepted logs.
|
||||
Added job id to share submitted log.
|
||||
Share submitted log is no longer highlighted blue, there was too much blue.
|
||||
|
||||
Another CPU temperature fix for Linux.
|
||||
|
||||
Added bug reporting tips to RELEASE NOTES.
|
||||
|
||||
v3.11.4
|
||||
|
||||
Fixed scrypt segfault since v3.9.9.1.
|
||||
|
||||
Stale shares counted and reported separately from other rejected shares.
|
||||
|
||||
Display of counters for solved blocks, rejects, stale shares suppressed in
|
||||
periodic summary when zero.
|
||||
|
||||
v3.11.3
|
||||
|
||||
Fixed x12 AVX2 again.
|
||||
|
@@ -209,7 +209,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256Q: register_sha256q_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHA3D: register_sha3d_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
@@ -248,7 +247,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
*/
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8G: register_yescryptr8g_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
|
||||
|
@@ -121,55 +121,54 @@ void ( *hash_suw ) ( void*, const void* );
|
||||
|
||||
// Allocate thread local buffers and other initialization specific to miner
|
||||
// threads.
|
||||
bool ( *miner_thread_init ) ( int );
|
||||
bool ( *miner_thread_init ) ( int );
|
||||
|
||||
// Generate global blockheader from stratum data.
|
||||
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
|
||||
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
|
||||
|
||||
// Get thread local copy of blockheader with unique nonce.
|
||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
|
||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t*,
|
||||
bool );
|
||||
|
||||
// Return pointer to nonce in blockheader.
|
||||
uint32_t *( *get_nonceptr ) ( uint32_t* );
|
||||
uint32_t *( *get_nonceptr ) ( uint32_t* );
|
||||
|
||||
// Decode getwork blockheader
|
||||
bool ( *work_decode ) ( const json_t*, struct work* );
|
||||
bool ( *work_decode ) ( const json_t*, struct work* );
|
||||
|
||||
// Extra getwork data
|
||||
void ( *decode_extra_data ) ( struct work*, uint64_t* );
|
||||
void ( *decode_extra_data ) ( struct work*, uint64_t* );
|
||||
|
||||
bool ( *submit_getwork_result ) ( CURL*, struct work* );
|
||||
bool ( *submit_getwork_result ) ( CURL*, struct work* );
|
||||
|
||||
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
|
||||
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
|
||||
|
||||
// Increment extranonce
|
||||
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
|
||||
|
||||
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
|
||||
uint32_t*, uint32_t, uint32_t,
|
||||
unsigned char* );
|
||||
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
|
||||
|
||||
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
|
||||
uint32_t*, uint32_t, uint32_t );
|
||||
// Build mining.submit message
|
||||
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
|
||||
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
|
||||
|
||||
char* ( *malloc_txs_request ) ( struct work* );
|
||||
char* ( *malloc_txs_request ) ( struct work* );
|
||||
|
||||
// Big or little
|
||||
void ( *set_work_data_endian ) ( struct work* );
|
||||
void ( *set_work_data_endian ) ( struct work* );
|
||||
|
||||
double ( *calc_network_diff ) ( struct work* );
|
||||
double ( *calc_network_diff ) ( struct work* );
|
||||
|
||||
// Wait for first work
|
||||
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
|
||||
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
|
||||
|
||||
// Diverge mining threads
|
||||
bool ( *do_this_thread ) ( int );
|
||||
bool ( *do_this_thread ) ( int );
|
||||
|
||||
// After do_this_thread
|
||||
void ( *resync_threads ) ( struct work* );
|
||||
void ( *resync_threads ) ( struct work* );
|
||||
|
||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||
bool ( *stratum_handle_response ) ( json_t* );
|
||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||
bool ( *stratum_handle_response )( json_t* );
|
||||
set_t optimizations;
|
||||
int ( *get_work_data_size ) ();
|
||||
int ntime_index;
|
||||
@@ -226,7 +225,7 @@ uint32_t *std_get_nonceptr( uint32_t *work_data );
|
||||
uint32_t *jr2_get_nonceptr( uint32_t *work_data );
|
||||
|
||||
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr );
|
||||
uint32_t* end_nonce_ptr, bool clean_job );
|
||||
void jr2_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr );
|
||||
|
||||
@@ -257,8 +256,7 @@ double std_calc_network_diff( struct work *work );
|
||||
|
||||
void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits,
|
||||
unsigned char *final_sapling_hash );
|
||||
uint32_t ntime, uint32_t nbits );
|
||||
|
||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||
|
||||
|
@@ -13,7 +13,7 @@ void blakehash_4way(void *state, const void *input)
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r14_4way_context ctx;
|
||||
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
|
||||
blake256r14_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way_close( &ctx, vhash );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
@@ -36,7 +36,7 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r14_4way_init( &blake_4w_ctx );
|
||||
blake256r14_4way_update( &blake_4w_ctx, vdata, 64 );
|
||||
blake256r14_4way( &blake_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
@@ -37,6 +37,8 @@
|
||||
#ifndef __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY__ 1
|
||||
|
||||
//#ifdef __SSE4_2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
@@ -49,41 +51,46 @@ extern "C"{
|
||||
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
//////////////////////////
|
||||
//
|
||||
// Blake-256 4 way SSE2
|
||||
// With SSE4.2 only Blake-256 4 way is available.
|
||||
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
|
||||
|
||||
// Blake-256 4 way
|
||||
|
||||
typedef struct {
|
||||
unsigned char buf[64<<2];
|
||||
uint32_t H[8<<2];
|
||||
// __m128i buf[16] __attribute__ ((aligned (64)));
|
||||
// __m128i H[8];
|
||||
// __m128i S[4];
|
||||
size_t ptr;
|
||||
uint32_t T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blake_4way_small_context __attribute__ ((aligned (64)));
|
||||
|
||||
// Default, 14 rounds, blake, decred
|
||||
// Default 14 rounds
|
||||
typedef blake_4way_small_context blake256_4way_context;
|
||||
void blake256_4way_init(void *ctx);
|
||||
void blake256_4way_update(void *ctx, const void *data, size_t len);
|
||||
#define blake256_4way blake256_4way_update
|
||||
void blake256_4way_close(void *ctx, void *dst);
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_4way_small_context blake256r14_4way_context;
|
||||
void blake256r14_4way_init(void *cc);
|
||||
void blake256r14_4way_update(void *cc, const void *data, size_t len);
|
||||
#define blake256r14_4way blake256r14_4way_update
|
||||
void blake256r14_4way_close(void *cc, void *dst);
|
||||
|
||||
// 8 rounds, blakecoin, vanilla
|
||||
typedef blake_4way_small_context blake256r8_4way_context;
|
||||
void blake256r8_4way_init(void *cc);
|
||||
void blake256r8_4way_update(void *cc, const void *data, size_t len);
|
||||
#define blake256r8_4way blake256r8_4way_update
|
||||
void blake256r8_4way_close(void *cc, void *dst);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
//////////////////////////
|
||||
//
|
||||
// Blake-256 8 way AVX2
|
||||
// Blake-256 8 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
@@ -97,6 +104,7 @@ typedef struct {
|
||||
typedef blake_8way_small_context blake256_8way_context;
|
||||
void blake256_8way_init(void *cc);
|
||||
void blake256_8way_update(void *cc, const void *data, size_t len);
|
||||
//#define blake256_8way blake256_8way_update
|
||||
void blake256_8way_close(void *cc, void *dst);
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
@@ -109,9 +117,10 @@ void blake256r14_8way_close(void *cc, void *dst);
|
||||
typedef blake_8way_small_context blake256r8_8way_context;
|
||||
void blake256r8_8way_init(void *cc);
|
||||
void blake256r8_8way_update(void *cc, const void *data, size_t len);
|
||||
#define blake256r8_8way blake256r8_8way_update
|
||||
void blake256r8_8way_close(void *cc, void *dst);
|
||||
|
||||
// Blake-512 4 way AVX2
|
||||
// Blake-512 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16];
|
||||
@@ -125,15 +134,14 @@ typedef blake_4way_big_context blake512_4way_context;
|
||||
|
||||
void blake512_4way_init( blake_4way_big_context *sc );
|
||||
void blake512_4way_update( void *cc, const void *data, size_t len );
|
||||
#define blake512_4way blake512_4way_update
|
||||
void blake512_4way_close( void *cc, void *dst );
|
||||
void blake512_4way_full( blake_4way_big_context *sc, void * dst,
|
||||
const void *data, size_t len );
|
||||
void blake512_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
////////////////////////////
|
||||
//
|
||||
// Blake-256 16 way AVX512
|
||||
//Blake-256 16 way
|
||||
|
||||
typedef struct {
|
||||
__m512i buf[16];
|
||||
@@ -161,9 +169,8 @@ void blake256r8_16way_init(void *cc);
|
||||
void blake256r8_16way_update(void *cc, const void *data, size_t len);
|
||||
void blake256r8_16way_close(void *cc, void *dst);
|
||||
|
||||
////////////////////////////
|
||||
//
|
||||
//// Blake-512 8 way AVX512
|
||||
|
||||
// Blake-512 8 way
|
||||
|
||||
typedef struct {
|
||||
__m512i buf[16];
|
||||
@@ -178,10 +185,12 @@ typedef blake_8way_big_context blake512_8way_context;
|
||||
void blake512_8way_init( blake_8way_big_context *sc );
|
||||
void blake512_8way_update( void *cc, const void *data, size_t len );
|
||||
void blake512_8way_close( void *cc, void *dst );
|
||||
void blake512_8way_full( blake_8way_big_context *sc, void * dst,
|
||||
const void *data, size_t len );
|
||||
void blake512_8way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
void *dst );
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -267,22 +267,22 @@ static const sph_u64 CB[16] = {
|
||||
#define CBx_(n) CBx__(n)
|
||||
#define CBx__(n) CB ## n
|
||||
|
||||
#define CB0 0x243F6A8885A308D3
|
||||
#define CB1 0x13198A2E03707344
|
||||
#define CB2 0xA4093822299F31D0
|
||||
#define CB3 0x082EFA98EC4E6C89
|
||||
#define CB4 0x452821E638D01377
|
||||
#define CB5 0xBE5466CF34E90C6C
|
||||
#define CB6 0xC0AC29B7C97C50DD
|
||||
#define CB7 0x3F84D5B5B5470917
|
||||
#define CB8 0x9216D5D98979FB1B
|
||||
#define CB9 0xD1310BA698DFB5AC
|
||||
#define CBA 0x2FFD72DBD01ADFB7
|
||||
#define CBB 0xB8E1AFED6A267E96
|
||||
#define CBC 0xBA7C9045F12C7F99
|
||||
#define CBD 0x24A19947B3916CF7
|
||||
#define CBE 0x0801F2E2858EFC16
|
||||
#define CBF 0x636920D871574E69
|
||||
#define CB0 SPH_C64(0x243F6A8885A308D3)
|
||||
#define CB1 SPH_C64(0x13198A2E03707344)
|
||||
#define CB2 SPH_C64(0xA4093822299F31D0)
|
||||
#define CB3 SPH_C64(0x082EFA98EC4E6C89)
|
||||
#define CB4 SPH_C64(0x452821E638D01377)
|
||||
#define CB5 SPH_C64(0xBE5466CF34E90C6C)
|
||||
#define CB6 SPH_C64(0xC0AC29B7C97C50DD)
|
||||
#define CB7 SPH_C64(0x3F84D5B5B5470917)
|
||||
#define CB8 SPH_C64(0x9216D5D98979FB1B)
|
||||
#define CB9 SPH_C64(0xD1310BA698DFB5AC)
|
||||
#define CBA SPH_C64(0x2FFD72DBD01ADFB7)
|
||||
#define CBB SPH_C64(0xB8E1AFED6A267E96)
|
||||
#define CBC SPH_C64(0xBA7C9045F12C7F99)
|
||||
#define CBD SPH_C64(0x24A19947B3916CF7)
|
||||
#define CBE SPH_C64(0x0801F2E2858EFC16)
|
||||
#define CBF SPH_C64(0x636920D871574E69)
|
||||
|
||||
#define READ_STATE64(state) do { \
|
||||
H0 = (state)->H[0]; \
|
||||
@@ -349,9 +349,9 @@ static const sph_u64 CB[16] = {
|
||||
#define DECL_STATE64_8WAY \
|
||||
__m512i H0, H1, H2, H3, H4, H5, H6, H7; \
|
||||
__m512i S0, S1, S2, S3; \
|
||||
uint64_t T0, T1;
|
||||
sph_u64 T0, T1;
|
||||
|
||||
#define COMPRESS64_8WAY( buf ) do \
|
||||
#define COMPRESS64_8WAY do \
|
||||
{ \
|
||||
__m512i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m512i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
@@ -424,84 +424,6 @@ static const sph_u64 CB[16] = {
|
||||
H7 = mm512_xor4( VF, V7, S3, H7 ); \
|
||||
} while (0)
|
||||
|
||||
void blake512_8way_compress( blake_8way_big_context *sc )
|
||||
{
|
||||
__m512i M0, M1, M2, M3, M4, M5, M6, M7;
|
||||
__m512i M8, M9, MA, MB, MC, MD, ME, MF;
|
||||
__m512i V0, V1, V2, V3, V4, V5, V6, V7;
|
||||
__m512i V8, V9, VA, VB, VC, VD, VE, VF;
|
||||
__m512i shuf_bswap64;
|
||||
|
||||
V0 = sc->H[0];
|
||||
V1 = sc->H[1];
|
||||
V2 = sc->H[2];
|
||||
V3 = sc->H[3];
|
||||
V4 = sc->H[4];
|
||||
V5 = sc->H[5];
|
||||
V6 = sc->H[6];
|
||||
V7 = sc->H[7];
|
||||
V8 = _mm512_xor_si512( sc->S[0], m512_const1_64( CB0 ) );
|
||||
V9 = _mm512_xor_si512( sc->S[1], m512_const1_64( CB1 ) );
|
||||
VA = _mm512_xor_si512( sc->S[2], m512_const1_64( CB2 ) );
|
||||
VB = _mm512_xor_si512( sc->S[3], m512_const1_64( CB3 ) );
|
||||
VC = _mm512_xor_si512( _mm512_set1_epi64( sc->T0 ),
|
||||
m512_const1_64( CB4 ) );
|
||||
VD = _mm512_xor_si512( _mm512_set1_epi64( sc->T0 ),
|
||||
m512_const1_64( CB5 ) );
|
||||
VE = _mm512_xor_si512( _mm512_set1_epi64( sc->T1 ),
|
||||
m512_const1_64( CB6 ) );
|
||||
VF = _mm512_xor_si512( _mm512_set1_epi64( sc->T1 ),
|
||||
m512_const1_64( CB7 ) );
|
||||
|
||||
shuf_bswap64 = m512_const_64( 0x38393a3b3c3d3e3f, 0x3031323334353637,
|
||||
0x28292a2b2c2d2e2f, 0x2021222324252627,
|
||||
0x18191a1b1c1d1e1f, 0x1011121314151617,
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 );
|
||||
|
||||
M0 = _mm512_shuffle_epi8( sc->buf[ 0], shuf_bswap64 );
|
||||
M1 = _mm512_shuffle_epi8( sc->buf[ 1], shuf_bswap64 );
|
||||
M2 = _mm512_shuffle_epi8( sc->buf[ 2], shuf_bswap64 );
|
||||
M3 = _mm512_shuffle_epi8( sc->buf[ 3], shuf_bswap64 );
|
||||
M4 = _mm512_shuffle_epi8( sc->buf[ 4], shuf_bswap64 );
|
||||
M5 = _mm512_shuffle_epi8( sc->buf[ 5], shuf_bswap64 );
|
||||
M6 = _mm512_shuffle_epi8( sc->buf[ 6], shuf_bswap64 );
|
||||
M7 = _mm512_shuffle_epi8( sc->buf[ 7], shuf_bswap64 );
|
||||
M8 = _mm512_shuffle_epi8( sc->buf[ 8], shuf_bswap64 );
|
||||
M9 = _mm512_shuffle_epi8( sc->buf[ 9], shuf_bswap64 );
|
||||
MA = _mm512_shuffle_epi8( sc->buf[10], shuf_bswap64 );
|
||||
MB = _mm512_shuffle_epi8( sc->buf[11], shuf_bswap64 );
|
||||
MC = _mm512_shuffle_epi8( sc->buf[12], shuf_bswap64 );
|
||||
MD = _mm512_shuffle_epi8( sc->buf[13], shuf_bswap64 );
|
||||
ME = _mm512_shuffle_epi8( sc->buf[14], shuf_bswap64 );
|
||||
MF = _mm512_shuffle_epi8( sc->buf[15], shuf_bswap64 );
|
||||
|
||||
ROUND_B_8WAY(0);
|
||||
ROUND_B_8WAY(1);
|
||||
ROUND_B_8WAY(2);
|
||||
ROUND_B_8WAY(3);
|
||||
ROUND_B_8WAY(4);
|
||||
ROUND_B_8WAY(5);
|
||||
ROUND_B_8WAY(6);
|
||||
ROUND_B_8WAY(7);
|
||||
ROUND_B_8WAY(8);
|
||||
ROUND_B_8WAY(9);
|
||||
ROUND_B_8WAY(0);
|
||||
ROUND_B_8WAY(1);
|
||||
ROUND_B_8WAY(2);
|
||||
ROUND_B_8WAY(3);
|
||||
ROUND_B_8WAY(4);
|
||||
ROUND_B_8WAY(5);
|
||||
|
||||
sc->H[0] = mm512_xor4( V8, V0, sc->S[0], sc->H[0] );
|
||||
sc->H[1] = mm512_xor4( V9, V1, sc->S[1], sc->H[1] );
|
||||
sc->H[2] = mm512_xor4( VA, V2, sc->S[2], sc->H[2] );
|
||||
sc->H[3] = mm512_xor4( VB, V3, sc->S[3], sc->H[3] );
|
||||
sc->H[4] = mm512_xor4( VC, V4, sc->S[0], sc->H[4] );
|
||||
sc->H[5] = mm512_xor4( VD, V5, sc->S[1], sc->H[5] );
|
||||
sc->H[6] = mm512_xor4( VE, V6, sc->S[2], sc->H[6] );
|
||||
sc->H[7] = mm512_xor4( VF, V7, sc->S[3], sc->H[7] );
|
||||
}
|
||||
|
||||
void blake512_8way_init( blake_8way_big_context *sc )
|
||||
{
|
||||
__m512i zero = m512_zero;
|
||||
@@ -533,43 +455,39 @@ blake64_8way( blake_8way_big_context *sc, const void *data, size_t len )
|
||||
|
||||
const int buf_size = 128; // sizeof/8
|
||||
|
||||
// 64, 80 bytes: 1st pass copy data. 2nd pass copy padding and compress.
|
||||
// 128 bytes: 1st pass copy data, compress. 2nd pass copy padding, compress.
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if ( len < (buf_size - ptr) )
|
||||
{
|
||||
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE64(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
size_t clen;
|
||||
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
if ( ( T0 = T0 + 1024 ) < 1024 )
|
||||
T1 = T1 + 1;
|
||||
COMPRESS64_8WAY( buf );
|
||||
ptr = 0;
|
||||
}
|
||||
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
if ( ( T0 = SPH_T64(T0 + 1024) ) < 1024 )
|
||||
T1 = SPH_T64(T1 + 1);
|
||||
COMPRESS64_8WAY;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE64(sc);
|
||||
sc->ptr = ptr;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
blake64_8way_close( blake_8way_big_context *sc, void *dst )
|
||||
@@ -577,22 +495,26 @@ blake64_8way_close( blake_8way_big_context *sc, void *dst )
|
||||
__m512i buf[16];
|
||||
size_t ptr;
|
||||
unsigned bit_len;
|
||||
uint64_t th, tl;
|
||||
// uint64_t z, zz;
|
||||
sph_u64 th, tl;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
// z = 0x80 >> n;
|
||||
// zz = ((ub & -z) | z) & 0xFF;
|
||||
// buf[ptr>>3] = _mm512_set1_epi64( zz );
|
||||
buf[ptr>>3] = m512_const1_64( 0x80 );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
if (ptr == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
|
||||
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
|
||||
}
|
||||
else if ( sc->T0 == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
|
||||
sc->T1 = sc->T1 - 1;
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL) + bit_len;
|
||||
sc->T1 = SPH_T64(sc->T1 - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -613,8 +535,8 @@ blake64_8way_close( blake_8way_big_context *sc, void *dst )
|
||||
memset_zero_512( buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
|
||||
|
||||
blake64_8way( sc, buf + (ptr>>3), 128 - ptr );
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
|
||||
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
|
||||
memset_zero_512( buf, 112>>3 );
|
||||
buf[104>>3] = m512_const1_64( 0x0100000000000000ULL );
|
||||
buf[112>>3] = m512_const1_64( bswap_64( th ) );
|
||||
@@ -625,79 +547,6 @@ blake64_8way_close( blake_8way_big_context *sc, void *dst )
|
||||
mm512_block_bswap_64( (__m512i*)dst, sc->H );
|
||||
}
|
||||
|
||||
// init, update & close
|
||||
void blake512_8way_full( blake_8way_big_context *sc, void * dst,
|
||||
const void *data, size_t len )
|
||||
{
|
||||
|
||||
// init
|
||||
|
||||
casti_m512i( sc->H, 0 ) = m512_const1_64( 0x6A09E667F3BCC908 );
|
||||
casti_m512i( sc->H, 1 ) = m512_const1_64( 0xBB67AE8584CAA73B );
|
||||
casti_m512i( sc->H, 2 ) = m512_const1_64( 0x3C6EF372FE94F82B );
|
||||
casti_m512i( sc->H, 3 ) = m512_const1_64( 0xA54FF53A5F1D36F1 );
|
||||
casti_m512i( sc->H, 4 ) = m512_const1_64( 0x510E527FADE682D1 );
|
||||
casti_m512i( sc->H, 5 ) = m512_const1_64( 0x9B05688C2B3E6C1F );
|
||||
casti_m512i( sc->H, 6 ) = m512_const1_64( 0x1F83D9ABFB41BD6B );
|
||||
casti_m512i( sc->H, 7 ) = m512_const1_64( 0x5BE0CD19137E2179 );
|
||||
|
||||
casti_m512i( sc->S, 0 ) = m512_zero;
|
||||
casti_m512i( sc->S, 1 ) = m512_zero;
|
||||
casti_m512i( sc->S, 2 ) = m512_zero;
|
||||
casti_m512i( sc->S, 3 ) = m512_zero;
|
||||
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
|
||||
// update
|
||||
|
||||
memcpy_512( sc->buf, (__m512i*)data, len>>3 );
|
||||
sc->ptr = len;
|
||||
if ( len == 128 )
|
||||
{
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
blake512_8way_compress( sc );
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
// close
|
||||
|
||||
size_t ptr64 = sc->ptr >> 3;
|
||||
unsigned bit_len;
|
||||
uint64_t th, tl;
|
||||
|
||||
bit_len = sc->ptr << 3;
|
||||
sc->buf[ptr64] = m512_const1_64( 0x80 );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
|
||||
if ( ptr64 == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
|
||||
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
|
||||
}
|
||||
else if ( sc->T0 == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
|
||||
sc->T1 = sc->T1 - 1;
|
||||
}
|
||||
else
|
||||
sc->T0 -= 1024 - bit_len;
|
||||
|
||||
memset_zero_512( sc->buf + ptr64 + 1, 13 - ptr64 );
|
||||
sc->buf[13] = m512_const1_64( 0x0100000000000000ULL );
|
||||
sc->buf[14] = m512_const1_64( bswap_64( th ) );
|
||||
sc->buf[15] = m512_const1_64( bswap_64( tl ) );
|
||||
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
|
||||
blake512_8way_compress( sc );
|
||||
|
||||
mm512_block_bswap_64( (__m512i*)dst, sc->H );
|
||||
}
|
||||
|
||||
void
|
||||
blake512_8way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
@@ -706,6 +555,12 @@ blake512_8way_update(void *cc, const void *data, size_t len)
|
||||
|
||||
void
|
||||
blake512_8way_close(void *cc, void *dst)
|
||||
{
|
||||
blake512_8way_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
void
|
||||
blake512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
blake64_8way_close(cc, dst);
|
||||
}
|
||||
@@ -741,7 +596,7 @@ blake512_8way_close(void *cc, void *dst)
|
||||
#define DECL_STATE64_4WAY \
|
||||
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
|
||||
__m256i S0, S1, S2, S3; \
|
||||
uint64_t T0, T1;
|
||||
sph_u64 T0, T1;
|
||||
|
||||
#define COMPRESS64_4WAY do \
|
||||
{ \
|
||||
@@ -815,81 +670,6 @@ blake512_8way_close(void *cc, void *dst)
|
||||
} while (0)
|
||||
|
||||
|
||||
void blake512_4way_compress( blake_4way_big_context *sc )
|
||||
{
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7;
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF;
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7;
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF;
|
||||
__m256i shuf_bswap64;
|
||||
|
||||
V0 = sc->H[0];
|
||||
V1 = sc->H[1];
|
||||
V2 = sc->H[2];
|
||||
V3 = sc->H[3];
|
||||
V4 = sc->H[4];
|
||||
V5 = sc->H[5];
|
||||
V6 = sc->H[6];
|
||||
V7 = sc->H[7];
|
||||
V8 = _mm256_xor_si256( sc->S[0], m256_const1_64( CB0 ) );
|
||||
V9 = _mm256_xor_si256( sc->S[1], m256_const1_64( CB1 ) );
|
||||
VA = _mm256_xor_si256( sc->S[2], m256_const1_64( CB2 ) );
|
||||
VB = _mm256_xor_si256( sc->S[3], m256_const1_64( CB3 ) );
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi64x( sc->T0 ),
|
||||
m256_const1_64( CB4 ) );
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi64x( sc->T0 ),
|
||||
m256_const1_64( CB5 ) );
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi64x( sc->T1 ),
|
||||
m256_const1_64( CB6 ) );
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi64x( sc->T1 ),
|
||||
m256_const1_64( CB7 ) );
|
||||
shuf_bswap64 = m256_const_64( 0x18191a1b1c1d1e1f, 0x1011121314151617,
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 );
|
||||
|
||||
M0 = _mm256_shuffle_epi8( sc->buf[ 0], shuf_bswap64 );
|
||||
M1 = _mm256_shuffle_epi8( sc->buf[ 1], shuf_bswap64 );
|
||||
M2 = _mm256_shuffle_epi8( sc->buf[ 2], shuf_bswap64 );
|
||||
M3 = _mm256_shuffle_epi8( sc->buf[ 3], shuf_bswap64 );
|
||||
M4 = _mm256_shuffle_epi8( sc->buf[ 4], shuf_bswap64 );
|
||||
M5 = _mm256_shuffle_epi8( sc->buf[ 5], shuf_bswap64 );
|
||||
M6 = _mm256_shuffle_epi8( sc->buf[ 6], shuf_bswap64 );
|
||||
M7 = _mm256_shuffle_epi8( sc->buf[ 7], shuf_bswap64 );
|
||||
M8 = _mm256_shuffle_epi8( sc->buf[ 8], shuf_bswap64 );
|
||||
M9 = _mm256_shuffle_epi8( sc->buf[ 9], shuf_bswap64 );
|
||||
MA = _mm256_shuffle_epi8( sc->buf[10], shuf_bswap64 );
|
||||
MB = _mm256_shuffle_epi8( sc->buf[11], shuf_bswap64 );
|
||||
MC = _mm256_shuffle_epi8( sc->buf[12], shuf_bswap64 );
|
||||
MD = _mm256_shuffle_epi8( sc->buf[13], shuf_bswap64 );
|
||||
ME = _mm256_shuffle_epi8( sc->buf[14], shuf_bswap64 );
|
||||
MF = _mm256_shuffle_epi8( sc->buf[15], shuf_bswap64 );
|
||||
|
||||
ROUND_B_4WAY(0);
|
||||
ROUND_B_4WAY(1);
|
||||
ROUND_B_4WAY(2);
|
||||
ROUND_B_4WAY(3);
|
||||
ROUND_B_4WAY(4);
|
||||
ROUND_B_4WAY(5);
|
||||
ROUND_B_4WAY(6);
|
||||
ROUND_B_4WAY(7);
|
||||
ROUND_B_4WAY(8);
|
||||
ROUND_B_4WAY(9);
|
||||
ROUND_B_4WAY(0);
|
||||
ROUND_B_4WAY(1);
|
||||
ROUND_B_4WAY(2);
|
||||
ROUND_B_4WAY(3);
|
||||
ROUND_B_4WAY(4);
|
||||
ROUND_B_4WAY(5);
|
||||
|
||||
sc->H[0] = mm256_xor4( V8, V0, sc->S[0], sc->H[0] );
|
||||
sc->H[1] = mm256_xor4( V9, V1, sc->S[1], sc->H[1] );
|
||||
sc->H[2] = mm256_xor4( VA, V2, sc->S[2], sc->H[2] );
|
||||
sc->H[3] = mm256_xor4( VB, V3, sc->S[3], sc->H[3] );
|
||||
sc->H[4] = mm256_xor4( VC, V4, sc->S[0], sc->H[4] );
|
||||
sc->H[5] = mm256_xor4( VD, V5, sc->S[1], sc->H[5] );
|
||||
sc->H[6] = mm256_xor4( VE, V6, sc->S[2], sc->H[6] );
|
||||
sc->H[7] = mm256_xor4( VF, V7, sc->S[3], sc->H[7] );
|
||||
}
|
||||
|
||||
void blake512_4way_init( blake_4way_big_context *sc )
|
||||
{
|
||||
__m256i zero = m256_zero;
|
||||
@@ -901,12 +681,10 @@ void blake512_4way_init( blake_4way_big_context *sc )
|
||||
casti_m256i( sc->H, 5 ) = m256_const1_64( 0x9B05688C2B3E6C1F );
|
||||
casti_m256i( sc->H, 6 ) = m256_const1_64( 0x1F83D9ABFB41BD6B );
|
||||
casti_m256i( sc->H, 7 ) = m256_const1_64( 0x5BE0CD19137E2179 );
|
||||
|
||||
casti_m256i( sc->S, 0 ) = zero;
|
||||
casti_m256i( sc->S, 1 ) = zero;
|
||||
casti_m256i( sc->S, 2 ) = zero;
|
||||
casti_m256i( sc->S, 3 ) = zero;
|
||||
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
@@ -925,31 +703,31 @@ blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
|
||||
ptr = sc->ptr;
|
||||
if ( len < (buf_size - ptr) )
|
||||
{
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE64(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
size_t clen;
|
||||
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
if ( (T0 = T0 + 1024 ) < 1024 )
|
||||
T1 = SPH_T64(T1 + 1);
|
||||
COMPRESS64_4WAY;
|
||||
ptr = 0;
|
||||
}
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if (ptr == buf_size )
|
||||
{
|
||||
if ((T0 = SPH_T64(T0 + 1024)) < 1024)
|
||||
T1 = SPH_T64(T1 + 1);
|
||||
COMPRESS64_4WAY;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE64(sc);
|
||||
sc->ptr = ptr;
|
||||
@@ -961,7 +739,7 @@ blake64_4way_close( blake_4way_big_context *sc, void *dst )
|
||||
__m256i buf[16];
|
||||
size_t ptr;
|
||||
unsigned bit_len;
|
||||
uint64_t th, tl;
|
||||
sph_u64 th, tl;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
@@ -970,13 +748,13 @@ blake64_4way_close( blake_4way_big_context *sc, void *dst )
|
||||
th = sc->T1;
|
||||
if (ptr == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
|
||||
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
|
||||
}
|
||||
else if ( sc->T0 == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
|
||||
sc->T1 = sc->T1 - 1;
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL) + bit_len;
|
||||
sc->T1 = SPH_T64(sc->T1 - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1010,77 +788,13 @@ blake64_4way_close( blake_4way_big_context *sc, void *dst )
|
||||
mm256_block_bswap_64( (__m256i*)dst, sc->H );
|
||||
}
|
||||
|
||||
// init, update & close
|
||||
void blake512_4way_full( blake_4way_big_context *sc, void * dst,
|
||||
const void *data, size_t len )
|
||||
/*
|
||||
void
|
||||
blake512_4way_init(void *cc)
|
||||
{
|
||||
|
||||
// init
|
||||
|
||||
casti_m256i( sc->H, 0 ) = m256_const1_64( 0x6A09E667F3BCC908 );
|
||||
casti_m256i( sc->H, 1 ) = m256_const1_64( 0xBB67AE8584CAA73B );
|
||||
casti_m256i( sc->H, 2 ) = m256_const1_64( 0x3C6EF372FE94F82B );
|
||||
casti_m256i( sc->H, 3 ) = m256_const1_64( 0xA54FF53A5F1D36F1 );
|
||||
casti_m256i( sc->H, 4 ) = m256_const1_64( 0x510E527FADE682D1 );
|
||||
casti_m256i( sc->H, 5 ) = m256_const1_64( 0x9B05688C2B3E6C1F );
|
||||
casti_m256i( sc->H, 6 ) = m256_const1_64( 0x1F83D9ABFB41BD6B );
|
||||
casti_m256i( sc->H, 7 ) = m256_const1_64( 0x5BE0CD19137E2179 );
|
||||
|
||||
casti_m256i( sc->S, 0 ) = m256_zero;
|
||||
casti_m256i( sc->S, 1 ) = m256_zero;
|
||||
casti_m256i( sc->S, 2 ) = m256_zero;
|
||||
casti_m256i( sc->S, 3 ) = m256_zero;
|
||||
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
|
||||
// update
|
||||
|
||||
memcpy_256( sc->buf, (__m256i*)data, len>>3 );
|
||||
sc->ptr += len;
|
||||
if ( len == 128 )
|
||||
{
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
blake512_4way_compress( sc );
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
// close
|
||||
|
||||
size_t ptr64 = sc->ptr >> 3;
|
||||
unsigned bit_len;
|
||||
uint64_t th, tl;
|
||||
|
||||
bit_len = sc->ptr << 3;
|
||||
sc->buf[ptr64] = m256_const1_64( 0x80 );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
if ( sc->ptr == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
|
||||
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
|
||||
}
|
||||
else if ( sc->T0 == 0 )
|
||||
{
|
||||
sc->T0 = 0xFFFFFFFFFFFFFC00ULL + bit_len;
|
||||
sc->T1 = sc->T1 - 1;
|
||||
}
|
||||
else
|
||||
sc->T0 -= 1024 - bit_len;
|
||||
|
||||
memset_zero_256( sc->buf + ptr64 + 1, 13 - ptr64 );
|
||||
sc->buf[13] = m256_const1_64( 0x0100000000000000ULL );
|
||||
sc->buf[14] = m256_const1_64( bswap_64( th ) );
|
||||
sc->buf[15] = m256_const1_64( bswap_64( tl ) );
|
||||
|
||||
if ( ( sc->T0 = sc->T0 + 1024 ) < 1024 )
|
||||
sc->T1 = sc->T1 + 1;
|
||||
|
||||
blake512_4way_compress( sc );
|
||||
|
||||
mm256_block_bswap_64( (__m256i*)dst, sc->H );
|
||||
blake64_4way_init(cc, IV512, salt_zero_big);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
blake512_4way_update(void *cc, const void *data, size_t len)
|
||||
@@ -1092,8 +806,17 @@ void
|
||||
blake512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
blake64_4way_close( cc, dst );
|
||||
|
||||
// blake512_4way_addbits_and_close(cc, dst);
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
blake512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
blake64_4way_close(cc, ub, n, dst, 8);
|
||||
}
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -14,7 +14,7 @@ void blakecoin_4way_hash(void *state, const void *input)
|
||||
blake256r8_4way_context ctx;
|
||||
|
||||
memcpy( &ctx, &blakecoin_4w_ctx, sizeof ctx );
|
||||
blake256r8_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way_close( &ctx, vhash );
|
||||
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
@@ -37,7 +37,7 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r8_4way_init( &blakecoin_4w_ctx );
|
||||
blake256r8_4way_update( &blakecoin_4w_ctx, vdata, 64 );
|
||||
blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
@@ -71,7 +71,7 @@ void blakecoin_8way_hash( void *state, const void *input )
|
||||
blake256r8_8way_context ctx;
|
||||
|
||||
memcpy( &ctx, &blakecoin_8w_ctx, sizeof ctx );
|
||||
blake256r8_8way_update( &ctx, input + (64<<3), 16 );
|
||||
blake256r8_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r8_8way_close( &ctx, vhash );
|
||||
|
||||
dintrlv_8x32( state, state+ 32, state+ 64, state+ 96, state+128,
|
||||
@@ -95,7 +95,7 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake256r8_8way_init( &blakecoin_8w_ctx );
|
||||
blake256r8_8way_update( &blakecoin_8w_ctx, vdata, 64 );
|
||||
blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
|
@@ -21,7 +21,7 @@ void decred_hash_4way( void *state, const void *input )
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
|
||||
blake256_4way_update( &ctx, tail, tail_len );
|
||||
blake256_4way( &ctx, tail, tail_len );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
@@ -46,7 +46,7 @@ int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
mm128_intrlv_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
|
||||
blake256_4way_init( &blake_mid );
|
||||
blake256_4way_update( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
|
||||
blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
|
||||
|
||||
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
|
||||
do {
|
||||
|
@@ -22,23 +22,23 @@ extern void pentablakehash_4way( void *output, const void *input )
|
||||
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, input, 80 );
|
||||
blake512_4way( &ctx, input, 80 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way_update( &ctx, vhash, 64 );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
|
@@ -168,66 +168,6 @@ int cube_4way_close( cube_4way_context *sp, void *output )
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * One-shot CubeHash for the 4-way (__m512i) context: initialise, absorb,
 * pad, finalise and copy the digest out in a single call.
 *
 *   sp          context; every field used here is overwritten first.
 *   output      receives sp->hashlen << 6 bytes copied from sp->h.
 *   hashbitlen  512 selects IV512; any other value selects IV256.
 *   data        input, read as an array of __m512i.
 *   size        only size >> 4 input vectors are consumed; a remainder of
 *               size modulo 16 is ignored.
 *
 * Always returns 0.
 */
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
                    const void *data, size_t size )
{
   __m512i *h = (__m512i*)sp->h;
   __m128i *iv = (__m128i*)( hashbitlen == 512 ? (__m128i*)IV512
                                               : (__m128i*)IV256 );
   const int len = size >> 4;
   const __m512i *in = (__m512i*)data;
   __m512i *hash = (__m512i*)output;
   int i;

   sp->hashlen   = hashbitlen/128;
   sp->blocksize = 32/16;
   sp->rounds    = 16;
   sp->pos       = 0;

   // Load the initial state once, one 128-bit IV word per state vector.
   // (The eight stores used to be written out twice; the second pass only
   // rewrote the same values.)
   for ( i = 0; i < 8; i++ )
      h[i] = m512_const1_128( iv[i] );

   // Absorb: xor each input vector into the state, run the transform every
   // sp->blocksize vectors.
   for ( i = 0; i < len; i++ )
   {
      sp->h[ sp->pos ] = _mm512_xor_si512( sp->h[ sp->pos ], in[i] );
      sp->pos++;
      if ( sp->pos == sp->blocksize )
      {
         transform_4way( sp );
         sp->pos = 0;
      }
   }

   // Padding: xor 0x80 into the next free state vector.
   // pos is zero for 64 byte data, 1 for 80 byte data.
   sp->h[ sp->pos ] = _mm512_xor_si512( sp->h[ sp->pos ],
                                 m512_const2_64( 0, 0x0000000000000080 ) );
   transform_4way( sp );

   // Finalisation: flip one bit in the last state vector, then run ten
   // more transforms.
   sp->h[7] = _mm512_xor_si512( sp->h[7],
                                 m512_const2_64( 0x0000000100000000, 0 ) );

   for ( i = 0; i < 10; ++i )
      transform_4way( sp );

   memcpy( hash, sp->h, sp->hashlen<<6);
   return 0;
}
|
||||
|
||||
|
||||
int cube_4way_update_close( cube_4way_context *sp, void *output,
|
||||
const void *data, size_t size )
|
||||
{
|
||||
@@ -436,62 +376,4 @@ int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * One-shot CubeHash for the 2-way (__m256i) context: initialise, absorb,
 * pad, finalise and copy the digest out in a single call.
 *
 *   sp          context; every field used here is overwritten first.
 *   output      receives sp->hashlen << 5 bytes copied from sp->h.
 *   hashbitlen  512 selects IV512; any other value selects IV256.
 *   data        input, read as an array of __m256i.
 *   size        only size >> 4 input vectors are consumed; a remainder of
 *               size modulo 16 is ignored.
 *
 * Always returns 0.
 */
int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
                    const void *data, size_t size )
{
   __m256i *h = (__m256i*)sp->h;
   __m128i *iv = (__m128i*)( hashbitlen == 512 ? (__m128i*)IV512
                                               : (__m128i*)IV256 );
   const int len = size >> 4;
   const __m256i *in = (__m256i*)data;
   __m256i *hash = (__m256i*)output;
   int i;

   sp->hashlen   = hashbitlen/128;
   sp->blocksize = 32/16;
   sp->rounds    = 16;
   sp->pos       = 0;

   // Load the initial state once, one 128-bit IV word per state vector.
   // (The eight stores used to be written out twice; the second pass only
   // rewrote the same values.)
   for ( i = 0; i < 8; i++ )
      h[i] = m256_const1_128( iv[i] );

   // Absorb: xor each input vector into the state, run the transform every
   // sp->blocksize vectors.
   for ( i = 0; i < len; i++ )
   {
      sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
      sp->pos++;
      if ( sp->pos == sp->blocksize )
      {
         transform_2way( sp );
         sp->pos = 0;
      }
   }

   // Padding: xor 0x80 into the next free state vector.
   // pos is zero for 64 byte data, 1 for 80 byte data.
   sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
                                 m256_const2_64( 0, 0x0000000000000080 ) );
   transform_2way( sp );

   // Finalisation: flip one bit in the last state vector, then run ten
   // more transforms.
   sp->h[7] = _mm256_xor_si256( sp->h[7],
                                 m256_const2_64( 0x0000000100000000, 0 ) );

   for ( i = 0; i < 10; ++i )
      transform_2way( sp );

   memcpy( hash, sp->h, sp->hashlen<<5 );
   return 0;
}
|
||||
|
||||
#endif
|
||||
|
@@ -21,12 +21,15 @@ typedef struct _cube_4way_context cube_4way_context;
|
||||
|
||||
int cube_4way_init( cube_4way_context* sp, int hashbitlen, int rounds,
|
||||
int blockbytes );
|
||||
// reinitialize context with same parameters, much faster.
|
||||
int cube_4way_reinit( cube_4way_context *sp );
|
||||
|
||||
int cube_4way_update( cube_4way_context *sp, const void *data, size_t size );
|
||||
|
||||
int cube_4way_close( cube_4way_context *sp, void *output );
|
||||
|
||||
int cube_4way_update_close( cube_4way_context *sp, void *output,
|
||||
const void *data, size_t size );
|
||||
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||
const void *data, size_t size );
|
||||
|
||||
#endif
|
||||
|
||||
@@ -45,12 +48,15 @@ typedef struct _cube_2way_context cube_2way_context;
|
||||
|
||||
int cube_2way_init( cube_2way_context* sp, int hashbitlen, int rounds,
|
||||
int blockbytes );
|
||||
// reinitialize context with same parameters, much faster.
|
||||
int cube_2way_reinit( cube_2way_context *sp );
|
||||
|
||||
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size );
|
||||
|
||||
int cube_2way_close( cube_2way_context *sp, void *output );
|
||||
|
||||
int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
const void *data, size_t size );
|
||||
int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
|
||||
const void *data, size_t size );
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -20,7 +20,6 @@
|
||||
#include "hash_api.h"
|
||||
//#include "vperm.h"
|
||||
#include <immintrin.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
|
||||
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
|
||||
@@ -518,165 +517,6 @@ HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * One-shot ECHO hash: initialise the state for a 256- or 512-bit digest,
 * absorb datalen bytes from data, pad, compress and store the digest.
 *
 *   state      working context; fully re-initialised here.
 *   hashval    receives 32 bytes (256) or 64 bytes (512); written with
 *              _mm_store_si128, so it must be 16-byte aligned.
 *   nHashSize  256 or 512; anything else returns BAD_HASHBITLEN.
 *   data       input bytes.
 *   datalen    input length in bytes.
 *
 * Returns SUCCESS, or BAD_HASHBITLEN for an unsupported nHashSize.
 */
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
              int nHashSize, const BitSequence *data, DataLength datalen )
{
   int i, j;
   unsigned int uBlockCount, uRemainingBytes;
   unsigned char *trailer;
   unsigned short hashsize16;
   DataLength total_bits;
   const DataLength zero_bits = 0;
   __m128i remainingbits;

   // --- init ---

   state->k = m128_zero;
   state->processed_bits = 0;
   state->uBufferBytes = 0;

   switch( nHashSize )
   {
      case 256:
         state->uHashSize = 256;
         state->uBlockLength = 192;
         state->uRounds = 8;
         state->hashsize = m128_const_64( 0, 0x100 );
         state->const1536 = m128_const_64( 0, 0x600 );
         break;

      case 512:
         state->uHashSize = 512;
         state->uBlockLength = 128;
         state->uRounds = 10;
         state->hashsize = m128_const_64( 0, 0x200 );
         state->const1536 = m128_const_64( 0, 0x400 );
         break;

      default:
         return BAD_HASHBITLEN;
   }

   // The first nHashSize/256 columns of each row hold the hash size, the
   // remaining columns are zero.
   for( i = 0; i < 4; i++ )
   {
      for( j = 0; j < nHashSize / 256; j++ )
         state->state[i][j] = state->hashsize;
      for( ; j < 4; j++ )
         state->state[i][j] = m128_zero;
   }

   // --- update ---
   // The buffer is empty at this point (uBufferBytes was just reset), so
   // full blocks are always compressed straight from the input and only
   // the tail is copied into the buffer.

   if( datalen >= state->uBlockLength )
   {
      uBlockCount = datalen / state->uBlockLength;
      uRemainingBytes = datalen % state->uBlockLength;

      Compress( state, data, uBlockCount );
      state->processed_bits += uBlockCount * state->uBlockLength * 8;
      data += uBlockCount * state->uBlockLength;

      if( uRemainingBytes > 0 )
         memcpy( state->buffer, (void*)data, uRemainingBytes );

      state->uBufferBytes = uRemainingBytes;
   }
   else
   {
      memcpy( state->buffer, (void*)data, datalen );
      state->uBufferBytes += datalen;
   }

   // --- final ---

   // Add remaining bytes in the buffer
   state->processed_bits += state->uBufferBytes * 8;

   remainingbits = _mm_set_epi32( 0, 0, 0, state->uBufferBytes * 8 );

   // The last 18 bytes of the final block hold the hash size (2 bytes)
   // followed by the processed bit count and a zero word. They are written
   // with memcpy rather than through casted pointers: the buffer is byte
   // data and these offsets are not aligned for DataLength.
   trailer = (unsigned char*)( state->buffer + state->uBlockLength - 18 );
   hashsize16 = (unsigned short)state->uHashSize;
   total_bits = state->processed_bits;

   // Pad with 0x80
   state->buffer[state->uBufferBytes++] = 0x80;

   // Enough buffer space for padding in this block?
   if( (state->uBlockLength - state->uBufferBytes) >= 18 )
   {
      // Pad with zeros
      memset( state->buffer + state->uBufferBytes, 0,
              state->uBlockLength - (state->uBufferBytes + 18) );

      // Hash size, processed bits
      memcpy( trailer, &hashsize16, sizeof hashsize16 );
      memcpy( trailer + 2, &total_bits, sizeof total_bits );
      memcpy( trailer + 10, &zero_bits, sizeof zero_bits );

      // Last block contains message bits? If only the 0x80 byte is present
      // the counter restarts from zero, otherwise the buffered bits are
      // added first.
      if( state->uBufferBytes == 1 )
         state->k = _mm_setzero_si128();
      else
         state->k = _mm_add_epi64( state->k, remainingbits );
      state->k = _mm_sub_epi64( state->k, state->const1536 );

      // Compress
      Compress( state, state->buffer, 1 );
   }
   else
   {
      // Fill with zero and compress
      memset( state->buffer + state->uBufferBytes, 0,
              state->uBlockLength - state->uBufferBytes );
      state->k = _mm_add_epi64( state->k, remainingbits );
      state->k = _mm_sub_epi64( state->k, state->const1536 );
      Compress( state, state->buffer, 1 );

      // Last block
      memset( state->buffer, 0, state->uBlockLength - 18 );

      // Hash size, processed bits
      memcpy( trailer, &hashsize16, sizeof hashsize16 );
      memcpy( trailer + 2, &total_bits, sizeof total_bits );
      memcpy( trailer + 10, &zero_bits, sizeof zero_bits );

      // Compress the last block
      state->k = _mm_setzero_si128();
      state->k = _mm_sub_epi64( state->k, state->const1536 );
      Compress( state, state->buffer, 1 );
   }

   // Store the hash value
   _mm_store_si128( (__m128i*)hashval + 0, state->state[0][0] );
   _mm_store_si128( (__m128i*)hashval + 1, state->state[1][0] );

   if( state->uHashSize == 512 )
   {
      _mm_store_si128( (__m128i*)hashval + 2, state->state[2][0] );
      _mm_store_si128( (__m128i*)hashval + 3, state->state[3][0] );
   }
   return SUCCESS;
}
|
||||
|
||||
|
||||
|
||||
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
|
||||
{
|
||||
|
@@ -55,8 +55,6 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit
|
||||
|
||||
HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
|
||||
const BitSequence *data, DataLength databitlen );
|
||||
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
|
||||
int nHashSize, const BitSequence *data, DataLength databitlen );
|
||||
|
||||
#endif // HASH_API_H
|
||||
|
||||
|
@@ -313,92 +313,4 @@ int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * One-shot 4-way ECHO hash: initialise the context, absorb datalen bytes,
 * pad, compress and store the digest.
 *
 *   ctx        working context; fully re-initialised here.
 *   hashval    receives two __m512i (256) or four __m512i (512), written
 *              with _mm512_store_si512.
 *   nHashSize  256 or 512; anything else returns 1.
 *   data       input, passed to echo_4way_compress / memcpy_512.
 *   datalen    input length in bytes.
 *
 * Only short inputs are handled: at most one block is compressed from the
 * input, followed by a single padding block.
 *
 * Returns 0 on success, 1 for an unsupported nHashSize.
 */
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
                    const void *data, int datalen )
{
   int i, j;
   int databitlen = datalen * 8;
   int vlen;                       // message vectors held in ctx->buffer
   __m512i remainingbits;

   ctx->k = m512_zero;
   ctx->processed_bits = 0;
   ctx->uBufferBytes = 0;

   switch( nHashSize )
   {
      case 256:
         ctx->uHashSize = 256;
         ctx->uBlockLength = 192;
         ctx->uRounds = 8;
         ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x100 );
         ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x600 );
         break;

      case 512:
         ctx->uHashSize = 512;
         ctx->uBlockLength = 128;
         ctx->uRounds = 10;
         ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x200 );
         ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x400);
         break;

      default:
         return 1;
   }

   // The first nHashSize/256 columns of each row hold the hash size, the
   // remaining columns are zero.
   for( i = 0; i < 4; i++ )
   {
      for( j = 0; j < nHashSize / 256; j++ )
         ctx->state[ i ][ j ] = ctx->hashsize;
      for( ; j < 4; j++ )
         ctx->state[ i ][ j ] = m512_zero;
   }

   // bytelen is either 32 (maybe), 64 or 80 or 128!
   // all are less than full block.

   const int vblen = ctx->uBlockLength / 16; // 16 bytes per lane

   if ( databitlen == 1024 )
   {
      // NOTE(review): 1024 bits is a full block only when uBlockLength is
      // 128 (nHashSize 512); with nHashSize 256 the block length is 192.
      // Confirm that callers never pass 128 bytes with nHashSize 256.
      echo_4way_compress( ctx, data, 1 );
      ctx->processed_bits = 1024;
      remainingbits = m512_const2_64( 0, -1024 );
      vlen = 0;
   }
   else
   {
      vlen = databitlen / 128; // * 4 lanes / 128 bits per lane
      memcpy_512( ctx->buffer, data, vlen );
      ctx->processed_bits += (unsigned int)( databitlen );
      remainingbits = _mm512_set4_epi32( 0, 0, 0, databitlen );
   }

   // Padding block: 0x80 marker, zero fill, then the hash size in the
   // second-to-last vector and the processed bit count in the last one.
   ctx->buffer[ vlen ] = _mm512_set4_epi32( 0, 0, 0, 0x80 );
   memset_zero_512( ctx->buffer + vlen + 1, vblen - vlen - 2 );
   ctx->buffer[ vblen-2 ] =
                  _mm512_set4_epi32( (uint32_t)ctx->uHashSize << 16, 0, 0, 0 );
   ctx->buffer[ vblen-1 ] =
                  _mm512_set4_epi64( 0, ctx->processed_bits,
                                     0, ctx->processed_bits );

   ctx->k = _mm512_add_epi64( ctx->k, remainingbits );
   ctx->k = _mm512_sub_epi64( ctx->k, ctx->const1536 );

   echo_4way_compress( ctx, ctx->buffer, 1 );

   _mm512_store_si512( (__m512i*)hashval + 0, ctx->state[ 0 ][ 0] );
   _mm512_store_si512( (__m512i*)hashval + 1, ctx->state[ 1 ][ 0] );

   if ( ctx->uHashSize == 512 )
   {
      _mm512_store_si512( (__m512i*)hashval + 2, ctx->state[ 2 ][ 0 ] );
      _mm512_store_si512( (__m512i*)hashval + 3, ctx->state[ 3 ][ 0 ] );
   }
   return 0;
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -32,8 +32,5 @@ int echo_close( echo_4way_context *state, void *hashval );
|
||||
int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||
const void *data, int databitlen );
|
||||
|
||||
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||
const void *data, int datalen );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -185,82 +185,6 @@ HashReturn_gr final_groestl( hashState_groestl* ctx, void* output )
|
||||
return SUCCESS_GR;
|
||||
}
|
||||
|
||||
/*
 * One-shot Groestl-512: initialise the context, digest the input, apply
 * the final padding block and output transform, and store the hash.
 *
 *   ctx         working context; fully re-initialised here.
 *   output      receives ctx->hashlen (64) bytes, stored as __m128i.
 *   input       message, read as an array of __m128i.
 *   databitlen  message length in bits; only whole 128-bit vectors
 *               (databitlen / 128) are consumed.
 *
 * Always returns 0.
 */
int groestl512_full( hashState_groestl* ctx, void* output,
                     const void* input, uint64_t databitlen )
{

   int i;   // keep this name: SET_CONSTANTS() may use it -- TODO confirm

   // --- init ---

   ctx->hashlen = 64;
   SET_CONSTANTS();

   for ( i = 0; i < SIZE512; i++ )
   {
      ctx->chaining[i] = _mm_setzero_si128();
      ctx->buffer[i] = _mm_setzero_si128();
   }
   // The only non-zero word of the initial chaining value.
   ctx->chaining[ 6 ] = m128_const_64( 0x0200000000000000, 0 );
   ctx->buf_ptr = 0;
   ctx->rem_ptr = 0;

   const int len = (int)databitlen / 128;
   const int hashlen_m128i = ctx->hashlen / 16;   // bytes to __m128i
   const int hash_offset = SIZE512 - hashlen_m128i;
   int rem = ctx->rem_ptr;
   uint64_t blocks = len / SIZE512;
   __m128i* in = (__m128i*)input;

   // --- update ---

   // digest any full blocks, process directly from input
   for ( i = 0; i < blocks; i++ )
      TF1024( ctx->chaining, &in[ i * SIZE512 ] );
   ctx->buf_ptr = blocks * SIZE512;

   // copy the remaining partial block into the buffer
   for ( i = 0; i < len % SIZE512; i++ )
       ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
   i += rem;    // i is now the index of the first free buffer vector

   //--- final ---

   blocks++;      // adjust for final block

   // The test used to be "i == len - 1", which can never be true here
   // (i is len % SIZE512). With exactly one free vector the else branch
   // then stored the length word at buffer[SIZE512], past the SIZE512
   // vectors this function initialises. Compare against the buffer
   // position, as groestl512_4way_full does.
   if ( i == SIZE512 - 1 )
   {
       // only 128 bits left in buffer, all padding at once
       ctx->buffer[i] = _mm_set_epi8( blocks,0,0,0, 0,0,0,0,
                                           0,0,0,0, 0,0,0,0x80 );
   }
   else
   {
       // add first padding
       ctx->buffer[i] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
                                      0,0,0,0, 0,0,0,0x80 );
       // add zero padding
       for ( i += 1; i < SIZE512 - 1; i++ )
           ctx->buffer[i] = _mm_setzero_si128();

       // add length padding, second last byte is zero unless blocks > 255
       ctx->buffer[i] = _mm_set_epi8( blocks, blocks>>8, 0,0, 0,0,0,0,
                                           0,         0 ,0,0, 0,0,0,0 );
   }

   // digest final padding block and do output transform
   TF1024( ctx->chaining, ctx->buffer );

   OF1024( ctx->chaining );

   // store hash result in output
   for ( i = 0; i < hashlen_m128i; i++ )
      casti_m128i( output, i ) = ctx->chaining[ hash_offset + i ];

   return 0;
}
|
||||
|
||||
|
||||
HashReturn_gr update_and_final_groestl( hashState_groestl* ctx, void* output,
|
||||
const void* input, DataLength_gr databitlen )
|
||||
{
|
||||
|
@@ -87,6 +87,5 @@ HashReturn_gr final_groestl( hashState_groestl*, void* );
|
||||
|
||||
HashReturn_gr update_and_final_groestl( hashState_groestl*, void*,
|
||||
const void*, DataLength_gr );
|
||||
int groestl512_full( hashState_groestl*, void*, const void*, uint64_t );
|
||||
|
||||
#endif /* __hash_h */
|
||||
|
@@ -15,7 +15,7 @@
|
||||
#include "miner.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__VAES__)
|
||||
|
||||
|
||||
int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
|
||||
|
@@ -18,8 +18,6 @@
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define LENGTH (256)
|
||||
|
||||
//#include "brg_endian.h"
|
||||
@@ -71,5 +69,4 @@ int groestl256_4way_init( groestl256_4way_context*, uint64_t );
|
||||
int groestl256_4way_update_close( groestl256_4way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -15,22 +15,29 @@
|
||||
#include "miner.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__VAES__)
|
||||
|
||||
int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen )
|
||||
{
|
||||
int i;
|
||||
|
||||
ctx->hashlen = hashlen;
|
||||
SET_CONSTANTS();
|
||||
|
||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
||||
return 1;
|
||||
|
||||
memset_zero_512( ctx->chaining, SIZE512 );
|
||||
memset_zero_512( ctx->buffer, SIZE512 );
|
||||
for ( i = 0; i < SIZE512; i++ )
|
||||
{
|
||||
ctx->chaining[i] = m512_zero;
|
||||
ctx->buffer[i] = m512_zero;
|
||||
}
|
||||
|
||||
// The only non-zero in the IV is len. It can be hard coded.
|
||||
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );
|
||||
// uint64_t len = U64BIG((uint64_t)LENGTH);
|
||||
// ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
|
||||
// INIT_4way(ctx->chaining);
|
||||
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
@@ -42,7 +49,7 @@ int groestl512_4way_update_close( groestl512_4way_context* ctx, void* output,
|
||||
const void* input, uint64_t databitlen )
|
||||
{
|
||||
const int len = (int)databitlen / 128;
|
||||
const int hashlen_m128i = 64 / 16; // bytes to __m128i
|
||||
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
|
||||
const int hash_offset = SIZE512 - hashlen_m128i;
|
||||
int rem = ctx->rem_ptr;
|
||||
int blocks = len / SIZE512;
|
||||
@@ -51,13 +58,16 @@ int groestl512_4way_update_close( groestl512_4way_context* ctx, void* output,
|
||||
|
||||
// --- update ---
|
||||
|
||||
// digest any full blocks, process directly from input
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
TF1024_4way( ctx->chaining, &in[ i * SIZE512 ] );
|
||||
ctx->buf_ptr = blocks * SIZE512;
|
||||
|
||||
// copy any remaining data to buffer, it may already contain data
|
||||
// from a previous update for a midstate precalc
|
||||
for ( i = 0; i < len % SIZE512; i++ )
|
||||
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
|
||||
i += rem;
|
||||
i += rem; // use i as rem_ptr in final
|
||||
|
||||
//--- final ---
|
||||
|
||||
@@ -71,71 +81,23 @@ int groestl512_4way_update_close( groestl512_4way_context* ctx, void* output,
|
||||
}
|
||||
else
|
||||
{
|
||||
// add first padding
|
||||
ctx->buffer[i] = m512_const4_64( 0, 0x80, 0, 0x80 );
|
||||
// add zero padding
|
||||
for ( i += 1; i < SIZE512 - 1; i++ )
|
||||
ctx->buffer[i] = m512_zero;
|
||||
|
||||
// add length padding, second last byte is zero unless blocks > 255
|
||||
ctx->buffer[i] = m512_const1_128( _mm_set_epi8(
|
||||
blocks, blocks>>8, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 ) );
|
||||
}
|
||||
|
||||
// digest final padding block and do output transform
|
||||
TF1024_4way( ctx->chaining, ctx->buffer );
|
||||
|
||||
OF1024_4way( ctx->chaining );
|
||||
|
||||
for ( i = 0; i < hashlen_m128i; i++ )
|
||||
casti_m512i( output, i ) = ctx->chaining[ hash_offset + i ];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int groestl512_4way_full( groestl512_4way_context* ctx, void* output,
|
||||
const void* input, uint64_t datalen )
|
||||
{
|
||||
const int len = (int)datalen >> 4;
|
||||
const int hashlen_m128i = 64 >> 4; // bytes to __m128i
|
||||
const int hash_offset = SIZE512 - hashlen_m128i;
|
||||
uint64_t blocks = len / SIZE512;
|
||||
__m512i* in = (__m512i*)input;
|
||||
int i;
|
||||
|
||||
// --- init ---
|
||||
|
||||
SET_CONSTANTS();
|
||||
memset_zero_512( ctx->chaining, SIZE512 );
|
||||
memset_zero_512( ctx->buffer, SIZE512 );
|
||||
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
|
||||
// --- update ---
|
||||
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
TF1024_4way( ctx->chaining, &in[ i * SIZE512 ] );
|
||||
ctx->buf_ptr = blocks * SIZE512;
|
||||
|
||||
for ( i = 0; i < len % SIZE512; i++ )
|
||||
ctx->buffer[ ctx->rem_ptr + i ] = in[ ctx->buf_ptr + i ];
|
||||
i += ctx->rem_ptr;
|
||||
|
||||
// --- close ---
|
||||
|
||||
blocks++;
|
||||
|
||||
if ( i == SIZE512 - 1 )
|
||||
{
|
||||
// only 1 vector left in buffer, all padding at once
|
||||
ctx->buffer[i] = m512_const2_64( blocks << 56, 0x80 );
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->buffer[i] = m512_const4_64( 0, 0x80, 0, 0x80 );
|
||||
for ( i += 1; i < SIZE512 - 1; i++ )
|
||||
ctx->buffer[i] = m512_zero;
|
||||
ctx->buffer[i] = m512_const2_64( blocks << 56, 0 );
|
||||
}
|
||||
|
||||
TF1024_4way( ctx->chaining, ctx->buffer );
|
||||
OF1024_4way( ctx->chaining );
|
||||
|
||||
// store hash result in output
|
||||
for ( i = 0; i < hashlen_m128i; i++ )
|
||||
casti_m512i( output, i ) = ctx->chaining[ hash_offset + i ];
|
||||
|
||||
|
@@ -1,3 +1,11 @@
|
||||
/* hash.h Aug 2011
|
||||
*
|
||||
* Groestl implementation for different versions.
|
||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#if !defined(GROESTL512_HASH_4WAY_H__)
|
||||
#define GROESTL512_HASH_4WAY_H__ 1
|
||||
|
||||
@@ -10,10 +18,12 @@
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define LENGTH (512)
|
||||
|
||||
//#include "brg_endian.h"
|
||||
//#define NEED_UINT_64T
|
||||
//#include "algo/sha/brg_types.h"
|
||||
|
||||
/* some sizes (number of bytes) */
|
||||
#define ROWS (8)
|
||||
#define LENGTHFIELDLEN (ROWS)
|
||||
@@ -34,11 +44,34 @@
|
||||
#define ROUNDS (ROUNDS1024)
|
||||
//#endif
|
||||
|
||||
/*
|
||||
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
|
||||
|
||||
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
|
||||
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
|
||||
#define U64BIG(a) (a)
|
||||
#endif // IS_BIG_ENDIAN
|
||||
|
||||
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
|
||||
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
|
||||
#define U64BIG(a) \
|
||||
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
|
||||
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
|
||||
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
|
||||
(ROTL64(a,56) & li_64(FF000000FF000000)))
|
||||
#endif // IS_LITTLE_ENDIAN
|
||||
|
||||
typedef unsigned char BitSequence_gr;
|
||||
typedef unsigned long long DataLength_gr;
|
||||
typedef enum { SUCCESS_GR = 0, FAIL_GR = 1, BAD_HASHBITLEN_GR = 2} HashReturn_gr;
|
||||
*/
|
||||
|
||||
#define SIZE512 (SIZE_1024/16)
|
||||
|
||||
typedef struct {
|
||||
__attribute__ ((aligned (128))) __m512i chaining[SIZE512];
|
||||
__attribute__ ((aligned (64))) __m512i buffer[SIZE512];
|
||||
int hashlen; // byte
|
||||
int blk_count; // SIZE_m128i
|
||||
int buf_ptr; // __m128i offset
|
||||
int rem_ptr;
|
||||
@@ -52,11 +85,10 @@ int groestl512_4way_init( groestl512_4way_context*, uint64_t );
|
||||
|
||||
int groestl512_4way_update( groestl512_4way_context*, const void*,
|
||||
uint64_t );
|
||||
|
||||
int groestl512_4way_close( groestl512_4way_context*, void* );
|
||||
|
||||
int groestl512_4way_update_close( groestl512_4way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
int groestl512_4way_full( groestl512_4way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
#endif // VAES
|
||||
#endif // GROESTL512_HASH_4WAY_H__
|
||||
#endif /* __hash_h */
|
||||
|
@@ -161,7 +161,7 @@ bool register_hodl_algo( algo_gate_t* gate )
|
||||
// return false;
|
||||
// }
|
||||
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
|
||||
gate->optimizations = SSE42_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&hodl_scanhash;
|
||||
gate->get_new_work = (void*)&hodl_get_new_work;
|
||||
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
|
||||
|
@@ -41,10 +41,57 @@
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
|
||||
#define SPH_SMALL_FOOTPRINT_JH 1
|
||||
#endif
|
||||
|
||||
#if !defined SPH_JH_64 && SPH_64_TRUE
|
||||
#define SPH_JH_64 1
|
||||
#endif
|
||||
|
||||
#if !SPH_64
|
||||
#undef SPH_JH_64
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The internal bitslice representation may use either big-endian or
|
||||
* little-endian (true bitslice operations do not care about the bit
|
||||
* ordering, and the bit-swapping linear operations in JH happen to
|
||||
* be invariant through endianness-swapping). The constants must be
|
||||
* defined according to the chosen endianness; we use some
|
||||
* byte-swapping macros for that.
|
||||
*/
|
||||
|
||||
#if SPH_LITTLE_ENDIAN
|
||||
|
||||
#if SPH_64
|
||||
#define C64e(x) ((SPH_C64(x) >> 56) \
|
||||
| ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
|
||||
| ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
|
||||
| ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
|
||||
| ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
|
||||
| ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
|
||||
| ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
|
||||
| ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
|
||||
#define dec64e_aligned sph_dec64le_aligned
|
||||
#define enc64e sph_enc64le
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if SPH_64
|
||||
#define C64e(x) SPH_C64(x)
|
||||
#define dec64e_aligned sph_dec64be_aligned
|
||||
#define enc64e sph_enc64be
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define Sb_8W(x0, x1, x2, x3, c) \
|
||||
@@ -105,97 +152,8 @@ do { \
|
||||
x3 = _mm256_xor_si256( x3, x4 ); \
|
||||
} while (0)
|
||||
|
||||
static const uint64_t C[] =
|
||||
{
|
||||
0x67f815dfa2ded572, 0x571523b70a15847b,
|
||||
0xf6875a4d90d6ab81, 0x402bd1c3c54f9f4e,
|
||||
0x9cfa455ce03a98ea, 0x9a99b26699d2c503,
|
||||
0x8a53bbf2b4960266, 0x31a2db881a1456b5,
|
||||
0xdb0e199a5c5aa303, 0x1044c1870ab23f40,
|
||||
0x1d959e848019051c, 0xdccde75eadeb336f,
|
||||
0x416bbf029213ba10, 0xd027bbf7156578dc,
|
||||
0x5078aa3739812c0a, 0xd3910041d2bf1a3f,
|
||||
0x907eccf60d5a2d42, 0xce97c0929c9f62dd,
|
||||
0xac442bc70ba75c18, 0x23fcc663d665dfd1,
|
||||
0x1ab8e09e036c6e97, 0xa8ec6c447e450521,
|
||||
0xfa618e5dbb03f1ee, 0x97818394b29796fd,
|
||||
0x2f3003db37858e4a, 0x956a9ffb2d8d672a,
|
||||
0x6c69b8f88173fe8a, 0x14427fc04672c78a,
|
||||
0xc45ec7bd8f15f4c5, 0x80bb118fa76f4475,
|
||||
0xbc88e4aeb775de52, 0xf4a3a6981e00b882,
|
||||
0x1563a3a9338ff48e, 0x89f9b7d524565faa,
|
||||
0xfde05a7c20edf1b6, 0x362c42065ae9ca36,
|
||||
0x3d98fe4e433529ce, 0xa74b9a7374f93a53,
|
||||
0x86814e6f591ff5d0, 0x9f5ad8af81ad9d0e,
|
||||
0x6a6234ee670605a7, 0x2717b96ebe280b8b,
|
||||
0x3f1080c626077447, 0x7b487ec66f7ea0e0,
|
||||
0xc0a4f84aa50a550d, 0x9ef18e979fe7e391,
|
||||
0xd48d605081727686, 0x62b0e5f3415a9e7e,
|
||||
0x7a205440ec1f9ffc, 0x84c9f4ce001ae4e3,
|
||||
0xd895fa9df594d74f, 0xa554c324117e2e55,
|
||||
0x286efebd2872df5b, 0xb2c4a50fe27ff578,
|
||||
0x2ed349eeef7c8905, 0x7f5928eb85937e44,
|
||||
0x4a3124b337695f70, 0x65e4d61df128865e,
|
||||
0xe720b95104771bc7, 0x8a87d423e843fe74,
|
||||
0xf2947692a3e8297d, 0xc1d9309b097acbdd,
|
||||
0xe01bdc5bfb301b1d, 0xbf829cf24f4924da,
|
||||
0xffbf70b431bae7a4, 0x48bcf8de0544320d,
|
||||
0x39d3bb5332fcae3b, 0xa08b29e0c1c39f45,
|
||||
0x0f09aef7fd05c9e5, 0x34f1904212347094,
|
||||
0x95ed44e301b771a2, 0x4a982f4f368e3be9,
|
||||
0x15f66ca0631d4088, 0xffaf52874b44c147,
|
||||
0x30c60ae2f14abb7e, 0xe68c6eccc5b67046,
|
||||
0x00ca4fbd56a4d5a4, 0xae183ec84b849dda,
|
||||
0xadd1643045ce5773, 0x67255c1468cea6e8,
|
||||
0x16e10ecbf28cdaa3, 0x9a99949a5806e933,
|
||||
0x7b846fc220b2601f, 0x1885d1a07facced1,
|
||||
0xd319dd8da15b5932, 0x46b4a5aac01c9a50,
|
||||
0xba6b04e467633d9f, 0x7eee560bab19caf6,
|
||||
0x742128a9ea79b11f, 0xee51363b35f7bde9,
|
||||
0x76d350755aac571d, 0x01707da3fec2463a,
|
||||
0x42d8a498afc135f7, 0x79676b9e20eced78,
|
||||
0xa8db3aea15638341, 0x832c83324d3bc3fa,
|
||||
0xf347271c1f3b40a7, 0x9a762db734f04059,
|
||||
0xfd4f21d26c4e3ee7, 0xef5957dc398dfdb8,
|
||||
0xdaeb492b490c9b8d, 0x0d70f36849d7a25b,
|
||||
0x84558d7ad0ae3b7d, 0x658ef8e4f0e9a5f5,
|
||||
0x533b1036f4a2b8a0, 0x5aec3e759e07a80c,
|
||||
0x4f88e85692946891, 0x4cbcbaf8555cb05b,
|
||||
0x7b9487f3993bbbe3, 0x5d1c6b72d6f4da75,
|
||||
0x6db334dc28acae64, 0x71db28b850a5346c,
|
||||
0x2a518d10f2e261f8, 0xfc75dd593364dbe3,
|
||||
0xa23fce43f1bcac1c, 0xb043e8023cd1bb67,
|
||||
0x75a12988ca5b0a33, 0x5c5316b44d19347f,
|
||||
0x1e4d790ec3943b92, 0x3fafeeb6d7757479,
|
||||
0x21391abef7d4a8ea, 0x5127234c097ef45c,
|
||||
0xd23c32ba5324a326, 0xadd5a66d4a17a344,
|
||||
0x08c9f2afa63e1db5, 0x563c6b91983d5983,
|
||||
0x4d608672a17cf84c, 0xf6c76e08cc3ee246,
|
||||
0x5e76bcb1b333982f, 0x2ae6c4efa566d62b,
|
||||
0x36d4c1bee8b6f406, 0x6321efbc1582ee74,
|
||||
0x69c953f40d4ec1fd, 0x26585806c45a7da7,
|
||||
0x16fae0061614c17e, 0x3f9d63283daf907e,
|
||||
0x0cd29b00e3f2c9d2, 0x300cd4b730ceaa5f,
|
||||
0x9832e0f216512a74, 0x9af8cee3d830eb0d,
|
||||
0x9279f1b57b9ec54b, 0xd36886046ee651ff,
|
||||
0x316796e6574d239b, 0x05750a17f3a6e6cc,
|
||||
0xce6c3213d98176b1, 0x62a205f88452173c,
|
||||
0x47154778b3cb2bf4, 0x486a9323825446ff,
|
||||
0x65655e4e0758df38, 0x8e5086fc897cfcf2,
|
||||
0x86ca0bd0442e7031, 0x4e477830a20940f0,
|
||||
0x8338f7d139eea065, 0xbd3a2ce437e95ef7,
|
||||
0x6ff8130126b29721, 0xe7de9fefd1ed44a3,
|
||||
0xd992257615dfa08b, 0xbe42dc12f6f7853c,
|
||||
0x7eb027ab7ceca7d8, 0xdea83eaada7d8d53,
|
||||
0xd86902bd93ce25aa, 0xf908731afd43f65a,
|
||||
0xa5194a17daef5fc0, 0x6a21fd4c33664d97,
|
||||
0x701541db3198b435, 0x9b54cdedbb0f1eea,
|
||||
0x72409751a163d09a, 0xe26f4791bf9d75f6
|
||||
};
|
||||
#if SPH_JH_64
|
||||
|
||||
// Big endian version
|
||||
|
||||
/*
|
||||
static const sph_u64 C[] = {
|
||||
C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
|
||||
C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
|
||||
@@ -282,7 +240,6 @@ static const sph_u64 C[] = {
|
||||
C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
|
||||
C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
|
||||
};
|
||||
*/
|
||||
|
||||
#define Ceven_hi(r) (C[((r) << 2) + 0])
|
||||
#define Ceven_lo(r) (C[((r) << 2) + 1])
|
||||
@@ -470,7 +427,7 @@ do { \
|
||||
h7h = _mm256_xor_si256( h7h, m3h ); \
|
||||
h7l = _mm256_xor_si256( h7l, m3l ); \
|
||||
|
||||
/*
|
||||
|
||||
static const sph_u64 IV256[] = {
|
||||
C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
|
||||
C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
|
||||
@@ -493,8 +450,11 @@ static const sph_u64 IV512[] = {
|
||||
C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
|
||||
C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
|
||||
};
|
||||
*/
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
@@ -524,6 +484,57 @@ static const sph_u64 IV512[] = {
|
||||
W ## ro(h7); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_JH
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
/*
|
||||
* The "small footprint" 64-bit version just uses a partially unrolled
|
||||
* loop.
|
||||
*/
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define E8_8W do { \
|
||||
unsigned r; \
|
||||
for (r = 0; r < 42; r += 7) { \
|
||||
SL_8W(0); \
|
||||
SL_8W(1); \
|
||||
SL_8W(2); \
|
||||
SL_8W(3); \
|
||||
SL_8W(4); \
|
||||
SL_8W(5); \
|
||||
SL_8W(6); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#define E8 do { \
|
||||
unsigned r; \
|
||||
for (r = 0; r < 42; r += 7) { \
|
||||
SL(0); \
|
||||
SL(1); \
|
||||
SL(2); \
|
||||
SL(3); \
|
||||
SL(4); \
|
||||
SL(5); \
|
||||
SL(6); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
/*
|
||||
* On a "true 64-bit" architecture, we can unroll at will.
|
||||
*/
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
@@ -574,7 +585,6 @@ static const sph_u64 IV512[] = {
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
|
||||
#define E8 do { \
|
||||
SLu( 0, 0); \
|
||||
SLu( 1, 1); \
|
||||
@@ -620,6 +630,13 @@ static const sph_u64 IV512[] = {
|
||||
SLu(41, 6); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
void jh256_8way_init( jh_8way_context *sc )
|
||||
@@ -715,12 +732,12 @@ jh_8way_core( jh_8way_context *sc, const void *data, size_t len )
|
||||
|
||||
static void
|
||||
jh_8way_close( jh_8way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
size_t out_size_w32 )
|
||||
size_t out_size_w32, const void *iv )
|
||||
{
|
||||
__m512i buf[16*4];
|
||||
__m512i *dst512 = (__m512i*)dst;
|
||||
size_t numz, u;
|
||||
uint64_t l0, l1;
|
||||
sph_u64 l0, l1, l0e, l1e;
|
||||
|
||||
buf[0] = m512_const1_64( 0x80ULL );
|
||||
|
||||
@@ -731,10 +748,12 @@ jh_8way_close( jh_8way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
|
||||
memset_zero_512( buf+1, (numz>>3) - 1 );
|
||||
|
||||
l0 = ( sc->block_count << 9 ) + ( sc->ptr << 3 );
|
||||
l1 = ( sc->block_count >> 55 );
|
||||
*(buf + (numz>>3) ) = _mm512_set1_epi64( bswap_64( l1 ) );
|
||||
*(buf + (numz>>3) + 1) = _mm512_set1_epi64( bswap_64( l0 ) );
|
||||
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
|
||||
l1 = SPH_T64(sc->block_count >> 55);
|
||||
sph_enc64be( &l0e, l0 );
|
||||
sph_enc64be( &l1e, l1 );
|
||||
*(buf + (numz>>3) ) = _mm512_set1_epi64( l1e );
|
||||
*(buf + (numz>>3) + 1) = _mm512_set1_epi64( l0e );
|
||||
|
||||
jh_8way_core( sc, buf, numz + 16 );
|
||||
|
||||
@@ -753,7 +772,7 @@ jh256_8way_update(void *cc, const void *data, size_t len)
|
||||
void
|
||||
jh256_8way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_8way_close(cc, 0, 0, dst, 8);
|
||||
jh_8way_close(cc, 0, 0, dst, 8, IV256);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -765,7 +784,7 @@ jh512_8way_update(void *cc, const void *data, size_t len)
|
||||
void
|
||||
jh512_8way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_8way_close(cc, 0, 0, dst, 16);
|
||||
jh_8way_close(cc, 0, 0, dst, 16, IV512);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -863,12 +882,12 @@ jh_4way_core( jh_4way_context *sc, const void *data, size_t len )
|
||||
|
||||
static void
|
||||
jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
size_t out_size_w32 )
|
||||
size_t out_size_w32, const void *iv )
|
||||
{
|
||||
__m256i buf[16*4];
|
||||
__m256i *dst256 = (__m256i*)dst;
|
||||
size_t numz, u;
|
||||
uint64_t l0, l1;
|
||||
sph_u64 l0, l1, l0e, l1e;
|
||||
|
||||
buf[0] = m256_const1_64( 0x80ULL );
|
||||
|
||||
@@ -879,10 +898,12 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
|
||||
memset_zero_256( buf+1, (numz>>3) - 1 );
|
||||
|
||||
l0 = ( sc->block_count << 9 ) + ( sc->ptr << 3 );
|
||||
l1 = ( sc->block_count >> 55 );
|
||||
*(buf + (numz>>3) ) = _mm256_set1_epi64x( bswap_64( l1 ) );
|
||||
*(buf + (numz>>3) + 1) = _mm256_set1_epi64x( bswap_64( l0 ) );
|
||||
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
|
||||
l1 = SPH_T64(sc->block_count >> 55);
|
||||
sph_enc64be( &l0e, l0 );
|
||||
sph_enc64be( &l1e, l1 );
|
||||
*(buf + (numz>>3) ) = _mm256_set1_epi64x( l1e );
|
||||
*(buf + (numz>>3) + 1) = _mm256_set1_epi64x( l0e );
|
||||
|
||||
jh_4way_core( sc, buf, numz + 16 );
|
||||
|
||||
@@ -901,7 +922,7 @@ jh256_4way_update(void *cc, const void *data, size_t len)
|
||||
void
|
||||
jh256_4way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_4way_close(cc, 0, 0, dst, 8 );
|
||||
jh_4way_close(cc, 0, 0, dst, 8, IV256);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -913,7 +934,7 @@ jh512_4way_update(void *cc, const void *data, size_t len)
|
||||
void
|
||||
jh512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_4way_close(cc, 0, 0, dst, 16 );
|
||||
jh_4way_close(cc, 0, 0, dst, 16, IV512);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -43,6 +43,7 @@ extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_jh256 256
|
||||
|
@@ -65,7 +65,7 @@ void jha_hash_4way( void *out, const void *input )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
|
||||
blake512_4way_init( &ctx_blake );
|
||||
blake512_4way_update( &ctx_blake, vhash, 64 );
|
||||
blake512_4way( &ctx_blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx_blake, vhashA );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
|
@@ -28,28 +28,26 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do {
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
keccakhash_8way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
@@ -81,28 +79,27 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
keccakhash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ))
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
@@ -1,9 +1,5 @@
|
||||
#include "keccak-gate.h"
|
||||
#include "sph_keccak.h"
|
||||
|
||||
int hard_coded_eb = 1;
|
||||
|
||||
// KECCAK
|
||||
|
||||
bool register_keccak_algo( algo_gate_t* gate )
|
||||
{
|
||||
@@ -23,8 +19,6 @@ bool register_keccak_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
// KECCAKC
|
||||
|
||||
bool register_keccakc_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
@@ -43,50 +37,3 @@ bool register_keccakc_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
// SHA3D
|
||||
|
||||
void sha3d( void *state, const void *input, int len )
|
||||
{
|
||||
uint32_t _ALIGN(64) buffer[16], hash[16];
|
||||
sph_keccak_context ctx_keccak;
|
||||
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, input, len );
|
||||
sph_keccak256_close( &ctx_keccak, (void*) buffer );
|
||||
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, buffer, 32 );
|
||||
sph_keccak256_close( &ctx_keccak, (void*) hash );
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
void sha3d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
{
|
||||
sha3d( merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size );
|
||||
for ( int i = 0; i < sctx->job.merkle_count; i++ )
|
||||
{
|
||||
memcpy( merkle_root + 32, sctx->job.merkle[i], 32 );
|
||||
sha256d( merkle_root, merkle_root, 64 );
|
||||
}
|
||||
}
|
||||
|
||||
bool register_sha3d_algo( algo_gate_t* gate )
|
||||
{
|
||||
hard_coded_eb = 6;
|
||||
opt_extranonce = false;
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
gate->gen_merkle_root = (void*)&sha3d_gen_merkle_root;
|
||||
#if defined (KECCAK_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha3d_8way;
|
||||
gate->hash = (void*)&sha3d_hash_8way;
|
||||
#elif defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha3d_4way;
|
||||
gate->hash = (void*)&sha3d_hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha3d;
|
||||
gate->hash = (void*)&sha3d_hash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -10,37 +10,24 @@
|
||||
#define KECCAK_4WAY 1
|
||||
#endif
|
||||
|
||||
extern int hard_coded_eb;
|
||||
|
||||
#if defined(KECCAK_8WAY)
|
||||
|
||||
void keccakhash_8way( void *state, const void *input );
|
||||
int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void sha3d_hash_8way( void *state, const void *input );
|
||||
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(KECCAK_4WAY)
|
||||
|
||||
void keccakhash_4way( void *state, const void *input );
|
||||
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void sha3d_hash_4way( void *state, const void *input );
|
||||
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void keccakhash( void *state, const void *input );
|
||||
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void sha3d_hash( void *state, const void *input );
|
||||
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -1,7 +1,6 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "keccak-hash-4way.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
static const uint64_t RC[] = {
|
||||
0x0000000000000001, 0x0000000000008082,
|
||||
@@ -164,12 +163,12 @@ static void keccak64_8way_close( keccak64_ctx_m512i *kc, void *dst,
|
||||
unsigned eb;
|
||||
union {
|
||||
__m512i tmp[lim + 1];
|
||||
uint64_t dummy; /* for alignment */
|
||||
sph_u64 dummy; /* for alignment */
|
||||
} u;
|
||||
size_t j;
|
||||
size_t m512_len = byte_len >> 3;
|
||||
|
||||
eb = hard_coded_eb;
|
||||
eb = 0x100 >> 8;
|
||||
if ( kc->ptr == (lim - 8) )
|
||||
{
|
||||
const uint64_t t = eb | 0x8000000000000000;
|
||||
@@ -345,12 +344,12 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
unsigned eb;
|
||||
union {
|
||||
__m256i tmp[lim + 1];
|
||||
uint64_t dummy; /* for alignment */
|
||||
sph_u64 dummy; /* for alignment */
|
||||
} u;
|
||||
size_t j;
|
||||
size_t m256_len = byte_len >> 3;
|
||||
|
||||
eb = hard_coded_eb;
|
||||
eb = 0x100 >> 8;
|
||||
if ( kc->ptr == (lim - 8) )
|
||||
{
|
||||
const uint64_t t = eb | 0x8000000000000000;
|
||||
|
@@ -43,8 +43,16 @@ extern "C"{
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_keccak256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-512.
|
||||
*/
|
||||
#define SPH_SIZE_keccak512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Keccak computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once a
|
||||
|
@@ -18,34 +18,36 @@ void keccakhash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_keccak( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[32];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t endiandata[32];
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
keccakhash( hash64, endiandata );
|
||||
if ( valid_hash( hash64, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
do {
|
||||
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
keccakhash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFF00)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1,126 +0,0 @@
|
||||
#include "keccak-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
#if defined(KECCAK_8WAY)
|
||||
|
||||
void sha3d_hash_8way(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16*8] __attribute__ ((aligned (128)));
|
||||
keccak256_8way_context ctx;
|
||||
|
||||
keccak256_8way_init( &ctx );
|
||||
keccak256_8way_update( &ctx, input, 80 );
|
||||
keccak256_8way_close( &ctx, buffer );
|
||||
|
||||
keccak256_8way_init( &ctx );
|
||||
keccak256_8way_update( &ctx, buffer, 32 );
|
||||
keccak256_8way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[49]); // 3*16+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do {
|
||||
sha3d_hash_8way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(KECCAK_4WAY)
|
||||
|
||||
void sha3d_hash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16*4] __attribute__ ((aligned (64)));
|
||||
keccak256_4way_context ctx;
|
||||
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way_update( &ctx, input, 80 );
|
||||
keccak256_4way_close( &ctx, buffer );
|
||||
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way_update( &ctx, buffer, 32 );
|
||||
keccak256_4way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do {
|
||||
sha3d_hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,50 +0,0 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_keccak.h"
|
||||
|
||||
void sha3d_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16];
|
||||
sph_keccak256_context ctx_keccak;
|
||||
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, input, 80 );
|
||||
sph_keccak256_close( &ctx_keccak, buffer );
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, buffer, 32 );
|
||||
sph_keccak256_close( &ctx_keccak, state );
|
||||
}
|
||||
|
||||
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[32];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
sha3d_hash( hash64, endiandata );
|
||||
if ( valid_hash( hash64, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -32,8 +32,8 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -1616,7 +1616,7 @@ keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
|
||||
} u; \
|
||||
size_t j; \
|
||||
\
|
||||
eb = hard_coded_eb; \
|
||||
eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
|
||||
if (kc->ptr == (lim - 1)) { \
|
||||
if (n == 7) { \
|
||||
u.tmp[0] = eb; \
|
||||
|
@@ -459,11 +459,6 @@ int luffa_4way_init( luffa_4way_context *state, int hashbitlen )
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initialise a 4-way Luffa context for a 512 bit digest.
// Forwards to luffa_4way_init and returns its result unchanged.
int luffa512_4way_init( luffa_4way_context *state )
{
   const int hashbitlen = 512;

   return luffa_4way_init( state, hashbitlen );
}
|
||||
|
||||
// Do not call luffa_update_close after having called luffa_update.
|
||||
// Once luffa_update has been called only call luffa_update or luffa_close.
|
||||
int luffa_4way_update( luffa_4way_context *state, const void *data,
|
||||
@@ -501,14 +496,6 @@ int luffa_4way_update( luffa_4way_context *state, const void *data,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int luffa512_4way_update( luffa_4way_context *state, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
return luffa_4way_update( state, data, len );
|
||||
}
|
||||
*/
|
||||
|
||||
int luffa_4way_close( luffa_4way_context *state, void *hashval )
|
||||
{
|
||||
__m512i *buffer = (__m512i*)state->buffer;
|
||||
@@ -531,77 +518,6 @@ int luffa_4way_close( luffa_4way_context *state, void *hashval )
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int luffa512_4way_close( luffa_4way_context *state, void *hashval )
|
||||
{
|
||||
return luffa_4way_close( state, hashval );
|
||||
}
|
||||
*/
|
||||
|
||||
// One-shot 4-way Luffa-512: initialise the context, absorb the message and
// write the digest, without separate init/update/close calls.
//   state  : context, fully re-initialised here (no prior init required).
//   output : receives the digests as written by finalization512_4way.
//   data   : 4-way interleaved message, read as 512 bit vectors
//            (presumably 128 bit lane interleave - confirm with callers).
//   inlen  : message length in bytes per lane.
// Each full block is 32 bytes per lane (two vectors). The partial block path
// takes one whole vector of data and places the padding word directly after
// it, so it is only correct when the remainder is exactly 16 bytes (e.g. an
// 80 byte input); other non-zero remainders are not handled.
// Always returns 0.
int luffa512_4way_full( luffa_4way_context *state, void *output,
                        const void *data, size_t inlen )
{
   const __m128i *iv = (const __m128i*)IV;
   const __m512i *vdata = (const __m512i*)data;
   // Byte shuffle that reverses the bytes of every 32 bit word.
   const __m512i shuff_bswap32 = m512_const_64(
                             0x3c3d3e3f38393a3b, 0x3435363730313233,
                             0x2c2d2e2f28292a2b, 0x2425262720212223,
                             0x1c1d1e1f18191a1b, 0x1415161710111213,
                             0x0c0d0e0f08090a0b, 0x0405060700010203 );
   const int blocks = (int)( inlen >> 5 );
   __m512i msg[2];
   int i;

   // Context setup: broadcast each 128 bit IV word to all lanes.
   state->hashbitlen = 512;
   for ( i = 0; i < 10; i++ )
      state->chainv[i] = m512_const1_128( iv[i] );

   ((__m512i*)state->buffer)[0] = m512_zero;
   ((__m512i*)state->buffer)[1] = m512_zero;

   state->rembytes = inlen & 0x1F;

   // full blocks
   for ( i = 0; i < blocks; i++, vdata += 2 )
   {
      msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
      msg[1] = _mm512_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
      rnd512_4way( state, msg );
   }

   if ( state->rembytes )
   {
      // 16 byte partial block (80 byte len): data followed by padding
      msg[0] = _mm512_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
      msg[1] = m512_const2_64( 0, 0x0000000080000000 );
   }
   else
   {
      // no remainder: a block holding only padding
      msg[0] = m512_const2_64( 0, 0x0000000080000000 );
      msg[1] = m512_zero;
   }
   rnd512_4way( state, msg );

   finalization512_4way( state, (uint32*)output );

   // Byte offset taken through char*, arithmetic on void* is not ISO C.
   if ( state->hashbitlen > 512 )
      finalization512_4way( state, (uint32*)( (char*)output + 64 ) );

   return 0;
}
|
||||
|
||||
int luffa_4way_update_close( luffa_4way_context *state,
|
||||
void *output, const void *data, size_t inlen )
|
||||
{
|
||||
@@ -1115,69 +1031,6 @@ int luffa_2way_close( luffa_2way_context *state, void *hashval )
|
||||
return 0;
|
||||
}
|
||||
|
||||
// One-shot 2-way Luffa-512: initialise the context, absorb the message and
// write the digest, without separate init/update/close calls.
//   state  : context, fully re-initialised here (no prior init required).
//   output : receives the digests as written by finalization512_2way.
//   data   : 2-way interleaved message, read as 256 bit vectors
//            (presumably 128 bit lane interleave - confirm with callers).
//   inlen  : message length in bytes per lane.
// Each full block is 32 bytes per lane (two vectors). The partial block path
// takes one whole vector of data and places the padding word directly after
// it, so it is only correct when the remainder is exactly 16 bytes (e.g. an
// 80 byte input); other non-zero remainders are not handled.
// Always returns 0.
int luffa512_2way_full( luffa_2way_context *state, void *output,
                        const void *data, size_t inlen )
{
   const __m128i *iv = (const __m128i*)IV;
   const __m256i *vdata = (const __m256i*)data;
   // Byte shuffle that reverses the bytes of every 32 bit word.
   const __m256i shuff_bswap32 = m256_const_64( 0x1c1d1e1f18191a1b,
                                                0x1415161710111213,
                                                0x0c0d0e0f08090a0b,
                                                0x0405060700010203 );
   const int blocks = (int)( inlen >> 5 );
   __m256i msg[2];
   int i;

   // Context setup: broadcast each 128 bit IV word to both lanes.
   state->hashbitlen = 512;
   for ( i = 0; i < 10; i++ )
      state->chainv[i] = m256_const1_128( iv[i] );

   ((__m256i*)state->buffer)[0] = m256_zero;
   ((__m256i*)state->buffer)[1] = m256_zero;

   state->rembytes = inlen & 0x1F;

   // full blocks
   for ( i = 0; i < blocks; i++, vdata += 2 )
   {
      msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
      msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
      rnd512_2way( state, msg );
   }

   if ( state->rembytes )
   {
      // 16 byte partial block (80 byte len): data followed by padding
      msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
      msg[1] = m256_const2_64( 0, 0x0000000080000000 );
   }
   else
   {
      // no remainder: a block holding only padding
      msg[0] = m256_const2_64( 0, 0x0000000080000000 );
      msg[1] = m256_zero;
   }
   rnd512_2way( state, msg );

   finalization512_2way( state, (uint32*)output );

   // Byte offset taken through char*, arithmetic on void* is not ISO C.
   if ( state->hashbitlen > 512 )
      finalization512_2way( state, (uint32*)( (char*)output + 32 ) );

   return 0;
}
|
||||
|
||||
int luffa_2way_update_close( luffa_2way_context *state,
|
||||
void *output, const void *data, size_t inlen )
|
||||
{
|
||||
|
@@ -61,23 +61,11 @@ typedef struct {
|
||||
} luffa_4way_context __attribute((aligned(128)));
|
||||
|
||||
int luffa_4way_init( luffa_4way_context *state, int hashbitlen );
|
||||
//int luffa_4way_update( luffa_4way_context *state, const void *data,
|
||||
// size_t len );
|
||||
//int luffa_4way_close( luffa_4way_context *state, void *hashval );
|
||||
int luffa_4way_update( luffa_4way_context *state, const void *data,
|
||||
size_t len );
|
||||
int luffa_4way_close( luffa_4way_context *state, void *hashval );
|
||||
int luffa_4way_update_close( luffa_4way_context *state, void *output,
|
||||
const void *data, size_t inlen );
|
||||
int luffa512_4way_full( luffa_4way_context *state, void *output,
|
||||
const void *data, size_t inlen );
|
||||
int luffa512_4way_init( luffa_4way_context *state );
|
||||
int luffa512_4way_update( luffa_4way_context *state, const void *data,
|
||||
size_t len );
|
||||
int luffa512_4way_close( luffa_4way_context *state, void *hashval );
|
||||
int luffa512_4way_update_close( luffa_4way_context *state, void *output,
|
||||
const void *data, size_t inlen );
|
||||
|
||||
#define luffa_4way_update luffa512_4way_update
|
||||
#define luffa_4way_close luffa512_4way_close
|
||||
#define luffa_4way_update_close luffa512_4way_update_close
|
||||
|
||||
#endif
|
||||
|
||||
@@ -94,8 +82,6 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
int luffa_2way_close( luffa_2way_context *state, void *hashval );
|
||||
int luffa_2way_update_close( luffa_2way_context *state, void *output,
|
||||
const void *data, size_t inlen );
|
||||
int luffa512_2way_full( luffa_2way_context *state, void *output,
|
||||
const void *data, size_t inlen );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -262,32 +262,38 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( bench ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
|
||||
blake256_16way_init( &allium_16way_ctx.blake );
|
||||
blake256_16way_update( &allium_16way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
allium_16way_hash( hash, vdata );
|
||||
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
|
||||
n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4,
|
||||
n+ 3, n+ 2, n +1, n ) );
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if unlikely( valid_hash( hash+(lane<<3), ptarget ) && !bench )
|
||||
allium_16way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -314,19 +320,18 @@ bool init_allium_8way_ctx()
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_8way_hash( void *hash, const void *input )
|
||||
void allium_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhashA[4*8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[4*8] __attribute__ ((aligned (64)));
|
||||
// uint64_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint64_t *hash0 = (uint64_t*)hash;
|
||||
uint64_t *hash1 = (uint64_t*)hash+ 4;
|
||||
uint64_t *hash2 = (uint64_t*)hash+ 8;
|
||||
uint64_t *hash3 = (uint64_t*)hash+12;
|
||||
uint64_t *hash4 = (uint64_t*)hash+16;
|
||||
uint64_t *hash5 = (uint64_t*)hash+20;
|
||||
uint64_t *hash6 = (uint64_t*)hash+24;
|
||||
uint64_t *hash7 = (uint64_t*)hash+28;
|
||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (32)));
|
||||
allium_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &allium_8way_ctx, sizeof(allium_8way_ctx) );
|
||||
@@ -393,66 +398,69 @@ void allium_8way_hash( void *hash, const void *input )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash4, hash4, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash5, hash5, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash6, hash6, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash7, hash7, 256 );
|
||||
update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 );
|
||||
}
|
||||
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint64_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint64_t *ptarget = (uint64_t*)work->target;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
|
||||
blake256_8way_init( &allium_8way_ctx.blake );
|
||||
blake256_8way_update( &allium_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
allium_8way_hash( hash, vdata );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
allium_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||
{
|
||||
const uint64_t *lane_hash = hash + (lane<<2);
|
||||
if unlikely( valid_hash( lane_hash, ptarget ) && !bench )
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
} while likely( (n <= last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -220,7 +220,7 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
// Assemble block header
|
||||
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
|
||||
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
|
||||
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits), NULL );
|
||||
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
|
||||
for ( t = 0; t < 16; t++ )
|
||||
g_work->data[ 20+t ] = ((uint32_t*)sctx->job.extra)[t];
|
||||
}
|
||||
|
@@ -33,7 +33,7 @@ void lyra2h_4way_hash( void *state, const void *input )
|
||||
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2h_4way_blake_mid, sizeof l2h_4way_blake_mid );
|
||||
blake256_4way_update( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
@@ -260,8 +260,8 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
|
||||
// Overlap has 2 in Nrows chance reduced to 1 in Nrows because if both
|
||||
// overlap it's unified.
|
||||
// As a result normal is Nrows-2 / Nrows.
|
||||
// for 4 rows: 1 unified, 2 overlap, 1 normal.
|
||||
// for 8 rows: 1 unified, 2 overlap, 56 normal.
|
||||
// for 4 rows: 1 unified, 1 overlap, 2 normal.
|
||||
// for 8 rows: 1 unified, 1 overlap, 6 normal.
|
||||
|
||||
static inline void reducedDuplexRow_2way_normal( uint64_t *State,
|
||||
uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||
@@ -338,18 +338,21 @@ static inline void reducedDuplexRow_2way_normal( uint64_t *State,
|
||||
_mm512_store_si512( (__m512i*)State + 3, state3 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
// rowInOut0 ! = rowInOut1 != rowOut
|
||||
static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
||||
uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||
uint64_t *rowOut, uint64_t nCols)
|
||||
{
|
||||
|
||||
int i;
|
||||
register __m512i state0, state1, state2, state3;
|
||||
__m512i *in = (__m512i*)rowIn;
|
||||
__m512i *inout0 = (__m512i*)rowInOut0;
|
||||
__m512i *inout1 = (__m512i*)rowInOut1;
|
||||
__m512i *out = (__m512i*)rowOut;
|
||||
// inout_ovly io;
|
||||
ovly_512 io0, io1, io2;
|
||||
inout_ovly io;
|
||||
|
||||
state0 = _mm512_load_si512( (__m512i*)State );
|
||||
state1 = _mm512_load_si512( (__m512i*)State + 1 );
|
||||
@@ -359,21 +362,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
||||
for ( i = 0; i < nCols; i++ )
|
||||
{
|
||||
//Absorbing "M[prev] [+] M[row*]"
|
||||
io0.v512 = _mm512_mask_blend_epi64( 0xf0,
|
||||
_mm512_load_si512( (__m512i*)inout0 ),
|
||||
_mm512_load_si512( (__m512i*)inout1 ) );
|
||||
io1.v512 = _mm512_mask_blend_epi64( 0xf0,
|
||||
_mm512_load_si512( (__m512i*)inout0 +1 ),
|
||||
_mm512_load_si512( (__m512i*)inout1 +1 ) );
|
||||
io2.v512 = _mm512_mask_blend_epi64( 0xf0,
|
||||
_mm512_load_si512( (__m512i*)inout0 +2 ),
|
||||
_mm512_load_si512( (__m512i*)inout1 +2 ) );
|
||||
|
||||
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0.v512 ) );
|
||||
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1.v512 ) );
|
||||
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2.v512 ) );
|
||||
|
||||
/*
|
||||
io.v512[0] = _mm512_mask_blend_epi64( 0xf0,
|
||||
_mm512_load_si512( (__m512i*)inout0 ),
|
||||
_mm512_load_si512( (__m512i*)inout1 ) );
|
||||
@@ -387,7 +375,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
||||
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io.v512[0] ) );
|
||||
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io.v512[1] ) );
|
||||
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io.v512[2] ) );
|
||||
*/
|
||||
|
||||
//Applies the reduced-round transformation f to the sponge's state
|
||||
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
|
||||
@@ -401,21 +388,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
||||
out[2] = _mm512_xor_si512( out[2], state2 );
|
||||
|
||||
// if out is the same row as inout, update with new data.
|
||||
if ( rowOut == rowInOut0 )
|
||||
{
|
||||
io0.v512 = _mm512_mask_blend_epi64( 0x0f, io0.v512, out[0] );
|
||||
io1.v512 = _mm512_mask_blend_epi64( 0x0f, io1.v512, out[1] );
|
||||
io2.v512 = _mm512_mask_blend_epi64( 0x0f, io2.v512, out[2] );
|
||||
|
||||
}
|
||||
if ( rowOut == rowInOut1 )
|
||||
{
|
||||
io0.v512 = _mm512_mask_blend_epi64( 0xf0, io0.v512, out[0] );
|
||||
io1.v512 = _mm512_mask_blend_epi64( 0xf0, io1.v512, out[1] );
|
||||
io2.v512 = _mm512_mask_blend_epi64( 0xf0, io2.v512, out[2] );
|
||||
}
|
||||
|
||||
/*
|
||||
if ( rowOut == rowInOut0 )
|
||||
{
|
||||
io.v512[0] = _mm512_mask_blend_epi64( 0x0f, io.v512[0], out[0] );
|
||||
@@ -429,35 +401,27 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
|
||||
io.v512[1] = _mm512_mask_blend_epi64( 0xf0, io.v512[1], out[1] );
|
||||
io.v512[2] = _mm512_mask_blend_epi64( 0xf0, io.v512[2], out[2] );
|
||||
}
|
||||
*/
|
||||
|
||||
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
||||
t0 = _mm512_permutex_epi64( state0, 0x93 );
|
||||
t1 = _mm512_permutex_epi64( state1, 0x93 );
|
||||
t2 = _mm512_permutex_epi64( state2, 0x93 );
|
||||
|
||||
io0.v512 = _mm512_xor_si512( io0.v512,
|
||||
io.v512[0] = _mm512_xor_si512( io.v512[0],
|
||||
_mm512_mask_blend_epi64( 0x11, t0, t2 ) );
|
||||
io1.v512 = _mm512_xor_si512( io1.v512,
|
||||
io.v512[1] = _mm512_xor_si512( io.v512[1],
|
||||
_mm512_mask_blend_epi64( 0x11, t1, t0 ) );
|
||||
io2.v512 = _mm512_xor_si512( io2.v512,
|
||||
io.v512[2] = _mm512_xor_si512( io.v512[2],
|
||||
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
|
||||
}
|
||||
|
||||
casti_m256i( inout0, 0 ) = io0.v256lo;
|
||||
casti_m256i( inout1, 1 ) = io0.v256hi;
|
||||
casti_m256i( inout0, 2 ) = io1.v256lo;
|
||||
casti_m256i( inout1, 3 ) = io1.v256hi;
|
||||
casti_m256i( inout0, 4 ) = io2.v256lo;
|
||||
casti_m256i( inout1, 5 ) = io2.v256hi;
|
||||
/*
|
||||
_mm512_mask_store_epi64( inout0, 0x0f, io.v512[0] );
|
||||
_mm512_mask_store_epi64( inout1, 0xf0, io.v512[0] );
|
||||
_mm512_mask_store_epi64( inout0 +1, 0x0f, io.v512[1] );
|
||||
_mm512_mask_store_epi64( inout1 +1, 0xf0, io.v512[1] );
|
||||
_mm512_mask_store_epi64( inout0 +2, 0x0f, io.v512[2] );
|
||||
_mm512_mask_store_epi64( inout1 +2, 0xf0, io.v512[2] );
|
||||
*/
|
||||
|
||||
//Goes to next block
|
||||
in += BLOCK_LEN_M256I;
|
||||
inout0 += BLOCK_LEN_M256I;
|
||||
@@ -602,7 +566,7 @@ static inline void reducedDuplexRow_2way_unified( uint64_t *State,
|
||||
inout[1] = _mm512_xor_si512( inout[1],
|
||||
_mm512_mask_blend_epi64( 0x11, t1, t0 ) );
|
||||
inout[2] = _mm512_xor_si512( inout[2],
|
||||
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
|
||||
_mm512_mask_blend_epi64( 0x11, t2, t1 ) );
|
||||
|
||||
out[0] = _mm512_xor_si512( out[0], state0 );
|
||||
out[1] = _mm512_xor_si512( out[1], state1 );
|
||||
@@ -611,9 +575,9 @@ static inline void reducedDuplexRow_2way_unified( uint64_t *State,
|
||||
}
|
||||
|
||||
//Goes to next block
|
||||
in += BLOCK_LEN_M256I;
|
||||
in += BLOCK_LEN_M256I;
|
||||
inout += BLOCK_LEN_M256I;
|
||||
out += BLOCK_LEN_M256I;
|
||||
out += BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm512_store_si512( (__m512i*)State, state0 );
|
||||
@@ -636,8 +600,8 @@ static inline void reducedDuplexRow_2way_unified( uint64_t *State,
|
||||
|
||||
// Wrapper
|
||||
inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
|
||||
uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||
uint64_t *rowOut, uint64_t nCols )
|
||||
uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||
uint64_t *rowOut, uint64_t nCols )
|
||||
{
|
||||
if ( rowInOut0 == rowInOut1 )
|
||||
reducedDuplexRow_2way_unified( State, rowIn, rowInOut0, rowOut, nCols );
|
||||
@@ -650,18 +614,18 @@ inline void reducedDuplexRow_2way( uint64_t *State, uint64_t *rowIn,
|
||||
}
|
||||
|
||||
inline void reducedDuplexRow_2way_X( uint64_t *State, uint64_t *rowIn,
|
||||
uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||
uint64_t *rowOut, uint64_t nCols )
|
||||
uint64_t *rowInOut0, uint64_t *rowInOut1,
|
||||
uint64_t *rowOut, uint64_t nCols )
|
||||
{
|
||||
if ( rowInOut0 == rowInOut1 )
|
||||
if ( rowInOut0 == rowInOut1 )
|
||||
reducedDuplexRow_2way_unified( State, rowIn, rowInOut0, rowOut, nCols );
|
||||
else if ( ( rowInOut0 == rowOut ) || ( rowInOut1 == rowOut ) )
|
||||
{
|
||||
asm volatile ( "nop" ); // Prevent GCC from optimizing
|
||||
reducedDuplexRow_2way_overlap_X( State, rowIn, rowInOut0, rowInOut1,
|
||||
rowOut, nCols );
|
||||
}
|
||||
else
|
||||
else if ( ( rowInOut0 == rowOut ) || ( rowInOut1 == rowOut ) )
|
||||
{
|
||||
asm ( "nop" ); // This prevents GCC from merging with previous function
|
||||
reducedDuplexRow_2way_overlap_X( State, rowIn, rowInOut0, rowInOut1,
|
||||
rowOut, nCols );
|
||||
}
|
||||
else
|
||||
reducedDuplexRow_2way_normal( State, rowIn, rowInOut0, rowInOut1,
|
||||
rowOut, nCols );
|
||||
}
|
||||
|
@@ -203,18 +203,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
union _ovly_512
|
||||
{
|
||||
__m512i v512;
|
||||
struct
|
||||
{
|
||||
__m256i v256lo;
|
||||
__m256i v256hi;
|
||||
};
|
||||
};
|
||||
typedef union _ovly_512 ovly_512;
|
||||
|
||||
|
||||
union _inout_ovly
|
||||
{
|
||||
__m512i v512[3];
|
||||
|
61
algo/m7m.c
61
algo/m7m.c
@@ -149,7 +149,7 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
char data_str[161], hash_str[65], target_str[65];
|
||||
//uint8_t *bdata = 0;
|
||||
uint8_t bdata[8192] __attribute__ ((aligned (64)));
|
||||
int i, digits;
|
||||
int rc = 0, i, digits;
|
||||
int bytes;
|
||||
size_t p = sizeof(unsigned long), a = 64/p, b = 32/p;
|
||||
|
||||
@@ -267,41 +267,48 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
|
||||
}
|
||||
|
||||
|
||||
if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget )
|
||||
&& !opt_benchmark ) )
|
||||
|
||||
|
||||
// if ( unlikely( hash[7] <= ptarget[7] ) )
|
||||
// if ( likely( fulltest( hash, ptarget ) && !opt_benchmark ) )
|
||||
// rewrite to use 64 bit test.
|
||||
const unsigned char *hash_ = (const unsigned char *)hash;
|
||||
const unsigned char *target_ = (const unsigned char *)ptarget;
|
||||
for ( i = 31; i >= 0; i-- )
|
||||
{
|
||||
if ( opt_debug )
|
||||
if ( hash_[i] != target_[i] )
|
||||
{
|
||||
bin2hex( hash_str, (unsigned char *)hash, 32 );
|
||||
bin2hex( target_str, (unsigned char *)ptarget, 32 );
|
||||
bin2hex( data_str, (unsigned char *)data, 80 );
|
||||
applog( LOG_DEBUG, "DEBUG: [%d thread] Found share!\ndata %s\nhash %s\ntarget %s",
|
||||
thr_id, data_str, hash_str, target_str );
|
||||
rc = hash_[i] < target_[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( unlikely(rc) )
|
||||
{
|
||||
if ( opt_debug )
|
||||
{
|
||||
bin2hex(hash_str, (unsigned char *)hash, 32);
|
||||
bin2hex(target_str, (unsigned char *)ptarget, 32);
|
||||
bin2hex(data_str, (unsigned char *)data, 80);
|
||||
applog(LOG_DEBUG, "DEBUG: [%d thread] Found share!\ndata %s\nhash %s\ntarget %s", thr_id,
|
||||
data_str,
|
||||
hash_str,
|
||||
target_str);
|
||||
}
|
||||
pdata[19] = data[19];
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
mpf_set_prec_raw( magifpi, prec0 );
|
||||
mpf_set_prec_raw( magifpi0, prec0 );
|
||||
mpf_set_prec_raw( mptmp, prec0 );
|
||||
mpf_set_prec_raw( mpt1, prec0 );
|
||||
mpf_set_prec_raw( mpt2, prec0 );
|
||||
mpf_clear( magifpi );
|
||||
mpf_clear( magifpi0 );
|
||||
mpf_clear( mpten );
|
||||
mpf_clear( mptmp );
|
||||
mpf_clear( mpt1 );
|
||||
mpf_clear( mpt2 );
|
||||
mpz_clears( magipi, magisw, product, bns0, bns1, NULL );
|
||||
mpf_set_prec_raw(magifpi, prec0);
|
||||
mpf_set_prec_raw(magifpi0, prec0);
|
||||
mpf_set_prec_raw(mptmp, prec0);
|
||||
mpf_set_prec_raw(mpt1, prec0);
|
||||
mpf_set_prec_raw(mpt2, prec0);
|
||||
mpf_clear(magifpi);
|
||||
mpf_clear(magifpi0);
|
||||
mpf_clear(mpten);
|
||||
mpf_clear(mptmp);
|
||||
mpf_clear(mpt1);
|
||||
mpf_clear(mpt2);
|
||||
mpz_clears(magipi, magisw, product, bns0, bns1, NULL);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -154,13 +154,14 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
|
||||
void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr )
|
||||
uint32_t* end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
// ignore POK in first word
|
||||
// const int nonce_i = 19;
|
||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|
||||
|| ( *nonceptr >= *end_nonce_ptr ) )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
|
@@ -94,37 +94,6 @@ static const uint32_t K256[64] =
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm128_ror_32(x, 17), mm128_ror_32(x, 19) ), _mm_srli_epi32(x, 10) )
|
||||
|
||||
// One SHA-256 round on 128 bit vectors of 32 bit words.
// A..H are the working variables; only D and H are updated:
//    t1 = H + K256[j+i] + W[i] + Sigma1(E) + Ch(E,F,G)
//    t2 = Sigma0(A) + Maj(A,B,C)
//    D += t1;   H = t1 + t2
// Sigma0/Sigma1 are built from nested rotate-xor chains:
//    ror( ror( ror(E,14) ^ E, 5 ) ^ E, 6 )  == ror(E,6) ^ ror(E,11) ^ ror(E,25)
//    ror( ror( ror(A,9)  ^ A, 11 ) ^ A, 2 ) == ror(A,2) ^ ror(A,13) ^ ror(A,22)
// W and K256 must be visible at the point of use.
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
do { \
   /* round constant + message word + H */ \
   __m128i sha2s_kw = _mm_add_epi32( _mm_set1_epi32( K256[( (j)+(i) )] ), \
                                     W[i] ); \
   /* Sigma1(E) */ \
   __m128i sha2s_s1 = _mm_xor_si128( mm128_ror_32( E, 14 ), E ); \
   /* Sigma0(A) */ \
   __m128i sha2s_s0 = _mm_xor_si128( mm128_ror_32( A, 9 ), A ); \
   /* Ch(E,F,G) = ((F ^ G) & E) ^ G */ \
   __m128i sha2s_ch = _mm_xor_si128( \
                         _mm_and_si128( _mm_xor_si128( F, G ), E ), G ); \
   /* Maj(A,B,C) = ((A | B) & C) | (A & B) */ \
   __m128i sha2s_maj = _mm_or_si128( \
                          _mm_and_si128( _mm_or_si128( A, B ), C ), \
                          _mm_and_si128( A, B ) ); \
   sha2s_kw = _mm_add_epi32( H, sha2s_kw ); \
   sha2s_s1 = mm128_ror_32( sha2s_s1, 5 ); \
   sha2s_s0 = mm128_ror_32( sha2s_s0, 11 ); \
   sha2s_s1 = _mm_xor_si128( sha2s_s1, E ); \
   sha2s_s0 = _mm_xor_si128( sha2s_s0, A ); \
   sha2s_s1 = mm128_ror_32( sha2s_s1, 6 ); \
   sha2s_s0 = mm128_ror_32( sha2s_s0, 2 ); \
   /* t1 and t2 accumulate in place */ \
   sha2s_s1 = _mm_add_epi32( sha2s_s1, sha2s_ch ); \
   sha2s_s0 = _mm_add_epi32( sha2s_s0, sha2s_maj ); \
   sha2s_s1 = _mm_add_epi32( sha2s_s1, sha2s_kw ); \
   H = _mm_add_epi32( sha2s_s1, sha2s_s0 ); \
   D = _mm_add_epi32( D, sha2s_s1 ); \
} while (0)
|
||||
|
||||
|
||||
/*
|
||||
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
__m128i T1, T2; \
|
||||
@@ -135,8 +104,6 @@ do { \
|
||||
D = _mm_add_epi32( D, T1 ); \
|
||||
H = _mm_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
|
||||
static void
|
||||
sha256_4way_round( sha256_4way_context *ctx, __m128i *in, __m128i r[8] )
|
||||
|
@@ -319,7 +319,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
|
||||
|
||||
// SHA-512 4 way 64 bit
|
||||
|
||||
/*
|
||||
|
||||
#define CH(X, Y, Z) \
|
||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||
|
||||
@@ -327,15 +327,6 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
|
||||
_mm256_or_si256( _mm256_and_si256( X, Y ), \
|
||||
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
|
||||
|
||||
#define BSG5_0(x) \
|
||||
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
|
||||
_mm256_xor_si256( mm256_ror_64( x, 5 ), x ), 6 ), x ), 28 )
|
||||
|
||||
#define BSG5_1(x) \
|
||||
mm256_ror_64( _mm256_xor_si256( mm256_ror_64( \
|
||||
_mm256_xor_si256( mm256_ror_64( x, 23 ), x ), 4 ), x ), 14 )
|
||||
*/
|
||||
/*
|
||||
#define BSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 28), mm256_ror_64(x, 34) ), mm256_ror_64(x, 39) )
|
||||
@@ -343,8 +334,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
|
||||
#define BSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 14), mm256_ror_64(x, 18) ), mm256_ror_64(x, 41) )
|
||||
*/
|
||||
/*
|
||||
|
||||
#define SSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) )
|
||||
@@ -352,7 +342,7 @@ void sha512_8way_close( sha512_8way_context *sc, void *dst )
|
||||
#define SSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) )
|
||||
*/
|
||||
|
||||
// Interleave SSG0 & SSG1 for better throughput.
|
||||
// return ssg0(w0) + ssg1(w1)
|
||||
static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
|
||||
@@ -371,7 +361,7 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
|
||||
return _mm256_add_epi64( w0a, w1a );
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
#define SSG512x2_0( w0, w1, i ) do \
|
||||
{ \
|
||||
__m256i X0a, X1a, X0b, X1b; \
|
||||
@@ -401,51 +391,7 @@ static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
|
||||
w0 = _mm256_xor_si256( X0a, X0b ); \
|
||||
w1 = _mm256_xor_si256( X1a, X1b ); \
|
||||
} while(0)
|
||||
*/
|
||||
|
||||
// One SHA-512 round applied to four 64-bit lanes held in __m256i vectors.
// Reads W[i] and K512[i] from the enclosing scope, reads A..H, and updates
// D and H in place.
// The big sigma functions are built from chained rotate/xor steps:
//   ror( ror( ror(E,23) ^ E, 4 ) ^ E, 14 ) == ror(E,14) ^ ror(E,18) ^ ror(E,41)
//   ror( ror( ror(A, 5) ^ A, 6 ) ^ A, 28 ) == ror(A,28) ^ ror(A,34) ^ ror(A,39)
// Ch is computed as ((F ^ G) & E) ^ G and Maj as ((A | B) & C) | (A & B).
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
do { \
  __m256i s1_  = _mm256_xor_si256( mm256_ror_64( E, 23 ), E ); \
  __m256i s0_  = _mm256_xor_si256( mm256_ror_64( A, 5 ), A ); \
  __m256i ch_  = _mm256_xor_si256( \
                    _mm256_and_si256( _mm256_xor_si256( F, G ), E ), G ); \
  __m256i maj_ = _mm256_or_si256( \
                    _mm256_and_si256( _mm256_or_si256( A, B ), C ), \
                    _mm256_and_si256( A, B ) ); \
  /* H + K512 + W, taken before H is overwritten below */ \
  __m256i kw_  = _mm256_add_epi64( H, \
                    _mm256_add_epi64( _mm256_set1_epi64x( K512[ i ] ), \
                                      W[i] ) ); \
  s1_ = _mm256_xor_si256( mm256_ror_64( s1_, 4 ), E ); \
  s0_ = _mm256_xor_si256( mm256_ror_64( s0_, 6 ), A ); \
  s1_ = mm256_ror_64( s1_, 14 ); \
  s0_ = mm256_ror_64( s0_, 28 ); \
  /* s1_ becomes T1 = Sigma1 + Ch + H + K + W */ \
  s1_ = _mm256_add_epi64( _mm256_add_epi64( s1_, ch_ ), kw_ ); \
  H = _mm256_add_epi64( s1_, _mm256_add_epi64( s0_, maj_ ) ); \
  D = _mm256_add_epi64( D, s1_ ); \
} while (0)
|
||||
|
||||
/*
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
__m256i K = _mm256_add_epi64( W[i], _mm256_set1_epi64x( K512[ i ] ) ); \
|
||||
__m256i T1 = BSG5_1(E); \
|
||||
__m256i T2 = BSG5_0(A); \
|
||||
T1 = mm256_add4_64( T1, H, CH(E, F, G), K ); \
|
||||
T2 = _mm256_add_epi64( T2, MAJ(A, B, C) ); \
|
||||
D = _mm256_add_epi64( D, T1 ); \
|
||||
H = _mm256_add_epi64( T1, T2 ); \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
/*
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
__m256i T1, T2; \
|
||||
@@ -456,7 +402,7 @@ do { \
|
||||
D = _mm256_add_epi64( D, T1 ); \
|
||||
H = _mm256_add_epi64( T1, T2 ); \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
|
||||
static void
|
||||
sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] )
|
||||
|
@@ -3,9 +3,11 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// This is a fake, it actually does not do parallel AES, that requires VAES.
|
||||
// This is only intended when the preceding and following functions use the
|
||||
// same 2x128 interleave.
|
||||
// This implementation is deprecated, superseded by VAES in Icelake
|
||||
// which provides HW based 4 way aes.
|
||||
// It was created for AVX2 to eliminate interleaving between the
|
||||
// preceding and following function.
|
||||
// This code can be removed when current users have reverted to one way.
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
@@ -408,94 +410,4 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
||||
casti_m256i( dst, 3 ) = casti_m256i( ctx->h, 3 );
|
||||
}
|
||||
|
||||
// One-shot SHAvite-512 on two lanes packed into 256-bit vectors: resets ctx
// to the IV, absorbs len bytes per lane from data (2*len bytes are read in
// total), pads, runs the final compression and stores four 256-bit vectors
// of state to dst.
// NOTE(review): the padding code assumes the final partial block occupies
// fewer than six 256-bit vectors of buf -- confirm against callers.
void shavite512_2way_full( shavite512_2way_context *ctx, void *dst,
                           const void *data, size_t len )
{
   __m256i *state = (__m256i*)ctx->h;
   __m128i *iv = (__m128i*)IV512;
   unsigned char *buf = ctx->buf;
   size_t fill = 0;                 // bytes currently staged in buf

   // Load each 128-bit IV word into the state via m256_const1_128.
   for ( int i = 0; i < 4; i++ )
      state[i] = m256_const1_128( iv[i] );

   ctx->count3 = 0;
   ctx->count2 = 0;
   ctx->count1 = 0;
   ctx->count0 = 0;
   ctx->ptr    = 0;

   // Absorb the input, compressing each time buf becomes full.
   while ( len > 0 )
   {
      size_t chunk = (sizeof ctx->buf) - fill;
      if ( chunk > ( len << 1 ) )
         chunk = len << 1;

      memcpy( buf + fill, data, chunk );
      data  = (const unsigned char *)data + chunk;
      fill += chunk;
      len  -= chunk >> 1;           // len counts bytes per lane

      if ( fill != sizeof ctx->buf )
         continue;

      // Advance the 4-word counter by 1024, rippling the carry upward.
      ctx->count0 += 1024;
      if ( ctx->count0 == 0 && ++ctx->count1 == 0 && ++ctx->count2 == 0 )
         ctx->count3++;

      c512_2way( ctx, buf );
      fill = 0;
   }

   uint32_t vp = fill >> 5;         // index of the first unused 256-bit vector

   // Count = { 0, 16, 64, 80 }.  Outsize = 16 u32 = 512 bits = 0x0200
   // Count is misaligned to 16 bits and straddles 2 vectors.
   // Use u32 overlay to stage then u16 to load buf.
   union
   {
      uint32_t u32[4];
      uint16_t u16[8];
   } count;

   ctx->count0 += ( fill << 2 );    // fill/2 * 8
   count.u32[0] = ctx->count0;
   count.u32[1] = ctx->count1;
   count.u32[2] = ctx->count2;
   count.u32[3] = ctx->count3;

   if ( vp != 0 )  // half full buf, everyone else.
   {
      casti_m256i( buf, vp ) = m256_const2_64( 0, 0x0000000000000080 );
      vp++;
      memset_zero_256( (__m256i*)buf + vp, 6 - vp );
   }
   else            // empty buf, xevan.
   {
      casti_m256i( buf, 0 ) = m256_const2_64( 0, 0x0000000000000080 );
      memset_zero_256( (__m256i*)buf + 1, 5 );
      // The final block holds only padding; the counters in ctx are cleared
      // while the values captured in count above are still written below.
      ctx->count3 = 0;
      ctx->count2 = 0;
      ctx->count1 = 0;
      ctx->count0 = 0;
   }

   // The last two vectors carry the staged count and the output size.
   casti_m256i( buf, 6 ) = m256_const1_128(
                      _mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
   casti_m256i( buf, 7 ) = m256_const1_128( _mm_set_epi16(
                      0x0200,       count.u16[7], count.u16[6], count.u16[5],
                      count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );

   c512_2way( ctx, buf );

   for ( int i = 0; i < 4; i++ )
      casti_m256i( dst, i ) = casti_m256i( ctx->h, i );
}
|
||||
|
||||
#endif // AVX2
|
||||
|
@@ -18,8 +18,6 @@ void shavite512_2way_update( shavite512_2way_context *ctx, const void *data,
|
||||
void shavite512_2way_close( shavite512_2way_context *ctx, void *dst );
|
||||
void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
||||
const void *data, size_t len );
|
||||
void shavite512_2way_full( shavite512_2way_context *ctx, void *dst,
|
||||
const void *data, size_t len );
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
|
@@ -396,96 +396,4 @@ void shavite512_4way_update_close( shavite512_4way_context *ctx, void *dst,
|
||||
casti_m512i( dst, 3 ) = casti_m512i( ctx->h, 3 );
|
||||
}
|
||||
|
||||
|
||||
// One-shot SHAvite-512 on four lanes packed into 512-bit vectors: resets ctx
// to the IV, absorbs len bytes per lane from data (4*len bytes are read in
// total), pads, runs the final compression and stores four 512-bit vectors
// of state to dst.
// NOTE(review): the padding code assumes the final partial block occupies
// fewer than six 512-bit vectors of buf -- confirm against callers.
void shavite512_4way_full( shavite512_4way_context *ctx, void *dst,
                           const void *data, size_t len )
{
   __m512i *state = (__m512i*)ctx->h;
   __m128i *iv = (__m128i*)IV512;
   unsigned char *buf = ctx->buf;
   size_t fill = 0;                 // bytes currently staged in buf

   // Load each 128-bit IV word into the state via m512_const1_128.
   for ( int i = 0; i < 4; i++ )
      state[i] = m512_const1_128( iv[i] );

   ctx->count3 = 0;
   ctx->count2 = 0;
   ctx->count1 = 0;
   ctx->count0 = 0;
   ctx->ptr    = 0;

   // Absorb the input, compressing each time buf becomes full.
   while ( len > 0 )
   {
      size_t chunk = (sizeof ctx->buf) - fill;
      if ( chunk > ( len << 2 ) )
         chunk = len << 2;

      memcpy( buf + fill, data, chunk );
      data  = (const unsigned char *)data + chunk;
      fill += chunk;
      len  -= chunk >> 2;           // len counts bytes per lane

      if ( fill != sizeof ctx->buf )
         continue;

      // Advance the 4-word counter by 1024, rippling the carry upward.
      ctx->count0 += 1024;
      if ( ctx->count0 == 0 && ++ctx->count1 == 0 && ++ctx->count2 == 0 )
         ctx->count3++;

      c512_4way( ctx, buf );
      fill = 0;
   }

   uint32_t vp = fill >> 6;         // index of the first unused 512-bit vector

   // Count = { 0, 16, 64, 80 }.  Outsize = 16 u32 = 512 bits = 0x0200
   // Count is misaligned to 16 bits and straddles 2 vectors.
   // Use u32 overlay to stage then u16 to load buf.
   union
   {
      uint32_t u32[4];
      uint16_t u16[8];
   } count;

   ctx->count0 += ( fill << 1 );    // fill/4 * 8
   count.u32[0] = ctx->count0;
   count.u32[1] = ctx->count1;
   count.u32[2] = ctx->count2;
   count.u32[3] = ctx->count3;

   if ( vp != 0 )  // half full buf, everyone else.
   {
      casti_m512i( buf, vp ) = m512_const2_64( 0, 0x0000000000000080 );
      vp++;
      memset_zero_512( (__m512i*)buf + vp, 6 - vp );
   }
   else            // empty buf, xevan.
   {
      casti_m512i( buf, 0 ) = m512_const2_64( 0, 0x0000000000000080 );
      memset_zero_512( (__m512i*)buf + 1, 5 );
      // The final block holds only padding; the counters in ctx are cleared
      // while the values captured in count above are still written below.
      ctx->count3 = 0;
      ctx->count2 = 0;
      ctx->count1 = 0;
      ctx->count0 = 0;
   }

   // The last two vectors carry the staged count and the output size.
   casti_m512i( buf, 6 ) = m512_const1_128(
                      _mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
   casti_m512i( buf, 7 ) = m512_const1_128( _mm_set_epi16(
                      0x0200,       count.u16[7], count.u16[6], count.u16[5],
                      count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );

   c512_4way( ctx, buf );

   for ( int i = 0; i < 4; i++ )
      casti_m512i( dst, i ) = casti_m512i( ctx->h, i );
}
|
||||
|
||||
|
||||
#endif // VAES
|
||||
|
@@ -18,8 +18,6 @@ void shavite512_4way_update( shavite512_4way_context *ctx, const void *data,
|
||||
void shavite512_4way_close( shavite512_4way_context *ctx, void *dst );
|
||||
void shavite512_4way_update_close( shavite512_4way_context *ctx, void *dst,
|
||||
const void *data, size_t len );
|
||||
void shavite512_4way_full( shavite512_4way_context *ctx, void *dst,
|
||||
const void *data, size_t len );
|
||||
|
||||
#endif // VAES
|
||||
|
||||
|
@@ -1173,91 +1173,6 @@ int simd_4way_update_close( simd_4way_context *state, void *hashval,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// One-shot SIMD-512 on four lanes packed into 512-bit vectors: initialises
// state, hashes datalen bytes per lane from data (4*datalen bytes are read
// in total), appends the length block and copies the digest to hashval.
// Always returns 0.
int simd512_4way_full( simd_4way_context *state, void *hashval,
                       const void *data, int datalen )
{
   __m512i *A = (__m512i*)state->A;
   // Walk the input through a byte pointer; arithmetic directly on a
   // void pointer is a GNU extension and not valid ISO C.
   const unsigned char *in = (const unsigned char*)data;

   state->hashbitlen = 512;
   state->n_feistels = 8;
   state->blocksize = 128*8;
   state->count = 0;

   for ( int i = 0; i < 8; i++ )
      A[i] = _mm512_set4_epi32( SIMD_IV_512[4*i+3], SIMD_IV_512[4*i+2],
                                SIMD_IV_512[4*i+1], SIMD_IV_512[4*i+0] );

   const int bs = state->blocksize;   // bits in one lane
   int databitlen = datalen * 8;

   // The state was reset above, so the buffer is known to be empty and
   // every whole block can be hashed directly from the input.
   while ( databitlen >= bs )
   {
      SIMD_4way_Compress( state, in, 0 );
      in += 4*( bs/8 );
      databitlen -= bs;
      state->count += bs;
   }

   // Stage the trailing partial block, if any, at the start of the buffer.
   if ( databitlen > 0 )
   {
      memcpy( state->buffer, in, 4*( databitlen/8 ) );
      state->count += databitlen;
   }

   int current = state->count & (state->blocksize - 1);

   // If there is still some data in the buffer, zero-pad and hash it.
   if ( current )
   {
      current = current / 8;
      memset( state->buffer + 4*current, 0, 4*( state->blocksize/8 - current) );
      SIMD_4way_Compress( state, state->buffer, 0 );
   }

   // Input the message length as the last block: the 64-bit bit count is
   // written least significant byte first at byte offsets 0, 16, 32 and 48.
   memset( state->buffer, 0, 4*( state->blocksize/8 ) );
   uint64_t l = state->count;
   for ( int i = 0; i < 8; i++ )
   {
      state->buffer[ i    ] = l & 0xff;
      state->buffer[ i+16 ] = l & 0xff;
      state->buffer[ i+32 ] = l & 0xff;
      state->buffer[ i+48 ] = l & 0xff;
      l >>= 8;
   }

   // Final-compression flag: 2 when fewer than 16384 bits were hashed,
   // otherwise 1.
   const int isshort = ( state->count < 16384 ) ? 2 : 1;

   SIMD_4way_Compress( state, state->buffer, isshort );
   memcpy( hashval, state->A, 4*( state->hashbitlen / 8 ) );
   return 0;
}
|
||||
|
||||
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
////////////////////////////////////
|
||||
@@ -2014,90 +1929,4 @@ int simd_2way_update_close( simd_2way_context *state, void *hashval,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// One-shot SIMD-512 on two lanes packed into 256-bit vectors: initialises
// state, hashes datalen bytes per lane from data (2*datalen bytes are read
// in total), appends the length block and copies the digest to hashval.
// Always returns 0.
int simd512_2way_full( simd_2way_context *state, void *hashval,
                       const void *data, int datalen )
{
   __m256i *A = (__m256i*)state->A;
   // Walk the input through a byte pointer; arithmetic directly on a
   // void pointer is a GNU extension and not valid ISO C.
   const unsigned char *in = (const unsigned char*)data;

   state->hashbitlen = 512;
   state->n_feistels = 8;
   state->blocksize = 128*8;
   state->count = 0;

   for ( int i = 0; i < 8; i++ )
      A[i] = _mm256_set_epi32( SIMD_IV_512[4*i+3], SIMD_IV_512[4*i+2],
                               SIMD_IV_512[4*i+1], SIMD_IV_512[4*i+0],
                               SIMD_IV_512[4*i+3], SIMD_IV_512[4*i+2],
                               SIMD_IV_512[4*i+1], SIMD_IV_512[4*i+0] );

   const int bs = state->blocksize;   // bits in one lane
   int databitlen = datalen * 8;

   // The state was reset above, so the buffer is known to be empty and
   // every whole block can be hashed directly from the input.
   while ( databitlen >= bs )
   {
      SIMD_2way_Compress( state, in, 0 );
      in += 2*( bs/8 );
      databitlen -= bs;
      state->count += bs;
   }

   // Stage the trailing partial block, if any, at the start of the buffer.
   if ( databitlen > 0 )
   {
      memcpy( state->buffer, in, 2*( (databitlen+7)/8 ) );
      state->count += databitlen;
   }

   int current = state->count & (state->blocksize - 1);

   // If there is still some data in the buffer, zero-pad and hash it.
   if ( current )
   {
      current = ( current+7 ) / 8;
      memset( state->buffer + 2*current, 0, 2*( state->blocksize/8 - current) );
      SIMD_2way_Compress( state, state->buffer, 0 );
   }

   // Input the message length as the last block: the 64-bit bit count is
   // written least significant byte first at byte offsets 0 and 16.
   memset( state->buffer, 0, 2*( state->blocksize/8 ) );
   uint64_t l = state->count;
   for ( int i = 0; i < 8; i++ )
   {
      state->buffer[ i    ] = l & 0xff;
      state->buffer[ i+16 ] = l & 0xff;
      l >>= 8;
   }

   // Final-compression flag: 2 when fewer than 16384 bits were hashed,
   // otherwise 1.
   const int isshort = ( state->count < 16384 ) ? 2 : 1;

   SIMD_2way_Compress( state, state->buffer, isshort );
   memcpy( hashval, state->A, 2*( state->hashbitlen / 8 ) );
   return 0;
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -26,8 +26,6 @@ int simd_4way_update( simd_4way_context *state, const void *data,
|
||||
int simd_4way_close( simd_4way_context *state, void *hashval );
|
||||
int simd_4way_update_close( simd_4way_context *state, void *hashval,
|
||||
const void *data, int databitlen );
|
||||
int simd512_4way_full( simd_4way_context *state, void *hashval,
|
||||
const void *data, int datalen );
|
||||
|
||||
#endif
|
||||
|
||||
@@ -47,8 +45,5 @@ int simd_2way_update( simd_2way_context *state, const void *data,
|
||||
int simd_2way_close( simd_2way_context *state, void *hashval );
|
||||
int simd_2way_update_close( simd_2way_context *state, void *hashval,
|
||||
const void *data, int databitlen );
|
||||
int simd512_2way_full( simd_2way_context *state, void *hashval,
|
||||
const void *data, int datalen );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -45,18 +45,18 @@ extern "C"{
|
||||
#endif
|
||||
|
||||
/*
|
||||
static const uint64_t IV256[] = {
|
||||
0xCCD044A12FDB3E13, 0xE83590301A79A9EB,
|
||||
0x55AEA0614F816E6F, 0x2A2767A4AE9B94DB,
|
||||
0xEC06025E74DD7683, 0xE7A436CDC4746251,
|
||||
0xC36FBAF9393AD185, 0x3EEDBA1833EDFC13
|
||||
static const sph_u64 IV256[] = {
|
||||
SPH_C64(0xCCD044A12FDB3E13), SPH_C64(0xE83590301A79A9EB),
|
||||
SPH_C64(0x55AEA0614F816E6F), SPH_C64(0x2A2767A4AE9B94DB),
|
||||
SPH_C64(0xEC06025E74DD7683), SPH_C64(0xE7A436CDC4746251),
|
||||
SPH_C64(0xC36FBAF9393AD185), SPH_C64(0x3EEDBA1833EDFC13)
|
||||
};
|
||||
|
||||
static const uint64_t IV512[] = {
|
||||
0x4903ADFF749C51CE, 0x0D95DE399746DF03,
|
||||
0x8FD1934127C79BCE, 0x9A255629FF352CB1,
|
||||
0x5DB62599DF6CA7B0, 0xEABE394CA9D5C3F4,
|
||||
0x991112C71A75B523, 0xAE18A40B660FCC33
|
||||
static const sph_u64 IV512[] = {
|
||||
SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
|
||||
SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
|
||||
SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
|
||||
SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
|
||||
};
|
||||
*/
|
||||
|
||||
@@ -372,7 +372,7 @@ do { \
|
||||
|
||||
#define UBI_BIG_8WAY(etype, extra) \
|
||||
do { \
|
||||
uint64_t t0, t1, t2; \
|
||||
sph_u64 t0, t1, t2; \
|
||||
__m512i h8; \
|
||||
__m512i m0 = buf[0]; \
|
||||
__m512i m1 = buf[1]; \
|
||||
@@ -391,8 +391,8 @@ do { \
|
||||
__m512i p5 = m5; \
|
||||
__m512i p6 = m6; \
|
||||
__m512i p7 = m7; \
|
||||
t0 = (uint64_t)(bcount << 6) + (uint64_t)(extra); \
|
||||
t1 = (bcount >> 58) + ((uint64_t)(etype) << 55); \
|
||||
t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
|
||||
t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
|
||||
TFBIG_KINIT_8WAY(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
|
||||
TFBIG_8WAY_4e(0); \
|
||||
TFBIG_8WAY_4o(1); \
|
||||
@@ -425,7 +425,7 @@ do { \
|
||||
|
||||
#define DECL_STATE_BIG_8WAY \
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7; \
|
||||
uint64_t bcount;
|
||||
sph_u64 bcount;
|
||||
|
||||
|
||||
#endif // AVX512
|
||||
@@ -488,7 +488,7 @@ do { \
|
||||
// scale buf offset by 4
|
||||
#define UBI_BIG_4WAY(etype, extra) \
|
||||
do { \
|
||||
uint64_t t0, t1, t2; \
|
||||
sph_u64 t0, t1, t2; \
|
||||
__m256i h8; \
|
||||
__m256i m0 = buf[0]; \
|
||||
__m256i m1 = buf[1]; \
|
||||
@@ -507,8 +507,8 @@ do { \
|
||||
__m256i p5 = m5; \
|
||||
__m256i p6 = m6; \
|
||||
__m256i p7 = m7; \
|
||||
t0 = (uint64_t)(bcount << 6) + (uint64_t)(extra); \
|
||||
t1 = (bcount >> 58) + ((uint64_t)(etype) << 55); \
|
||||
t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
|
||||
t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
|
||||
TFBIG_KINIT_4WAY(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
|
||||
TFBIG_4WAY_4e(0); \
|
||||
TFBIG_4WAY_4o(1); \
|
||||
@@ -542,7 +542,7 @@ do { \
|
||||
|
||||
#define DECL_STATE_BIG_4WAY \
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7; \
|
||||
uint64_t bcount;
|
||||
sph_u64 bcount;
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
|
@@ -48,8 +48,14 @@ extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
// Output size in bits
|
||||
#define SPH_SIZE_skein256 256
|
||||
#define SPH_SIZE_skein512 512
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
typedef struct
|
||||
@@ -57,11 +63,11 @@ typedef struct
|
||||
__m512i buf[8];
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_8way_big_context __attribute__ ((aligned (128)));
|
||||
sph_u64 bcount;
|
||||
} sph_skein_8way_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_8way_big_context skein512_8way_context;
|
||||
typedef skein_8way_big_context skein256_8way_context;
|
||||
typedef sph_skein_8way_big_context skein512_8way_context;
|
||||
typedef sph_skein_8way_big_context skein256_8way_context;
|
||||
|
||||
void skein512_8way_init( skein512_8way_context *sc );
|
||||
void skein512_8way_update( void *cc, const void *data, size_t len );
|
||||
@@ -78,19 +84,21 @@ typedef struct
|
||||
__m256i buf[8];
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
uint64_t bcount;
|
||||
} skein_4way_big_context __attribute__ ((aligned (128)));
|
||||
sph_u64 bcount;
|
||||
} sph_skein_4way_big_context __attribute__ ((aligned (128)));
|
||||
|
||||
typedef skein_4way_big_context skein512_4way_context;
|
||||
typedef skein_4way_big_context skein256_4way_context;
|
||||
typedef sph_skein_4way_big_context skein512_4way_context;
|
||||
typedef sph_skein_4way_big_context skein256_4way_context;
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc );
|
||||
void skein512_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_4way_close( void *cc, void *dst );
|
||||
//#define skein512_4way skein512_4way_update
|
||||
|
||||
void skein256_4way_init( skein256_4way_context *sc );
|
||||
void skein256_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein256_4way_close( void *cc, void *dst );
|
||||
//#define skein256_4way skein256_4way_update
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@@ -214,14 +214,14 @@ int scanhash_drop( struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
|
||||
void drop_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr )
|
||||
uint32_t* end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
// ignore POK in first word
|
||||
// const int nonce_i = 19;
|
||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|
||||
|| ( *nonceptr >= *end_nonce_ptr ) )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
|
@@ -527,7 +527,7 @@ int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
blake512_4way_init( &x13bcd_ctx_mid );
|
||||
blake512_4way_update( &x13bcd_ctx_mid, vdata, 64 );
|
||||
blake512_4way( &x13bcd_ctx_mid, vdata, 64 );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
|
@@ -227,7 +227,7 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
blake512_4way_init( &x13sm3_ctx_mid );
|
||||
blake512_4way_update( &x13sm3_ctx_mid, vdata, 64 );
|
||||
blake512_4way( &x13sm3_ctx_mid, vdata, 64 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
if ( Htarg <= htmax[m] )
|
||||
|
@@ -65,7 +65,6 @@ union _x16r_8way_context_overlay
|
||||
|
||||
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
|
||||
|
||||
|
||||
void x16r_8way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
|
||||
@@ -99,16 +98,18 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_8way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_8way_full( &ctx.blake, vhash, input, size );
|
||||
blake512_8way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, size );
|
||||
blake512_8way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5,
|
||||
hash6, hash7, vhash );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
@@ -127,22 +128,40 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
case GROESTL:
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
groestl512_4way_full( &ctx.groestl, vhash, vhash, size );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
groestl512_4way_full( &ctx.groestl, vhash, vhash, size );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)in4, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)in5, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)in6, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)in7, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)in0, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)in1, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)in2, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)in3, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(const char*)in4, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(const char*)in5, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(const char*)in6, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(const char*)in7, size<<3 );
|
||||
#endif
|
||||
break;
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_8way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
@@ -187,27 +206,33 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
break;
|
||||
case LUFFA:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhash, vhash, size);
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
cube_4way_full( &ctx.cube, vhash, 512, vhash, size );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
cube_4way_full( &ctx.cube, vhash, 512, vhash, size );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case SHAVITE:
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
@@ -235,42 +260,54 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
sph_shavite512( &ctx.shavite, in7, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
||||
#endif
|
||||
break;
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
simd512_4way_full( &ctx.simd, vhash, vhash, size );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
simd512_4way_full( &ctx.simd, vhash, vhash, size );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__VAES__)
|
||||
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
|
||||
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
|
||||
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhash, vhash, size<<3 );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||
#else
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)in0, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)in1, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)in2, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)in3, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)in4, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)in5, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)in6, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)in7, size );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence*)in0, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence*)in1, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence*)in2, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence*)in3, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash4,
|
||||
(const BitSequence*)in4, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash5,
|
||||
(const BitSequence*)in5, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash6,
|
||||
(const BitSequence*)in6, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash7,
|
||||
(const BitSequence*)in7, size<<3 );
|
||||
#endif
|
||||
break;
|
||||
break;
|
||||
case HAMSI:
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
@@ -280,7 +317,7 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
break;
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
@@ -343,18 +380,13 @@ void x16r_8way_hash( void* output, const void* input )
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
break;
|
||||
case SHA_512:
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
sha512_8way_init( &ctx.sha512 );
|
||||
if ( i == 0 )
|
||||
sha512_8way_update( &ctx.sha512, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
sha512_8way_update( &ctx.sha512, vhash, size );
|
||||
}
|
||||
sha512_8way_update( &ctx.sha512, vhash, size );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, vhash );
|
||||
hash7, vhash );
|
||||
break;
|
||||
}
|
||||
size = 64;
|
||||
@@ -399,7 +431,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
do
|
||||
@@ -473,13 +505,15 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way_full( &ctx.blake, vhash, input, size );
|
||||
blake512_4way_update( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way_full( &ctx.blake, vhash, vhash, size );
|
||||
blake512_4way_update( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case BMW:
|
||||
@@ -495,10 +529,18 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case GROESTL:
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)in0, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)in1, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)in2, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)in3, size<<3 );
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
@@ -538,10 +580,12 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
break;
|
||||
case LUFFA:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size);
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
@@ -574,21 +618,27 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
simd512_2way_full( &ctx.simd, vhash, vhash, size );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
simd512_2way_full( &ctx.simd, vhash, vhash, size );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
break;
|
||||
case ECHO:
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)in0, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)in1, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)in2, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)in3, size );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence*)in0, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence*)in1, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence*)in2, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence*)in3, size<<3 );
|
||||
break;
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
@@ -677,7 +727,7 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
do
|
||||
|
@@ -39,13 +39,9 @@
|
||||
#include <openssl/sha.h>
|
||||
#endif
|
||||
|
||||
#if defined(X21S_8WAY) || defined(X21S_4WAY)
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (X21S_8WAY)
|
||||
|
||||
static __thread uint64_t* x21s_8way_matrix;
|
||||
|
@@ -72,19 +72,27 @@ void x17_8way_hash( void *state, const void *input )
|
||||
uint64_t hash7[8] __attribute__ ((aligned (64)));
|
||||
x17_8way_context_overlay ctx;
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
// 1 Blake
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, input, 80 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
|
||||
// 3 Groestl
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
@@ -92,44 +100,65 @@ void x17_8way_hash( void *state, const void *input )
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
// 4 Skein parallel 4 way 64 bit
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
// 7 Luffa
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
|
||||
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
|
||||
// 8 Cubehash
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
|
||||
// 9 Shavite
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
#else
|
||||
|
||||
@@ -166,13 +195,20 @@ void x17_8way_hash( void *state, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
simd512_4way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_4way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
// 10 Simd
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
|
||||
// 11 Echo
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
@@ -181,28 +217,37 @@ void x17_8way_hash( void *state, const void *input )
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash4,
|
||||
(const BitSequence *) hash4, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash5,
|
||||
(const BitSequence *) hash5, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash6,
|
||||
(const BitSequence *) hash6, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash7,
|
||||
(const BitSequence *) hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
// 12 Hamsi
|
||||
|
||||
hamsi512_8way_init( &ctx.hamsi );
|
||||
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_8way_close( &ctx.hamsi, vhash );
|
||||
@@ -210,6 +255,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
// 13 Fugue serial
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
@@ -235,6 +281,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
sph_fugue512( &ctx.fugue, hash7, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
||||
|
||||
// 14 Shabal, parallel 8 way 32 bit
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
@@ -245,6 +292,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
// 15 Whirlpool serial
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
@@ -270,6 +318,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
sph_whirlpool( &ctx.whirlpool, hash7, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||
|
||||
// 16 SHA512 parallel 64 bit
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
@@ -277,6 +326,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
sha512_8way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_8way_close( &ctx.sha512, vhash );
|
||||
|
||||
// 17 Haval parallel 32 bit
|
||||
rintrlv_8x64_8x32( vhashA, vhash, 512 );
|
||||
|
||||
haval256_5_8way_init( &ctx.haval );
|
||||
@@ -299,28 +349,25 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x17_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) && !bench )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if likely( valid_hash( lane_hash, ptarget ) )
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
@@ -354,7 +401,7 @@ typedef union _x17_4way_context_overlay x17_4way_context_overlay;
|
||||
|
||||
void x17_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (128)));
|
||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
@@ -363,59 +410,91 @@ void x17_4way_hash( void *state, const void *input )
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
x17_4way_context_overlay ctx;
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
// 1 Blake parallel 4 way 64 bit
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serialize
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
// 3 Groestl
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// Parallellize
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
// 4 Skein parallel 4 way 64 bit
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// 7 Luffa parallel 2 way 128 bit
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
|
||||
// 8 Cubehash
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
// 9 Shavite
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
// 10 Simd
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
// 11 Echo serial
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
// 12 Hamsi parallel 4 way 64 bit
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
@@ -424,6 +503,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
// 13 Fugue serial
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
@@ -437,6 +517,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
// 14 Shabal, parallel 4 way 32 bit
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
@@ -445,6 +526,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
// 15 Whirlpool serial
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
@@ -458,12 +540,14 @@ void x17_4way_hash( void *state, const void *input )
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
|
||||
// 16 SHA512 parallel 64 bit
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
// 17 Haval parallel 32 bit
|
||||
rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
@@ -474,8 +558,8 @@ void x17_4way_hash( void *state, const void *input )
|
||||
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -486,30 +570,27 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x17_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash7[ lane ] <= Htarg && !bench ) )
|
||||
{
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -11,8 +11,9 @@
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
@@ -73,7 +74,9 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
const int dataLen = 128;
|
||||
xevan_8way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, input, 80 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
memset( &vhash[8<<3], 0, 64<<3 );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
@@ -84,8 +87,10 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, dataLen );
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, dataLen );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, dataLen<<3 );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
@@ -94,14 +99,30 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7,
|
||||
dataLen<<3 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
@@ -122,16 +143,22 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
|
||||
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
#else
|
||||
|
||||
@@ -168,13 +195,17 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
simd512_4way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_4way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, dataLen );
|
||||
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, dataLen );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashA, vhashA, dataLen<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
@@ -183,23 +214,31 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, dataLen );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash4,
|
||||
(const BitSequence *) hash4, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash5,
|
||||
(const BitSequence *) hash5, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash6,
|
||||
(const BitSequence *) hash6, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash7,
|
||||
(const BitSequence *) hash7, dataLen<<3 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
|
||||
@@ -289,7 +328,9 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
memset( &vhash[ 4<<3 ], 0, (dataLen-32) << 3 );
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, vhash, dataLen );
|
||||
blake512_8way_close(&ctx.blake, vhash);
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
|
||||
@@ -299,8 +340,10 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, dataLen );
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, dataLen );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, dataLen<<3 );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
@@ -309,14 +352,30 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, dataLen<<3 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7,
|
||||
dataLen<<3 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
@@ -337,16 +396,22 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
|
||||
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
#else
|
||||
|
||||
@@ -383,13 +448,17 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
simd512_4way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_4way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, dataLen );
|
||||
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, dataLen );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashA, vhashA, dataLen<<3 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
@@ -398,22 +467,30 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, dataLen<<3 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, dataLen<<3 );
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, dataLen );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash4,
|
||||
(const BitSequence *) hash4, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash5,
|
||||
(const BitSequence *) hash5, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash6,
|
||||
(const BitSequence *) hash6, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash7,
|
||||
(const BitSequence *) hash7, dataLen<<3 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, dataLen<<3 );
|
||||
@@ -580,7 +657,9 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
// parallel 4 way
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close(&ctx.blake, vhash);
|
||||
memset( &vhash[8<<2], 0, 64<<2 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
@@ -590,10 +669,18 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
// Serial
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
|
||||
dataLen<<3 );
|
||||
|
||||
// Parallel 4way
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
@@ -612,11 +699,15 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
@@ -631,15 +722,18 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, dataLen );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, dataLen<<3 );
|
||||
// Parallel
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
@@ -711,10 +805,18 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
|
||||
dataLen<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
|
||||
dataLen<<3 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
@@ -732,11 +834,15 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
@@ -751,14 +857,18 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, dataLen );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, dataLen );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, dataLen<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, dataLen<<3 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
@@ -824,7 +934,7 @@ int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
@@ -167,10 +167,10 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
#else
|
||||
|
||||
@@ -214,12 +214,12 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
|
409
algo/yescrypt/sha256_Y.c
Normal file
409
algo/yescrypt/sha256_Y.c
Normal file
@@ -0,0 +1,409 @@
|
||||
/*-
|
||||
* Copyright 2005,2007,2009 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "sha256_Y.h"
|
||||
#include "compat.h"
|
||||
|
||||
/*
 * Serialize 32-bit words into a byte string, one be32enc() call per word
 * (big-endian byte order).
 *
 *   dst - receives the encoded bytes
 *   src - words to encode; len / 4 of them are read
 *   len - length of the OUTPUT in bytes; expected to be a multiple of 4,
 *         any remainder is silently ignored
 */
static void
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
    const size_t nwords = len / 4;
    size_t w;

    /* Walk the output four bytes at a time, one word per step. */
    for (w = 0; w < nwords; w++, dst += 4)
        be32enc(dst, src[w]);
}
|
||||
|
||||
/*
 * Parse a byte string into 32-bit words, one be32dec() call per word
 * (big-endian byte order).
 *
 *   dst - receives len / 4 decoded words
 *   src - bytes to decode
 *   len - length of the INPUT in bytes; expected to be a multiple of 4,
 *         any remainder is silently ignored
 */
static void
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
{
    const size_t nwords = len / 4;
    size_t w;

    /* Walk the input four bytes at a time, one word per step. */
    for (w = 0; w < nwords; w++, src += 4)
        dst[w] = be32dec(src);
}
|
||||
|
||||
/*
 * Elementary functions used by SHA256.
 *
 * Every argument and every expansion is fully parenthesized, so the macros
 * give the intended result when an argument is itself an expression
 * (e.g. Ch(a | b, c, d)) or when the macro is embedded in a larger one.
 * Arguments may be evaluated more than once: do not pass expressions with
 * side effects.  Operands are expected to be 32-bit unsigned values and
 * ROTR requires 0 < n < 32.
 */
#define Ch(x, y, z)     (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)    (((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)       ((x) >> (n))
#define ROTR(x, n)      (((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.
 *
 * Updates d and h in place.  Uses two scratch variables named t0 and t1
 * that must be declared (as uint32_t) in the calling scope.  Wrapped in
 * do { } while (0) so that one invocation is exactly one statement and is
 * safe under an unbraced if/else.
 */
#define RND(a, b, c, d, e, f, g, h, k)              \
    do {                                            \
        t0 = (h) + S1(e) + Ch(e, f, g) + (k);       \
        t1 = S0(a) + Maj(a, b, c);                  \
        (d) += t0;                                  \
        (h) = t0 + t1;                              \
    } while (0)

/*
 * Adjusted round function for rotating state: instead of shifting the
 * eight working variables after each round, round i addresses them through
 * indices rotated by i (mod 8).  S is the 8-word state, W the message
 * schedule, i the round number (0..63) and k that round's constant.
 */
#define RNDr(S, W, i, k)                                    \
    RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8],           \
        (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8],           \
        (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8],           \
        (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8],           \
        (W)[i] + (k))
|
||||
|
||||
/*
|
||||
* SHA256 block compression function. The 256-bit state is transformed via
|
||||
* the 512-bit input block to produce a new state.
|
||||
*/
|
||||
static void
|
||||
SHA256_Transform_Y(uint32_t * state, const unsigned char block[64])
|
||||
{
|
||||
uint32_t _ALIGN(128) W[64], S[8];
|
||||
uint32_t t0, t1;
|
||||
int i;
|
||||
|
||||
/* 1. Prepare message schedule W. */
|
||||
be32dec_vect(W, block, 64);
|
||||
for (i = 16; i < 64; i++)
|
||||
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
|
||||
|
||||
/* 2. Initialize working variables. */
|
||||
memcpy(S, state, 32);
|
||||
|
||||
/* 3. Mix. */
|
||||
RNDr(S, W, 0, 0x428a2f98);
|
||||
RNDr(S, W, 1, 0x71374491);
|
||||
RNDr(S, W, 2, 0xb5c0fbcf);
|
||||
RNDr(S, W, 3, 0xe9b5dba5);
|
||||
RNDr(S, W, 4, 0x3956c25b);
|
||||
RNDr(S, W, 5, 0x59f111f1);
|
||||
RNDr(S, W, 6, 0x923f82a4);
|
||||
RNDr(S, W, 7, 0xab1c5ed5);
|
||||
RNDr(S, W, 8, 0xd807aa98);
|
||||
RNDr(S, W, 9, 0x12835b01);
|
||||
RNDr(S, W, 10, 0x243185be);
|
||||
RNDr(S, W, 11, 0x550c7dc3);
|
||||
RNDr(S, W, 12, 0x72be5d74);
|
||||
RNDr(S, W, 13, 0x80deb1fe);
|
||||
RNDr(S, W, 14, 0x9bdc06a7);
|
||||
RNDr(S, W, 15, 0xc19bf174);
|
||||
RNDr(S, W, 16, 0xe49b69c1);
|
||||
RNDr(S, W, 17, 0xefbe4786);
|
||||
RNDr(S, W, 18, 0x0fc19dc6);
|
||||
RNDr(S, W, 19, 0x240ca1cc);
|
||||
RNDr(S, W, 20, 0x2de92c6f);
|
||||
RNDr(S, W, 21, 0x4a7484aa);
|
||||
RNDr(S, W, 22, 0x5cb0a9dc);
|
||||
RNDr(S, W, 23, 0x76f988da);
|
||||
RNDr(S, W, 24, 0x983e5152);
|
||||
RNDr(S, W, 25, 0xa831c66d);
|
||||
RNDr(S, W, 26, 0xb00327c8);
|
||||
RNDr(S, W, 27, 0xbf597fc7);
|
||||
RNDr(S, W, 28, 0xc6e00bf3);
|
||||
RNDr(S, W, 29, 0xd5a79147);
|
||||
RNDr(S, W, 30, 0x06ca6351);
|
||||
RNDr(S, W, 31, 0x14292967);
|
||||
RNDr(S, W, 32, 0x27b70a85);
|
||||
RNDr(S, W, 33, 0x2e1b2138);
|
||||
RNDr(S, W, 34, 0x4d2c6dfc);
|
||||
RNDr(S, W, 35, 0x53380d13);
|
||||
RNDr(S, W, 36, 0x650a7354);
|
||||
RNDr(S, W, 37, 0x766a0abb);
|
||||
RNDr(S, W, 38, 0x81c2c92e);
|
||||
RNDr(S, W, 39, 0x92722c85);
|
||||
RNDr(S, W, 40, 0xa2bfe8a1);
|
||||
RNDr(S, W, 41, 0xa81a664b);
|
||||
RNDr(S, W, 42, 0xc24b8b70);
|
||||
RNDr(S, W, 43, 0xc76c51a3);
|
||||
RNDr(S, W, 44, 0xd192e819);
|
||||
RNDr(S, W, 45, 0xd6990624);
|
||||
RNDr(S, W, 46, 0xf40e3585);
|
||||
RNDr(S, W, 47, 0x106aa070);
|
||||
RNDr(S, W, 48, 0x19a4c116);
|
||||
RNDr(S, W, 49, 0x1e376c08);
|
||||
RNDr(S, W, 50, 0x2748774c);
|
||||
RNDr(S, W, 51, 0x34b0bcb5);
|
||||
RNDr(S, W, 52, 0x391c0cb3);
|
||||
RNDr(S, W, 53, 0x4ed8aa4a);
|
||||
RNDr(S, W, 54, 0x5b9cca4f);
|
||||
RNDr(S, W, 55, 0x682e6ff3);
|
||||
RNDr(S, W, 56, 0x748f82ee);
|
||||
RNDr(S, W, 57, 0x78a5636f);
|
||||
RNDr(S, W, 58, 0x84c87814);
|
||||
RNDr(S, W, 59, 0x8cc70208);
|
||||
RNDr(S, W, 60, 0x90befffa);
|
||||
RNDr(S, W, 61, 0xa4506ceb);
|
||||
RNDr(S, W, 62, 0xbef9a3f7);
|
||||
RNDr(S, W, 63, 0xc67178f2);
|
||||
|
||||
/* 4. Mix local working variables into global state */
|
||||
for (i = 0; i < 8; i++)
|
||||
state[i] += S[i];
|
||||
#if 0
|
||||
/* Clean the stack. */
|
||||
memset(W, 0, 256);
|
||||
memset(S, 0, 32);
|
||||
t0 = t1 = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * SHA-256 padding source: one 0x80 marker byte followed by 63 zero bytes
 * (the remaining elements are zero-initialized).  It is read-only data, so
 * it is declared const; SHA256_Update_Y takes its input as const void *.
 */
static const unsigned char PAD[64] = { 0x80 };
|
||||
|
||||
/* Add padding and terminating bit-count. */
|
||||
static void
|
||||
SHA256_Pad_Y(SHA256_CTX_Y * ctx)
|
||||
{
|
||||
unsigned char len[8];
|
||||
uint32_t r, plen;
|
||||
|
||||
/*
|
||||
* Convert length to a vector of bytes -- we do this now rather
|
||||
* than later because the length will change after we pad.
|
||||
*/
|
||||
be32enc_vect(len, ctx->count, 8);
|
||||
|
||||
/* Add 1--64 bytes so that the resulting length is 56 mod 64 */
|
||||
r = (ctx->count[1] >> 3) & 0x3f;
|
||||
plen = (r < 56) ? (56 - r) : (120 - r);
|
||||
SHA256_Update_Y(ctx, PAD, (size_t)plen);
|
||||
|
||||
/* Add the terminating bit-count */
|
||||
SHA256_Update_Y(ctx, len, 8);
|
||||
}
|
||||
|
||||
/* SHA-256 initialization. Begins a SHA-256 operation. */
|
||||
void
|
||||
SHA256_Init_Y(SHA256_CTX_Y * ctx)
|
||||
{
|
||||
/* Zero bits processed so far */
|
||||
ctx->count[0] = ctx->count[1] = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0x6A09E667;
|
||||
ctx->state[1] = 0xBB67AE85;
|
||||
ctx->state[2] = 0x3C6EF372;
|
||||
ctx->state[3] = 0xA54FF53A;
|
||||
ctx->state[4] = 0x510E527F;
|
||||
ctx->state[5] = 0x9B05688C;
|
||||
ctx->state[6] = 0x1F83D9AB;
|
||||
ctx->state[7] = 0x5BE0CD19;
|
||||
}
|
||||
|
||||
/* Add bytes into the hash */
|
||||
void
|
||||
SHA256_Update_Y(SHA256_CTX_Y * ctx, const void *in, size_t len)
|
||||
{
|
||||
uint32_t bitlen[2];
|
||||
uint32_t r;
|
||||
const unsigned char *src = in;
|
||||
|
||||
/* Number of bytes left in the buffer from previous updates */
|
||||
r = (ctx->count[1] >> 3) & 0x3f;
|
||||
|
||||
/* Convert the length into a number of bits */
|
||||
bitlen[1] = ((uint32_t)len) << 3;
|
||||
bitlen[0] = (uint32_t)(len >> 29);
|
||||
|
||||
/* Update number of bits */
|
||||
if ((ctx->count[1] += bitlen[1]) < bitlen[1])
|
||||
ctx->count[0]++;
|
||||
ctx->count[0] += bitlen[0];
|
||||
|
||||
/* Handle the case where we don't need to perform any transforms */
|
||||
if (len < 64 - r) {
|
||||
memcpy(&ctx->buf[r], src, len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Finish the current block */
|
||||
memcpy(&ctx->buf[r], src, 64 - r);
|
||||
SHA256_Transform_Y(ctx->state, ctx->buf);
|
||||
src += 64 - r;
|
||||
len -= 64 - r;
|
||||
|
||||
/* Perform complete blocks */
|
||||
while (len >= 64) {
|
||||
SHA256_Transform_Y(ctx->state, src);
|
||||
src += 64;
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/* Copy left over data into buffer */
|
||||
memcpy(ctx->buf, src, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHA-256 finalization. Pads the input data, exports the hash value,
|
||||
* and clears the context state.
|
||||
*/
|
||||
void
|
||||
SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx)
|
||||
{
|
||||
/* Add padding */
|
||||
SHA256_Pad_Y(ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be32enc_vect(digest, ctx->state, 32);
|
||||
|
||||
/* Clear the context state */
|
||||
memset((void *)ctx, 0, sizeof(*ctx));
|
||||
}
|
||||
|
||||
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||
void
|
||||
HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen)
|
||||
{
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
const unsigned char * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if (Klen > 64) {
|
||||
SHA256_Init(&ctx->ictx);
|
||||
SHA256_Update(&ctx->ictx, K, Klen);
|
||||
SHA256_Final(khash, &ctx->ictx);
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
SHA256_Init(&ctx->ictx);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
SHA256_Update(&ctx->ictx, pad, 64);
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
SHA256_Init(&ctx->octx);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
SHA256_Update(&ctx->octx, pad, 64);
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(khash, 0, 32);
|
||||
}
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
SHA256_Update(&ctx->ictx, in, len);
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx)
|
||||
{
|
||||
unsigned char ihash[32];
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final(ihash, &ctx->ictx);
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update(&ctx->octx, ihash, 32);
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final(digest, &ctx->octx);
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void
|
||||
PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX_Y PShctx, hctx;
|
||||
uint8_t _ALIGN(128) T[32];
|
||||
uint8_t _ALIGN(128) U[32];
|
||||
uint8_t ivec[4];
|
||||
size_t i, clen;
|
||||
uint64_t j;
|
||||
int k;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update_Y(&PShctx, salt, saltlen);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y));
|
||||
HMAC_SHA256_Update_Y(&hctx, ivec, 4);
|
||||
HMAC_SHA256_Final_Y(U, &hctx);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(T, U, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update_Y(&hctx, U, 32);
|
||||
HMAC_SHA256_Final_Y(U, &hctx);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++)
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean PShctx, since we never called _Final on it. */
|
||||
//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
|
||||
}
|
@@ -1,6 +1,5 @@
|
||||
/*-
|
||||
* Copyright 2009 Colin Percival
|
||||
* Copyright 2013-2018 Alexander Peslyak
|
||||
* Copyright 2005,2007,2009 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -24,26 +23,47 @@
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
* $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
|
||||
*/
|
||||
#ifndef _YESPOWERR8G_H_
|
||||
#define _YESPOWERR8G_H_
|
||||
|
||||
#ifndef _SHA256_H_
|
||||
#define _SHA256_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h> /* for size_t */
|
||||
#include "algo-gate-api.h"
|
||||
#include "algo/yespower/yespower.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef struct SHA256Context {
|
||||
uint32_t state[8];
|
||||
uint32_t count[2];
|
||||
unsigned char buf[64];
|
||||
} SHA256_CTX_Y;
|
||||
|
||||
extern int yespowerr8g_tls(const uint8_t *src, size_t srclen,
|
||||
const yespower_params_t *params, yespower_binary_t *dst);
|
||||
/*
|
||||
typedef struct HMAC_SHA256Context {
|
||||
SHA256_CTX_Y ictx;
|
||||
SHA256_CTX_Y octx;
|
||||
} HMAC_SHA256_CTX_Y;
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
typedef struct HMAC_SHA256Context {
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
} HMAC_SHA256_CTX_Y;
|
||||
|
||||
#endif /* !_YESPOWERR8G_H_ */
|
||||
void SHA256_Init_Y(SHA256_CTX_Y *);
|
||||
void SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t);
|
||||
void SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *);
|
||||
void HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
|
||||
void HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
|
||||
void HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *);
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void PBKDF2_SHA256_Y(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
uint64_t, uint8_t *, size_t);
|
||||
|
||||
#endif /* !_SHA256_H_ */
|
124
algo/yescrypt/sysendian.h
Normal file
124
algo/yescrypt/sysendian.h
Normal file
@@ -0,0 +1,124 @@
|
||||
/*-
|
||||
* Copyright 2007-2009 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
#ifndef _SYSENDIAN_H_
|
||||
#define _SYSENDIAN_H_
|
||||
|
||||
/* If we don't have be64enc, the <sys/endian.h> we have isn't usable. */
|
||||
#if !HAVE_DECL_BE64ENC
|
||||
#undef HAVE_SYS_ENDIAN_H
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SYS_ENDIAN_H
|
||||
|
||||
#include <sys/endian.h>
|
||||
|
||||
#else
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
|
||||
/*
 * be64dec: read a 64-bit unsigned value stored big-endian (most significant
 * byte first) in the 8 bytes at pp. Bytes are read one at a time.
 */
static inline uint64_t
be64dec(const void *pp)
{
    const uint8_t *p = (uint8_t const *)pp;

    return ((uint64_t)(p[7]) + ((uint64_t)(p[6]) << 8) +
        ((uint64_t)(p[5]) << 16) + ((uint64_t)(p[4]) << 24) +
        ((uint64_t)(p[3]) << 32) + ((uint64_t)(p[2]) << 40) +
        ((uint64_t)(p[1]) << 48) + ((uint64_t)(p[0]) << 56));
}
|
||||
|
||||
/*
 * be64enc: store the 64-bit value x big-endian (most significant byte
 * first) into the 8 bytes at pp. Bytes are written one at a time.
 */
static inline void
be64enc(void *pp, uint64_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[7] = x & 0xff;
    p[6] = (x >> 8) & 0xff;
    p[5] = (x >> 16) & 0xff;
    p[4] = (x >> 24) & 0xff;
    p[3] = (x >> 32) & 0xff;
    p[2] = (x >> 40) & 0xff;
    p[1] = (x >> 48) & 0xff;
    p[0] = (x >> 56) & 0xff;
}
|
||||
|
||||
|
||||
|
||||
/*
 * le64dec: read a 64-bit unsigned value stored little-endian (least
 * significant byte first) in the 8 bytes at pp. Bytes are read one at a time.
 */
static inline uint64_t
le64dec(const void *pp)
{
    const uint8_t *p = (uint8_t const *)pp;

    return ((uint64_t)(p[0]) + ((uint64_t)(p[1]) << 8) +
        ((uint64_t)(p[2]) << 16) + ((uint64_t)(p[3]) << 24) +
        ((uint64_t)(p[4]) << 32) + ((uint64_t)(p[5]) << 40) +
        ((uint64_t)(p[6]) << 48) + ((uint64_t)(p[7]) << 56));
}
|
||||
|
||||
/*
 * le64enc: store the 64-bit value x little-endian (least significant byte
 * first) into the 8 bytes at pp. Bytes are written one at a time.
 */
static inline void
le64enc(void *pp, uint64_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[0] = x & 0xff;
    p[1] = (x >> 8) & 0xff;
    p[2] = (x >> 16) & 0xff;
    p[3] = (x >> 24) & 0xff;
    p[4] = (x >> 32) & 0xff;
    p[5] = (x >> 40) & 0xff;
    p[6] = (x >> 48) & 0xff;
    p[7] = (x >> 56) & 0xff;
}
|
||||
|
||||
|
||||
/*
 * be32dec: read a 32-bit unsigned value stored big-endian (most significant
 * byte first) in the 4 bytes at pp. Bytes are read one at a time.
 */
static inline uint32_t
be32dec(const void *pp)
{
    const uint8_t *p = (uint8_t const *)pp;

    return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
        ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
}
|
||||
|
||||
/*
 * be32enc: store the 32-bit value x big-endian (most significant byte
 * first) into the 4 bytes at pp. Bytes are written one at a time.
 */
static inline void
be32enc(void *pp, uint32_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[3] = x & 0xff;
    p[2] = (x >> 8) & 0xff;
    p[1] = (x >> 16) & 0xff;
    p[0] = (x >> 24) & 0xff;
}
|
||||
|
||||
#endif /* !HAVE_SYS_ENDIAN_H */
|
||||
|
||||
#endif /* !_SYSENDIAN_H_ */
|
@@ -48,7 +48,9 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/sha/hmac-sha256-hash.h"
|
||||
#include "sha256_Y.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "yescrypt.h"
|
||||
#include "yescrypt-platform.h"
|
||||
|
||||
@@ -1310,7 +1312,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
||||
}
|
||||
|
||||
/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
|
||||
PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size);
|
||||
PBKDF2_SHA256_Y(passwd, passwdlen, salt, saltlen, 1, B, B_size);
|
||||
|
||||
if (t || flags)
|
||||
memcpy(sha256, B, sizeof(sha256));
|
||||
@@ -1340,7 +1342,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
||||
}
|
||||
|
||||
/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
|
||||
PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen);
|
||||
PBKDF2_SHA256_Y(passwd, passwdlen, B, B_size, 1, buf, buflen);
|
||||
|
||||
/*
|
||||
* Except when computing classic scrypt, allow all computation so far
|
||||
@@ -1352,14 +1354,14 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
||||
if ((t || flags) && buflen == sizeof(sha256)) {
|
||||
/* Compute ClientKey */
|
||||
{
|
||||
HMAC_SHA256_CTX ctx;
|
||||
HMAC_SHA256_Init(&ctx, buf, buflen);
|
||||
HMAC_SHA256_CTX_Y ctx;
|
||||
HMAC_SHA256_Init_Y(&ctx, buf, buflen);
|
||||
if ( yescrypt_client_key )
|
||||
HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
|
||||
HMAC_SHA256_Update_Y( &ctx, (uint8_t*)yescrypt_client_key,
|
||||
yescrypt_client_key_len );
|
||||
else
|
||||
HMAC_SHA256_Update( &ctx, salt, saltlen );
|
||||
HMAC_SHA256_Final(sha256, &ctx);
|
||||
HMAC_SHA256_Update_Y( &ctx, salt, saltlen );
|
||||
HMAC_SHA256_Final_Y(sha256, &ctx);
|
||||
}
|
||||
/* Compute StoredKey */
|
||||
{
|
||||
|
@@ -25,7 +25,7 @@
|
||||
#include "compat.h"
|
||||
|
||||
#include "yescrypt.h"
|
||||
#include "algo/sha/hmac-sha256-hash.h"
|
||||
#include "sha256_Y.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#define BYTES2CHARS(bytes) \
|
||||
@@ -385,30 +385,35 @@ void yescrypthash(void *output, const void *input)
|
||||
int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
endiandata[19] = n;
|
||||
do {
|
||||
yescrypt_hash((char*) endiandata, (char*) vhash, 80);
|
||||
if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
be32enc( pdata+19, n );
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
endiandata[19] = ++n;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
for (int k = 0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
yescrypt_hash((char*) endiandata, (char*) vhash, 80);
|
||||
if (vhash[7] <= Htarg && fulltest(vhash, ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
n++;
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void yescrypt_gate_base(algo_gate_t *gate )
|
||||
|
@@ -30,8 +30,9 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
#include <algo/yespower/crypto/sph_types.h>
|
||||
#include <algo/yespower/utils/sysendian.h>
|
||||
#include "blake2b-yp.h"
|
||||
|
||||
// Cyclic right rotation.
|
||||
@@ -271,7 +272,7 @@ void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t *
|
||||
{
|
||||
hmac_yp_ctx PShctx, hctx;
|
||||
size_t i;
|
||||
uint32_t ivec;
|
||||
uint8_t ivec[4];
|
||||
uint8_t U[32];
|
||||
uint8_t T[32];
|
||||
uint64_t j;
|
||||
@@ -285,11 +286,11 @@ void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t *
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
ivec = bswap_32( i+1 );
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(hmac_yp_ctx));
|
||||
hmac_blake2b_yp_update(&hctx, &ivec, 4);
|
||||
hmac_blake2b_yp_update(&hctx, ivec, 4);
|
||||
hmac_blake2b_yp_final(&hctx, U);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
|
1
algo/yespower/insecure_memzero.h
Normal file
1
algo/yespower/insecure_memzero.h
Normal file
@@ -0,0 +1 @@
|
||||
/*
 * No-op stand-in: the macro expands to nothing, so its arguments are not
 * evaluated and no memory is zeroed.
 */
#define insecure_memzero(buf, len) /* empty */
|
@@ -28,10 +28,46 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "simd-utils.h"
|
||||
#include "hmac-sha256-hash.h"
|
||||
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "sha256_p.h"
|
||||
#include "compat.h"
|
||||
|
||||
|
||||
/* Elementary functions used by SHA256 */
|
||||
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
||||
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
||||
#define SHR(x, n) (x >> n)
|
||||
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
||||
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
||||
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
||||
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
|
||||
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
|
||||
|
||||
/* SHA256 round function */
|
||||
#define RND(a, b, c, d, e, f, g, h, k) \
|
||||
t0 = h + S1(e) + Ch(e, f, g) + k; \
|
||||
t1 = S0(a) + Maj(a, b, c); \
|
||||
d += t0; \
|
||||
h = t0 + t1;
|
||||
|
||||
/* Adjusted round function for rotating state */
|
||||
#define RNDr(S, W, i, k) \
|
||||
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
||||
S[(66 - i) % 8], S[(67 - i) % 8], \
|
||||
S[(68 - i) % 8], S[(69 - i) % 8], \
|
||||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
||||
W[i] + k)
|
||||
|
||||
/*
|
||||
static unsigned char PAD[64] = {
|
||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
*/
|
||||
/**
|
||||
* SHA256_Buf(in, len, digest):
|
||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
||||
@@ -40,9 +76,9 @@ void
|
||||
SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, in, len );
|
||||
SHA256_Final( digest, &ctx );
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, in, len );
|
||||
SHA256_Final( digest, &ctx );
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -51,18 +87,19 @@ SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
|
||||
* length ${Klen}, and write the result to ${digest}.
|
||||
*/
|
||||
void
|
||||
HMAC_SHA256_Buf( const void *K, size_t Klen, const void *in, size_t len,
|
||||
uint8_t digest[32])
|
||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
||||
uint8_t digest[32])
|
||||
{
|
||||
HMAC_SHA256_CTX ctx;
|
||||
HMAC_SHA256_Init( &ctx, K, Klen );
|
||||
HMAC_SHA256_Update( &ctx, in, len );
|
||||
HMAC_SHA256_Final( digest, &ctx );
|
||||
HMAC_SHA256_CTX ctx;
|
||||
|
||||
HMAC_SHA256_Init( &ctx, K, Klen );
|
||||
HMAC_SHA256_Update( &ctx, in, len );
|
||||
HMAC_SHA256_Final( digest, &ctx );
|
||||
}
|
||||
|
||||
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||
void
|
||||
HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
|
||||
HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen )
|
||||
{
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
@@ -70,8 +107,7 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
{
|
||||
if (Klen > 64) {
|
||||
SHA256_Init( &ctx->ictx );
|
||||
SHA256_Update( &ctx->ictx, K, Klen );
|
||||
SHA256_Final( khash, &ctx->ictx );
|
||||
@@ -80,7 +116,7 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
SHA256_Init( &ctx->ictx );
|
||||
SHA256_Init( &ctx->ictx );
|
||||
memset( pad, 0x36, 64 );
|
||||
for ( i = 0; i < Klen; i++ )
|
||||
pad[i] ^= K[i];
|
||||
@@ -92,19 +128,23 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
|
||||
for ( i = 0; i < Klen; i++ )
|
||||
pad[i] ^= K[i];
|
||||
SHA256_Update( &ctx->octx, pad, 64 );
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(khash, 0, 32);
|
||||
}
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Update( HMAC_SHA256_CTX *ctx, const void *in, size_t len )
|
||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
SHA256_Update( &ctx->ictx, in, len );
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
|
||||
HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx )
|
||||
{
|
||||
unsigned char ihash[32];
|
||||
|
||||
@@ -116,6 +156,9 @@ HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final( digest, &ctx->octx );
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -124,51 +167,52 @@ HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void
|
||||
PBKDF2_SHA256( const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
|
||||
size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen )
|
||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX PShctx, hctx;
|
||||
uint8_t _ALIGN(128) T[32];
|
||||
uint8_t _ALIGN(128) U[32];
|
||||
uint32_t ivec;
|
||||
uint8_t ivec[4];
|
||||
size_t i, clen;
|
||||
uint64_t j;
|
||||
int k;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
HMAC_SHA256_Init( &PShctx, passwd, passwdlen );
|
||||
HMAC_SHA256_Update( &PShctx, salt, saltlen );
|
||||
HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update(&PShctx, salt, saltlen);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for ( i = 0; i * 32 < dkLen; i++ )
|
||||
{
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
ivec = bswap_32( i+1 );
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy( &hctx, &PShctx, sizeof(HMAC_SHA256_CTX) );
|
||||
HMAC_SHA256_Update( &hctx, &ivec, 4 );
|
||||
HMAC_SHA256_Final( U, &hctx );
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
HMAC_SHA256_Update(&hctx, ivec, 4);
|
||||
HMAC_SHA256_Final(U, &hctx);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy( T, U, 32 );
|
||||
memcpy(T, U, 32);
|
||||
|
||||
for ( j = 2; j <= c; j++ )
|
||||
{
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
HMAC_SHA256_Init( &hctx, passwd, passwdlen );
|
||||
HMAC_SHA256_Update( &hctx, U, 32 );
|
||||
HMAC_SHA256_Final( U, &hctx );
|
||||
HMAC_SHA256_Init(&hctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update(&hctx, U, 32);
|
||||
HMAC_SHA256_Final(U, &hctx);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for ( k = 0; k < 32; k++ )
|
||||
for (k = 0; k < 32; k++)
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if ( clen > 32 )
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
memcpy( &buf[i * 32], T, clen );
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean PShctx, since we never called _Final on it. */
|
||||
//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
|
||||
}
|
@@ -26,24 +26,23 @@
|
||||
* $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
|
||||
*/
|
||||
|
||||
#ifndef HMAC_SHA256_H__
|
||||
#define HMAC_SHA256_H__
|
||||
#ifndef _SHA256_H_
|
||||
#define _SHA256_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
typedef struct HMAC_SHA256Context
|
||||
{
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
typedef struct HMAC_SHA256Context {
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
} HMAC_SHA256_CTX;
|
||||
|
||||
void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||
void SHA256_Buf( const void * in, size_t len, uint8_t digest[32] );
|
||||
void HMAC_SHA256_Init( HMAC_SHA256_CTX *, const void *, size_t );
|
||||
void HMAC_SHA256_Update( HMAC_SHA256_CTX *, const void *, size_t );
|
||||
void HMAC_SHA256_Final( unsigned char [32], HMAC_SHA256_CTX * );
|
||||
void HMAC_SHA256_Buf( const void *, size_t Klen, const void *,
|
||||
void HMAC_SHA256_Buf( const void * K, size_t Klen, const void * in,
|
||||
size_t len, uint8_t digest[32] );
|
||||
|
||||
/**
|
||||
@@ -54,4 +53,4 @@ void HMAC_SHA256_Buf( const void *, size_t Klen, const void *,
|
||||
void PBKDF2_SHA256( const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
uint64_t, uint8_t *, size_t);
|
||||
|
||||
#endif // HMAC_SHA256_H__
|
||||
#endif /* !_SHA256_H_ */
|
94
algo/yespower/sysendian.h
Normal file
94
algo/yespower/sysendian.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/*-
|
||||
* Copyright 2007-2014 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SYSENDIAN_H_
|
||||
#define _SYSENDIAN_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Avoid namespace collisions with BSD <sys/endian.h>. */
|
||||
#define be32dec libcperciva_be32dec
|
||||
#define be32enc libcperciva_be32enc
|
||||
#define be64enc libcperciva_be64enc
|
||||
#define le32dec libcperciva_le32dec
|
||||
#define le32enc libcperciva_le32enc
|
||||
|
||||
/*
 * be32dec: read a 32-bit unsigned value stored big-endian (most significant
 * byte first) in the 4 bytes at pp. Bytes are read one at a time.
 */
static inline uint32_t
be32dec(const void * pp)
{
    const uint8_t * p = (uint8_t const *)pp;

    return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
        ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
}
|
||||
|
||||
/*
 * be32enc: store the 32-bit value x big-endian (most significant byte
 * first) into the 4 bytes at pp. Bytes are written one at a time.
 */
static inline void
be32enc(void * pp, uint32_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[3] = x & 0xff;
    p[2] = (x >> 8) & 0xff;
    p[1] = (x >> 16) & 0xff;
    p[0] = (x >> 24) & 0xff;
}
|
||||
|
||||
/*
 * be64enc: store the 64-bit value x big-endian (most significant byte
 * first) into the 8 bytes at pp. Bytes are written one at a time.
 */
static inline void
be64enc(void * pp, uint64_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[7] = x & 0xff;
    p[6] = (x >> 8) & 0xff;
    p[5] = (x >> 16) & 0xff;
    p[4] = (x >> 24) & 0xff;
    p[3] = (x >> 32) & 0xff;
    p[2] = (x >> 40) & 0xff;
    p[1] = (x >> 48) & 0xff;
    p[0] = (x >> 56) & 0xff;
}
|
||||
|
||||
/*
 * le32dec: read a 32-bit unsigned value stored little-endian (least
 * significant byte first) in the 4 bytes at pp. Bytes are read one at a time.
 */
static inline uint32_t
le32dec(const void * pp)
{
    const uint8_t * p = (uint8_t const *)pp;

    return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
        ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
}
|
||||
|
||||
/*
 * le32enc: store the 32-bit value x little-endian (least significant byte
 * first) into the 4 bytes at pp. Bytes are written one at a time.
 */
static inline void
le32enc(void * pp, uint32_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[0] = x & 0xff;
    p[1] = (x >> 8) & 0xff;
    p[2] = (x >> 16) & 0xff;
    p[3] = (x >> 24) & 0xff;
}
|
||||
|
||||
#endif /* !_SYSENDIAN_H_ */
|
1
algo/yespower/utils/insecure_memzero.h
Normal file
1
algo/yespower/utils/insecure_memzero.h
Normal file
@@ -0,0 +1 @@
|
||||
/*
 * No-op stand-in: the macro expands to nothing, so its arguments are not
 * evaluated and no memory is zeroed.
 */
#define insecure_memzero(buf, len) /* empty */
|
94
algo/yespower/utils/sysendian.h
Normal file
94
algo/yespower/utils/sysendian.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/*-
|
||||
* Copyright 2007-2014 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SYSENDIAN_H_
|
||||
#define _SYSENDIAN_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Avoid namespace collisions with BSD <sys/endian.h>. */
|
||||
#define be32dec libcperciva_be32dec
|
||||
#define be32enc libcperciva_be32enc
|
||||
#define be64enc libcperciva_be64enc
|
||||
#define le32dec libcperciva_le32dec
|
||||
#define le32enc libcperciva_le32enc
|
||||
|
||||
/*
 * be32dec: read a 32-bit unsigned value stored big-endian (most significant
 * byte first) in the 4 bytes at pp. Bytes are read one at a time.
 */
static inline uint32_t
be32dec(const void * pp)
{
    const uint8_t * p = (uint8_t const *)pp;

    return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
        ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
}
|
||||
|
||||
/*
 * be32enc: store the 32-bit value x big-endian (most significant byte
 * first) into the 4 bytes at pp. Bytes are written one at a time.
 */
static inline void
be32enc(void * pp, uint32_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[3] = x & 0xff;
    p[2] = (x >> 8) & 0xff;
    p[1] = (x >> 16) & 0xff;
    p[0] = (x >> 24) & 0xff;
}
|
||||
|
||||
/*
 * be64enc: store the 64-bit value x big-endian (most significant byte
 * first) into the 8 bytes at pp. Bytes are written one at a time.
 */
static inline void
be64enc(void * pp, uint64_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[7] = x & 0xff;
    p[6] = (x >> 8) & 0xff;
    p[5] = (x >> 16) & 0xff;
    p[4] = (x >> 24) & 0xff;
    p[3] = (x >> 32) & 0xff;
    p[2] = (x >> 40) & 0xff;
    p[1] = (x >> 48) & 0xff;
    p[0] = (x >> 56) & 0xff;
}
|
||||
|
||||
/*
 * le32dec: read a 32-bit unsigned value stored little-endian (least
 * significant byte first) in the 4 bytes at pp. Bytes are read one at a time.
 */
static inline uint32_t
le32dec(const void * pp)
{
    const uint8_t * p = (uint8_t const *)pp;

    return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
        ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
}
|
||||
|
||||
/*
 * le32enc: store the 32-bit value x little-endian (least significant byte
 * first) into the 4 bytes at pp. Bytes are written one at a time.
 */
static inline void
le32enc(void * pp, uint32_t x)
{
    uint8_t * p = (uint8_t *)pp;

    p[0] = x & 0xff;
    p[1] = (x >> 8) & 0xff;
    p[2] = (x >> 16) & 0xff;
    p[3] = (x >> 24) & 0xff;
}
|
||||
|
||||
#endif /* !_SYSENDIAN_H_ */
|
@@ -1,80 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2013-2018 Alexander Peslyak
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
#include "miner.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "yescrypt-r8g.h"
|
||||
|
||||
int scanhash_yespower_r8g( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint64_t hash[4] __attribute__((aligned(64)));
|
||||
uint32_t endiandata[32];
|
||||
uint32_t *pdata = work->data;
|
||||
const uint64_t *ptarget = (const uint64_t*)work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
yespower_params_t params =
|
||||
{
|
||||
.version = YESPOWER_0_5,
|
||||
.N = 2048,
|
||||
.r = 8,
|
||||
.pers = (const uint8_t *)endiandata,
|
||||
.perslen = work->sapling ? 112 : 80,
|
||||
};
|
||||
|
||||
//we need bigendian data...
|
||||
for ( int i = 0 ; i < 32; i++ )
|
||||
be32enc( &endiandata[ i], pdata[ i ]);
|
||||
endiandata[19] = n;
|
||||
|
||||
do {
|
||||
yespower_tls( (unsigned char *)endiandata, params.perslen,
|
||||
¶ms, (yespower_binary_t*)hash );
|
||||
|
||||
if unlikely( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
be32enc( pdata+19, n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
endiandata[19] = ++n;
|
||||
} while (n < last_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Fill in the algo gate for yescryptr8g: advertise SSE2 and SHA
 * optimizations, install scanhash_yespower_r8g as the scanner and
 * yespower_tls as the hash function, and set the global
 * opt_target_factor to 65536. Always returns true.
 */
bool register_yescryptr8g_algo( algo_gate_t* gate )
{
   gate->optimizations = SSE2_OPT | SHA_OPT;
   gate->scanhash      = (void*)&scanhash_yespower_r8g;
   gate->hash          = (void*)&yespower_tls;
   opt_target_factor   = 65536.0;
   return true;
}
|
||||
|
||||
|
@@ -95,7 +95,11 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "utils/insecure_memzero.h"
|
||||
#include "utils/sysendian.h"
|
||||
#include "crypto/blake2b-yp.h"
|
||||
|
||||
#include "yespower.h"
|
||||
|
||||
#ifdef __unix__
|
||||
@@ -948,7 +952,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
|
||||
salsa20_blk_t *dst = &X[i];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
tmp->w[k] = le32dec(&src->w[k]);
|
||||
salsa20_simd_shuffle(tmp, dst);
|
||||
}
|
||||
|
||||
@@ -995,7 +999,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
|
||||
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
le32enc(&tmp->w[k], src->w[k]);
|
||||
salsa20_simd_unshuffle(tmp, dst);
|
||||
}
|
||||
}
|
||||
@@ -1021,7 +1025,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
|
||||
salsa20_blk_t *dst = &X[i];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
tmp->w[k] = le32dec(&src->w[k]);
|
||||
salsa20_simd_shuffle(tmp, dst);
|
||||
}
|
||||
|
||||
@@ -1051,7 +1055,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
|
||||
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
le32enc(&tmp->w[k], src->w[k]);
|
||||
salsa20_simd_unshuffle(tmp, dst);
|
||||
}
|
||||
}
|
||||
|
@@ -32,8 +32,6 @@
|
||||
|
||||
static yespower_params_t yespower_params;
|
||||
|
||||
// YESPOWER
|
||||
|
||||
void yespower_hash( const char *input, char *output, uint32_t len )
|
||||
{
|
||||
yespower_tls( input, len, &yespower_params, (yespower_binary_t*)output );
|
||||
@@ -42,33 +40,36 @@ void yespower_hash( const char *input, char *output, uint32_t len )
|
||||
int scanhash_yespower( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
endiandata[19] = n;
|
||||
do {
|
||||
yespower_hash( (char*)endiandata, (char*)vhash, 80 );
|
||||
if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
be32enc( pdata+19, n );
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
endiandata[19] = ++n;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
for (int k = 0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
yespower_hash((char*) endiandata, (char*) vhash, 80);
|
||||
if ( vhash[7] <= Htarg && fulltest( vhash, ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
n++;
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// YESPOWER-B2B
|
||||
|
||||
void yespower_b2b_hash( const char *input, char *output, uint32_t len )
|
||||
{
|
||||
@@ -78,30 +79,34 @@ void yespower_b2b_hash( const char *input, char *output, uint32_t len )
|
||||
int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
endiandata[19] = n;
|
||||
do {
|
||||
yespower_b2b_hash( (char*) endiandata, (char*) vhash, 80 );
|
||||
if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
be32enc( pdata+19, n );
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
endiandata[19] = ++n;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
for (int k = 0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
yespower_b2b_hash((char*) endiandata, (char*) vhash, 80);
|
||||
if ( vhash[7] < Htarg && fulltest( vhash, ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
n++;
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_yespower_algo( algo_gate_t* gate )
|
||||
@@ -130,7 +135,7 @@ bool register_yespower_algo( algo_gate_t* gate )
|
||||
if ( yespower_params.pers )
|
||||
applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers );
|
||||
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->hash = (void*)&yespower_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
@@ -144,14 +149,14 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
|
||||
yespower_params.r = 16;
|
||||
yespower_params.pers = NULL;
|
||||
yespower_params.perslen = 0;
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->hash = (void*)&yespower_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
/* not used
|
||||
|
||||
bool register_yescrypt_05_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
@@ -203,9 +208,6 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate )
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
// POWER2B
|
||||
|
||||
bool register_power2b_algo( algo_gate_t* gate )
|
||||
{
|
||||
@@ -221,7 +223,7 @@ bool register_power2b_algo( algo_gate_t* gate )
|
||||
applog( LOG_NOTICE,"Key= \"%s\"", yespower_params.pers );
|
||||
applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen );
|
||||
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower_b2b;
|
||||
gate->hash = (void*)&yespower_b2b_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
|
@@ -95,8 +95,13 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/sha/hmac-sha256-hash.h"
|
||||
|
||||
#include "insecure_memzero.h"
|
||||
#include "sha256_p.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "yespower.h"
|
||||
|
||||
#include "yespower-platform.c"
|
||||
|
||||
#if __STDC_VERSION__ >= 199901L
|
||||
@@ -524,7 +529,7 @@ static volatile uint64_t Smask2var = Smask2;
|
||||
/* 64-bit without AVX. This relies on out-of-order execution and register
|
||||
* renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g.,
|
||||
* it runs great on Haswell. */
|
||||
//#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
||||
#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
||||
#undef MAYBE_MEMORY_BARRIER
|
||||
#define MAYBE_MEMORY_BARRIER \
|
||||
__asm__("" : : : "memory");
|
||||
@@ -856,7 +861,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
|
||||
salsa20_blk_t *dst = &X[i];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
tmp->w[k] = le32dec(&src->w[k]);
|
||||
salsa20_simd_shuffle(tmp, dst);
|
||||
}
|
||||
|
||||
@@ -903,7 +908,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N,
|
||||
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
le32enc(&tmp->w[k], src->w[k]);
|
||||
salsa20_simd_unshuffle(tmp, dst);
|
||||
}
|
||||
}
|
||||
@@ -929,7 +934,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
|
||||
salsa20_blk_t *dst = &X[i];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
tmp->w[k] = le32dec(&src->w[k]);
|
||||
salsa20_simd_shuffle(tmp, dst);
|
||||
}
|
||||
|
||||
@@ -961,7 +966,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop,
|
||||
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
|
||||
size_t k;
|
||||
for (k = 0; k < 16; k++)
|
||||
tmp->w[k] = src->w[k];
|
||||
le32enc(&tmp->w[k], src->w[k]);
|
||||
salsa20_simd_unshuffle(tmp, dst);
|
||||
}
|
||||
}
|
||||
|
@@ -51,8 +51,8 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/sha/hmac-sha256-hash.h"
|
||||
//#include "sysendian.h"
|
||||
#include "sha256_p.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "yespower.h"
|
||||
|
||||
@@ -346,7 +346,7 @@ static void smix1(uint32_t *B, size_t r, uint32_t N,
|
||||
/* 1: X <-- B */
|
||||
for (k = 0; k < 2 * r; k++)
|
||||
for (i = 0; i < 16; i++)
|
||||
X[k * 16 + i] = B[k * 16 + (i * 5 % 16)];
|
||||
X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
|
||||
|
||||
if (ctx->version != YESPOWER_0_5) {
|
||||
for (k = 1; k < r; k++) {
|
||||
@@ -378,7 +378,7 @@ static void smix1(uint32_t *B, size_t r, uint32_t N,
|
||||
/* B' <-- X */
|
||||
for (k = 0; k < 2 * r; k++)
|
||||
for (i = 0; i < 16; i++)
|
||||
B[k * 16 + (i * 5 % 16)] = X[k * 16 + i];
|
||||
le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -398,7 +398,7 @@ static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop,
|
||||
/* X <-- B */
|
||||
for (k = 0; k < 2 * r; k++)
|
||||
for (i = 0; i < 16; i++)
|
||||
X[k * 16 + i] = B[k * 16 + (i * 5 % 16)];
|
||||
X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
|
||||
|
||||
/* 6: for i = 0 to N - 1 do */
|
||||
for (i = 0; i < Nloop; i++) {
|
||||
@@ -418,7 +418,7 @@ static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop,
|
||||
/* 10: B' <-- X */
|
||||
for (k = 0; k < 2 * r; k++)
|
||||
for (i = 0; i < 16; i++)
|
||||
B[k * 16 + (i * 5 % 16)] = X[k * 16 + i];
|
||||
le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -71,7 +71,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned char uc[32];
|
||||
} yespower_binary_t __attribute__ ((aligned (64)));
|
||||
} yespower_binary_t;
|
||||
|
||||
/**
|
||||
* yespower_init_local(local):
|
||||
|
@@ -44,23 +44,23 @@ mv cpuminer.exe cpuminer-aes-sse42.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-aes-sse42
|
||||
|
||||
#make clean || echo clean
|
||||
#rm -f config.status
|
||||
#CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
|
||||
#make -j 16
|
||||
#strip -s cpuminer.exe
|
||||
#mv cpuminer.exe cpuminer-sse42.exe
|
||||
#strip -s cpuminer
|
||||
#mv cpuminer cpuminer-sse42
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=corei7 -Wall" ./configure --with-curl
|
||||
make -j 16
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-sse42.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-sse42
|
||||
|
||||
#make clean || echo clean
|
||||
#rm -f config.status
|
||||
#CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
|
||||
#make -j 16
|
||||
#strip -s cpuminer.exe
|
||||
#mv cpuminer.exe cpuminer-ssse3.exe
|
||||
#strip -s cpuminer
|
||||
#mv cpuminer cpuminer-ssse3
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
|
||||
make -j 16
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-ssse3.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-ssse3
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
|
@@ -3,8 +3,8 @@
|
||||
# imake clean and rm all the targetted executables.
|
||||
# tips to users.
|
||||
|
||||
rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen > /dev/null
|
||||
rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null
|
||||
|
||||
rm cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-aes-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null
|
||||
rm cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-aes-avx.exe cpuminer-aes-sse42.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-sse2.exe cpuminer-zen.exe > /dev/null
|
||||
|
||||
make distclean > /dev/null
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.7.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.3.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.11.7'
|
||||
PACKAGE_STRING='cpuminer-opt 3.11.7'
|
||||
PACKAGE_VERSION='3.11.3'
|
||||
PACKAGE_STRING='cpuminer-opt 3.11.3'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.11.7 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.11.3 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.11.7:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.11.3:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.11.7
|
||||
cpuminer-opt configure 3.11.3
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.11.7, which was
|
||||
It was created by cpuminer-opt $as_me 3.11.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.11.7'
|
||||
VERSION='3.11.3'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.11.7, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.11.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.11.7
|
||||
cpuminer-opt config.status 3.11.3
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.11.7])
|
||||
AC_INIT([cpuminer-opt], [3.11.3])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
532
cpu-miner.c
532
cpu-miner.c
@@ -157,7 +157,6 @@ bool opt_hash_meter = false;
|
||||
uint32_t submitted_share_count= 0;
|
||||
uint32_t accepted_share_count = 0;
|
||||
uint32_t rejected_share_count = 0;
|
||||
uint32_t stale_share_count = 0;
|
||||
uint32_t solved_block_count = 0;
|
||||
double *thr_hashrates;
|
||||
double global_hashrate = 0;
|
||||
@@ -506,7 +505,6 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
uint32_t version, curtime, bits;
|
||||
uint32_t prevhash[8];
|
||||
uint32_t target[8];
|
||||
unsigned char final_sapling_hash[32];
|
||||
int cbtx_size;
|
||||
uchar *cbtx = NULL;
|
||||
int tx_count, tx_size;
|
||||
@@ -530,8 +528,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
continue;
|
||||
if ( !strcmp( s, "coinbase/append" ) ) coinbase_append = true;
|
||||
else if ( !strcmp( s, "submit/coinbase" ) ) submit_coinbase = true;
|
||||
else if ( !strcmp( s, "version/force" ) ) version_force = true;
|
||||
else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true;
|
||||
else if ( !strcmp( s, "version/force" ) ) version_force = true;
|
||||
else if ( !strcmp( s, "version/reduce" ) ) version_reduce = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -551,13 +549,12 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
goto out;
|
||||
}
|
||||
version = (uint32_t) json_integer_value( tmp );
|
||||
if ( version == 5 )
|
||||
work->sapling = true;
|
||||
else if ( version > 4 )
|
||||
// if ( (version & 0xffU) > BLOCK_VERSION_CURRENT )
|
||||
if ( (version & 0xffU) > BLOCK_VERSION_CURRENT )
|
||||
{
|
||||
if ( version_reduce )
|
||||
{
|
||||
version = ( version & ~0xffU ) | BLOCK_VERSION_CURRENT;
|
||||
}
|
||||
else if ( have_gbt && allow_getwork && !version_force )
|
||||
{
|
||||
applog( LOG_DEBUG, "Switching to getwork, gbt version %d", version );
|
||||
@@ -592,16 +589,6 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ( work->sapling )
|
||||
{
|
||||
if ( unlikely( !jobj_binary( val, "finalsaplingroothash",
|
||||
final_sapling_hash, sizeof(final_sapling_hash) ) ) )
|
||||
{
|
||||
applog( LOG_ERR, "JSON invalid finalsaplingroothash" );
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* find count and size of transactions */
|
||||
txa = json_object_get(val, "transactions" );
|
||||
if ( !txa || !json_is_array( txa ) )
|
||||
@@ -784,8 +771,7 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
/* assemble block header */
|
||||
algo_gate.build_block_header( work, swab32( version ),
|
||||
(uint32_t*) prevhash, (uint32_t*) merkle_tree,
|
||||
swab32( curtime ), le32dec( &bits ),
|
||||
final_sapling_hash );
|
||||
swab32( curtime ), le32dec( &bits ) );
|
||||
|
||||
if ( unlikely( !jobj_binary(val, "target", target, sizeof(target)) ) )
|
||||
{
|
||||
@@ -833,15 +819,20 @@ out:
|
||||
// returns the unit prefix and the hashrate appropriately scaled.
|
||||
void scale_hash_for_display ( double* hashrate, char* prefix )
|
||||
{
|
||||
if ( *hashrate < 1e4 ) *prefix = 0;
|
||||
else if ( *hashrate < 1e7 ) { *prefix = 'k'; *hashrate /= 1e3; }
|
||||
else if ( *hashrate < 1e10 ) { *prefix = 'M'; *hashrate /= 1e6; }
|
||||
else if ( *hashrate < 1e13 ) { *prefix = 'G'; *hashrate /= 1e9; }
|
||||
else if ( *hashrate < 1e16 ) { *prefix = 'T'; *hashrate /= 1e12; }
|
||||
else if ( *hashrate < 1e19 ) { *prefix = 'P'; *hashrate /= 1e15; }
|
||||
else if ( *hashrate < 1e22 ) { *prefix = 'E'; *hashrate /= 1e18; }
|
||||
else if ( *hashrate < 1e25 ) { *prefix = 'Z'; *hashrate /= 1e21; }
|
||||
else { *prefix = 'Y'; *hashrate /= 1e24; }
|
||||
if ( *hashrate < 1e4 ) // 0 H/s to 9999 h/s
|
||||
*prefix = 0;
|
||||
else if ( *hashrate < 1e7 ) // 10 kH/s to 9999 kh/s
|
||||
{ *prefix = 'k'; *hashrate /= 1e3; }
|
||||
else if ( *hashrate < 1e10 ) // 10 Mh/s to 9999 Mh/s
|
||||
{ *prefix = 'M'; *hashrate /= 1e6; }
|
||||
else if ( *hashrate < 1e13 ) // 10 Gh/s to 9999 Gh/s
|
||||
{ *prefix = 'G'; *hashrate /= 1e9; }
|
||||
else if ( *hashrate < 1e16 ) // 10 Th/s to 9999 Th/s
|
||||
{ *prefix = 'T'; *hashrate /= 1e12; }
|
||||
else if ( *hashrate < 1e19 ) // 10 Ph/s to 9999 Ph
|
||||
{ *prefix = 'P'; *hashrate /= 1e15; }
|
||||
else // 10 Eh/s and higher
|
||||
{ *prefix = 'E'; *hashrate /= 1e18; }
|
||||
}
|
||||
|
||||
static inline void sprintf_et( char *str, int seconds )
|
||||
@@ -850,13 +841,10 @@ static inline void sprintf_et( char *str, int seconds )
|
||||
unsigned int min = seconds / 60;
|
||||
unsigned int sec = seconds % 60;
|
||||
unsigned int hrs = min / 60;
|
||||
if ( unlikely( hrs ) )
|
||||
if ( hrs )
|
||||
{
|
||||
unsigned int years = hrs / (24*365);
|
||||
unsigned int days = hrs / 24;
|
||||
if ( years )
|
||||
sprintf( str, "%uy%ud", years, years % 365 );
|
||||
else if ( days ) //0d00h
|
||||
if ( days ) //0d00h
|
||||
sprintf( str, "%ud%02uh", days, hrs % 24 );
|
||||
else // 0h00m
|
||||
sprintf( str, "%uh%02um", hrs, min % 60 );
|
||||
@@ -878,15 +866,13 @@ const double diff_to_hash = 4294967296.;
|
||||
|
||||
static struct timeval session_start;
|
||||
static struct timeval five_min_start;
|
||||
static uint64_t session_first_block = 0;
|
||||
static double latency_sum = 0.;
|
||||
static uint64_t submit_sum = 0;
|
||||
static uint64_t accept_sum = 0;
|
||||
static uint64_t stale_sum = 0;
|
||||
static uint64_t reject_sum = 0;
|
||||
static double norm_diff_sum = 0.;
|
||||
static uint32_t last_block_height = 0;
|
||||
//static bool new_job = false;
|
||||
static bool new_job = false;
|
||||
static double last_targetdiff = 0.;
|
||||
static double ref_rate_hi = 0.;
|
||||
static double ref_rate_lo = 1e100;
|
||||
@@ -897,7 +883,6 @@ static uint32_t hi_temp = 0;
|
||||
|
||||
struct share_stats_t
|
||||
{
|
||||
int share_count;
|
||||
struct timeval submit_time;
|
||||
double net_diff;
|
||||
double share_diff;
|
||||
@@ -907,7 +892,7 @@ struct share_stats_t
|
||||
};
|
||||
|
||||
#define s_stats_size 8
|
||||
static struct share_stats_t share_stats[ s_stats_size ] = {{0}};
|
||||
static struct share_stats_t share_stats[ s_stats_size ] = {0};
|
||||
static int s_get_ptr = 0, s_put_ptr = 0;
|
||||
static struct timeval last_submit_time = {0};
|
||||
|
||||
@@ -936,7 +921,6 @@ void report_summary_log( bool force )
|
||||
uint64_t submits = submit_sum; submit_sum = 0;
|
||||
uint64_t accepts = accept_sum; accept_sum = 0;
|
||||
uint64_t rejects = reject_sum; reject_sum = 0;
|
||||
uint64_t stales = stale_sum; stale_sum = 0;
|
||||
// int latency = latency_sum; latency_sum = 0;
|
||||
memcpy( &start_time, &five_min_start, sizeof start_time );
|
||||
memcpy( &five_min_start, &now, sizeof now );
|
||||
@@ -992,11 +976,7 @@ void report_summary_log( bool force )
|
||||
submits, submitted_share_count );
|
||||
applog2( LOG_INFO,"Accepted %6d %6d",
|
||||
accepts, accepted_share_count );
|
||||
if ( stale_share_count )
|
||||
applog2( LOG_INFO,"Stale %6d %6d",
|
||||
stales, stale_share_count );
|
||||
if ( rejected_share_count )
|
||||
applog2( LOG_INFO,"Rejected %6d %6d",
|
||||
applog2( LOG_INFO,"Rejected %6d %6d",
|
||||
rejects, rejected_share_count );
|
||||
if ( solved_block_count )
|
||||
applog2( LOG_INFO,"Blocks solved %6d",
|
||||
@@ -1031,18 +1011,13 @@ static int share_result( int result, struct work *null_work,
|
||||
int latency = 0;
|
||||
struct share_stats_t my_stats = {0};
|
||||
struct timeval ack_time, latency_tv, et;
|
||||
char ares[48];
|
||||
char sres[48];
|
||||
char rres[48];
|
||||
char bres[48];
|
||||
// char job_id[48];
|
||||
const char *sres = NULL;
|
||||
bool solved = false;
|
||||
bool stale = false;
|
||||
char *acol = NULL, *bcol = NULL, *scol = NULL, *rcol = NULL;
|
||||
|
||||
// Mutex while we grab a snapshot of the stats.
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
if ( likely( share_stats[ s_get_ptr ].submit_time.tv_sec ) )
|
||||
if ( share_stats[ s_get_ptr ].submit_time.tv_sec )
|
||||
{
|
||||
memcpy( &my_stats, &share_stats[ s_get_ptr], sizeof my_stats );
|
||||
memset( &share_stats[ s_get_ptr ], 0, sizeof my_stats );
|
||||
@@ -1072,7 +1047,7 @@ static int share_result( int result, struct work *null_work,
|
||||
my_stats.net_diff * 100.;
|
||||
|
||||
// check result
|
||||
if ( likely( result ) )
|
||||
if ( result )
|
||||
{
|
||||
accepted_share_count++;
|
||||
if ( ( my_stats.net_diff > 0. ) && ( my_stats.share_diff >= net_diff ) )
|
||||
@@ -1082,16 +1057,13 @@ static int share_result( int result, struct work *null_work,
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( reason && strstr( reason, "Invalid job id" ) )
|
||||
{
|
||||
stale = true;
|
||||
stale_share_count++;
|
||||
}
|
||||
else
|
||||
rejected_share_count++;
|
||||
}
|
||||
|
||||
rejected_share_count++;
|
||||
/*
|
||||
result ? accepted_share_count++ : rejected_share_count++;
|
||||
solved = result && (my_stats.net_diff > 0.0 )
|
||||
&& ( my_stats.share_diff >= net_diff );
|
||||
solved_block_count += solved ? 1 : 0 ;
|
||||
*/
|
||||
// update global counters for summary report
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
@@ -1099,88 +1071,37 @@ static int share_result( int result, struct work *null_work,
|
||||
hashrate += thr_hashrates[i];
|
||||
global_hashrate = hashrate;
|
||||
|
||||
if ( likely( result ) )
|
||||
if ( result )
|
||||
{
|
||||
accept_sum++;
|
||||
norm_diff_sum += my_stats.target_diff;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( stale )
|
||||
stale_sum++;
|
||||
else
|
||||
reject_sum++;
|
||||
}
|
||||
reject_sum++;
|
||||
submit_sum++;
|
||||
latency_sum += latency;
|
||||
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
bcol = acol = scol = rcol = "\0";
|
||||
if ( likely( result ) )
|
||||
{
|
||||
if ( unlikely( solved ) )
|
||||
{
|
||||
sprintf( bres, "BLOCK SOLVED %d", solved_block_count );
|
||||
sprintf( ares, "A%d", accepted_share_count );
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf( bres, "B%d", solved_block_count );
|
||||
sprintf( ares, "Accepted %d", accepted_share_count );
|
||||
}
|
||||
sprintf( sres, "S%d", stale_share_count );
|
||||
sprintf( rres, "R%d", rejected_share_count );
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf( ares, "A%d", accepted_share_count );
|
||||
sprintf( bres, "B%d", solved_block_count );
|
||||
if ( stale )
|
||||
{
|
||||
sprintf( sres, "Stale job %d", stale_share_count );
|
||||
sprintf( rres, "R%d", rejected_share_count );
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf( sres, "S%d", stale_share_count );
|
||||
sprintf( rres, "Rejected %d" , rejected_share_count );
|
||||
}
|
||||
}
|
||||
|
||||
bcol = acol = scol = rcol = CL_WHT;
|
||||
|
||||
if ( use_colors )
|
||||
{
|
||||
if ( likely( result ) )
|
||||
{
|
||||
if ( unlikely( solved ) )
|
||||
{
|
||||
bcol = CL_MAG;
|
||||
acol = CL_GRN;
|
||||
}
|
||||
else
|
||||
acol = CL_GRN;
|
||||
}
|
||||
else if ( stale )
|
||||
scol = CL_YL2;
|
||||
else
|
||||
rcol = CL_RED;
|
||||
}
|
||||
sres = solved ? ( CL_MAG "BLOCK SOLVED" CL_WHT )
|
||||
: ( result ? ( CL_GRN "Accepted" CL_WHT )
|
||||
: ( CL_RED "Rejected" CL_WHT ) );
|
||||
else // monochrome
|
||||
sres = solved ? "BLOCK SOLVED" : ( result ? "Accepted" : "Rejected" );
|
||||
|
||||
applog( LOG_NOTICE, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)",
|
||||
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
|
||||
bres, share_time, latency );
|
||||
applog( LOG_NOTICE, "%s, %.3f secs (%dms), A/R/B: %d/%d/%d",
|
||||
sres, share_time, latency, accepted_share_count,
|
||||
rejected_share_count, solved_block_count );
|
||||
|
||||
if ( have_stratum && !opt_quiet )
|
||||
applog2( LOG_NOTICE, "Diff %.3g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
|
||||
my_stats.share_diff, share_ratio, bcol, stratum.block_height,
|
||||
scol, my_stats.job_id );
|
||||
applog2( LOG_INFO, "Share diff %.3g (%5f%%), block %d, job %s",
|
||||
my_stats.share_diff, share_ratio, stratum.block_height,
|
||||
my_stats.job_id );
|
||||
|
||||
if ( unlikely( reason && !result ) )
|
||||
if ( reason )
|
||||
{
|
||||
if ( !( opt_quiet || stale ) )
|
||||
applog( LOG_WARNING, "Reject reason: %s", reason );
|
||||
applog( LOG_WARNING, "Reject reason: %s", reason );
|
||||
|
||||
if ( opt_debug )
|
||||
{
|
||||
@@ -1188,20 +1109,20 @@ static int share_result( int result, struct work *null_work,
|
||||
char str3[65];
|
||||
|
||||
// display share hash and target for troubleshooting
|
||||
diff_to_target( (uint64_t*)str1, my_stats.share_diff );
|
||||
diff_to_target( str1, my_stats.share_diff );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
be32enc( str2 + i, str1[7 - i] );
|
||||
bin2hex( str3, (unsigned char*)str2, 12 );
|
||||
applog2( LOG_INFO, "Hash: %s...", str3 );
|
||||
|
||||
diff_to_target( (uint64_t*)str1, my_stats.target_diff );
|
||||
diff_to_target( str1, my_stats.target_diff );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
be32enc( str2 + i, str1[7 - i] );
|
||||
bin2hex( str3, (unsigned char*)str2, 12 );
|
||||
applog2( LOG_INFO, "Target: %s...", str3 );
|
||||
}
|
||||
|
||||
if ( unlikely( opt_reset_on_stale && stale ) )
|
||||
if ( opt_reset_on_stale && strstr( reason, "Invalid job id" ) )
|
||||
stratum_need_reset = true;
|
||||
}
|
||||
|
||||
@@ -1377,11 +1298,9 @@ char* std_malloc_txs_request( struct work *work )
|
||||
char data_str[2 * sizeof(work->data) + 1];
|
||||
int i;
|
||||
|
||||
int datasize = work->sapling ? 112 : 80;
|
||||
|
||||
for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
|
||||
be32enc( work->data + i, work->data[i] );
|
||||
bin2hex( data_str, (unsigned char *)work->data, datasize );
|
||||
bin2hex( data_str, (unsigned char *)work->data, 80 );
|
||||
if ( work->workid )
|
||||
{
|
||||
char *params;
|
||||
@@ -1389,7 +1308,7 @@ char* std_malloc_txs_request( struct work *work )
|
||||
json_object_set_new( val, "workid", json_string( work->workid ) );
|
||||
params = json_dumps( val, 0 );
|
||||
json_decref( val );
|
||||
req = (char*) malloc( 128 + 2 * datasize + strlen( work->txs )
|
||||
req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs )
|
||||
+ strlen( params ) );
|
||||
sprintf( req,
|
||||
"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":4}\r\n",
|
||||
@@ -1398,7 +1317,7 @@ char* std_malloc_txs_request( struct work *work )
|
||||
}
|
||||
else
|
||||
{
|
||||
req = (char*) malloc( 128 + 2 * datasize + strlen( work->txs ) );
|
||||
req = (char*) malloc( 128 + 2 * 80 + strlen( work->txs ) );
|
||||
sprintf( req,
|
||||
"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":4}\r\n",
|
||||
data_str, work->txs);
|
||||
@@ -1716,7 +1635,7 @@ static void *workio_thread(void *userdata)
|
||||
if ( jsonrpc_2 && !have_stratum )
|
||||
ok = rpc2_workio_login( curl );
|
||||
|
||||
while ( likely(ok) )
|
||||
while (ok)
|
||||
{
|
||||
struct workio_cmd *wc;
|
||||
|
||||
@@ -1792,8 +1711,7 @@ static bool get_work(struct thr_info *thr, struct work *work)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool submit_work( struct thr_info *thr,
|
||||
const struct work *work_in )
|
||||
static bool submit_work( struct thr_info *thr, const struct work *work_in )
|
||||
{
|
||||
struct workio_cmd *wc;
|
||||
|
||||
@@ -1817,22 +1735,20 @@ err_out:
|
||||
return false;
|
||||
}
|
||||
|
||||
// __float128?
|
||||
// Convert little endian 256 bit (38 decimal digits) unsigned integer to
|
||||
// double precision floating point with 15 decimal digits precision.
|
||||
// returns u * ( 2**256 )
|
||||
static inline double u256_to_double( const uint64_t *u )
|
||||
// Convert little endian 256 bit unsigned integer to
|
||||
// double precision floating point.
|
||||
static inline double u256_to_double( const uint64_t* u )
|
||||
{
|
||||
const double exp64 = 4294967296.0 * 4294967296.0; // 2**64
|
||||
return ( ( u[3] * exp64 + u[2] ) * exp64 + u[1] ) * exp64 + u[0];
|
||||
const double f = 4294967296.0 * 4294967296.0; // 2**64
|
||||
return ( ( u[3] * f + u[2] ) * f + u[1] ) * f + u[0];
|
||||
}
|
||||
|
||||
void work_set_target_ratio( struct work* work, const void *hash )
|
||||
void work_set_target_ratio( struct work* work, uint32_t* hash )
|
||||
{
|
||||
double dhash;
|
||||
|
||||
dhash = u256_to_double( (const uint64_t*)hash );
|
||||
if ( likely( dhash > 0. ) )
|
||||
if ( dhash > 0. )
|
||||
work->sharediff = work->targetdiff *
|
||||
u256_to_double( (const uint64_t*)( work->target ) ) / dhash;
|
||||
else
|
||||
@@ -1844,7 +1760,6 @@ void work_set_target_ratio( struct work* work, const void *hash )
|
||||
// it can overflow the queue and overwrite stats for a share.
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
share_stats[ s_put_ptr ].share_count = submitted_share_count;
|
||||
gettimeofday( &share_stats[ s_put_ptr ].submit_time, NULL );
|
||||
share_stats[ s_put_ptr ].share_diff = work->sharediff;
|
||||
share_stats[ s_put_ptr ].net_diff = net_diff;
|
||||
@@ -1857,39 +1772,37 @@ void work_set_target_ratio( struct work* work, const void *hash )
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
}
|
||||
|
||||
bool submit_solution( struct work *work, const void *hash,
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr )
|
||||
{
|
||||
if ( likely( submit_work( thr, work ) ) )
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
submitted_share_count++;
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_NOTICE, "%d submitted by thread %d, job %s",
|
||||
submitted_share_count, thr->id, work->job_id );
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d",
|
||||
submitted_share_count, thr->id );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "%d failed to submit share.",
|
||||
submitted_share_count );
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
return false;
|
||||
}
|
||||
|
||||
bool submit_lane_solution( struct work *work, const void *hash,
|
||||
struct thr_info *thr, const int lane )
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane )
|
||||
{
|
||||
if ( likely( submit_work( thr, work ) ) )
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
submitted_share_count++;
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
|
||||
submitted_share_count, thr->id, lane, work->job_id );
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d",
|
||||
submitted_share_count, thr->id, lane );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "%d failed to submit share.",
|
||||
submitted_share_count );
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1995,27 +1908,35 @@ double std_calc_network_diff( struct work* work )
|
||||
return d;
|
||||
}
|
||||
|
||||
uint32_t *std_get_nonceptr( uint32_t *work_data )
|
||||
uint32_t* std_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return work_data + algo_gate.nonce_index;
|
||||
}
|
||||
|
||||
uint32_t *jr2_get_nonceptr( uint32_t *work_data )
|
||||
uint32_t* jr2_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
// nonce is misaligned, use byte offset
|
||||
return (uint32_t*) ( ((uint8_t*) work_data) + algo_gate.nonce_index );
|
||||
}
|
||||
|
||||
|
||||
void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr )
|
||||
uint32_t *end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
|
||||
bool force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) !=
|
||||
strtoul( g_work->job_id, NULL, 16 )
|
||||
: true;
|
||||
|
||||
if ( force_new_work || *nonceptr >= *end_nonce_ptr )
|
||||
// the job_id check doesn't work as intended, it's a char pointer!
|
||||
// For stratum the pointers can be dereferenced and the strings compared,
|
||||
// benchmark not, getwork & gbt unsure.
|
||||
// || ( have_straum && strcmp( work->job_id, g_work->job_id ) ) ) )
|
||||
// or
|
||||
// || ( !benchmark && strcmp( work->job_id, g_work->job_id ) ) ) )
|
||||
// For now leave it as is, it seems stable.
|
||||
// strtoul seems to work.
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|
||||
|| strtoul( work->job_id, NULL, 16 )
|
||||
!= strtoul( g_work->job_id, NULL, 16 ) ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
@@ -2037,7 +1958,7 @@ void jr2_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.nonce_index )
|
||||
|| memcmp( ((uint8_t*) work->data) + JR2_WORK_CMP_INDEX_2,
|
||||
((uint8_t*) g_work->data) + JR2_WORK_CMP_INDEX_2,
|
||||
JR2_WORK_CMP_SIZE_2 ) )
|
||||
JR2_WORK_CMP_SIZE_2 ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
@@ -2086,30 +2007,39 @@ static void *miner_thread( void *userdata )
|
||||
* error if it fails */
|
||||
if (!opt_benchmark && opt_priority == 0)
|
||||
{
|
||||
setpriority(PRIO_PROCESS, 0, 19);
|
||||
drop_policy();
|
||||
setpriority(PRIO_PROCESS, 0, 19);
|
||||
drop_policy();
|
||||
}
|
||||
else
|
||||
{
|
||||
int prio = 0;
|
||||
int prio = 0;
|
||||
#ifndef WIN32
|
||||
prio = 18;
|
||||
// note: different behavior on linux (-19 to 19)
|
||||
switch ( opt_priority )
|
||||
{
|
||||
case 1: prio = 5; break;
|
||||
case 2: prio = 0; break;
|
||||
case 3: prio = -5; break;
|
||||
case 4: prio = -10; break;
|
||||
case 5: prio = -15;
|
||||
}
|
||||
if (opt_debug)
|
||||
applog(LOG_DEBUG, "Thread %d priority %d (nice %d)", thr_id,
|
||||
prio = 18;
|
||||
// note: different behavior on linux (-19 to 19)
|
||||
switch (opt_priority)
|
||||
{
|
||||
case 1:
|
||||
prio = 5;
|
||||
break;
|
||||
case 2:
|
||||
prio = 0;
|
||||
break;
|
||||
case 3:
|
||||
prio = -5;
|
||||
break;
|
||||
case 4:
|
||||
prio = -10;
|
||||
break;
|
||||
case 5:
|
||||
prio = -15;
|
||||
}
|
||||
if (opt_debug)
|
||||
applog(LOG_DEBUG, "Thread %d priority %d (nice %d)", thr_id,
|
||||
opt_priority, prio );
|
||||
#endif
|
||||
setpriority(PRIO_PROCESS, 0, prio);
|
||||
if ( opt_priority == 0 )
|
||||
drop_policy();
|
||||
setpriority(PRIO_PROCESS, 0, prio);
|
||||
if (opt_priority == 0)
|
||||
drop_policy();
|
||||
}
|
||||
// CPU thread affinity
|
||||
if ( num_cpus > 1 )
|
||||
@@ -2162,7 +2092,7 @@ static void *miner_thread( void *userdata )
|
||||
}
|
||||
|
||||
// wait for stratum to send first job
|
||||
if ( have_stratum ) while ( unlikely( !g_work.job_id ) ) sleep(1);
|
||||
if ( have_stratum ) while ( !stratum.job.job_id ) sleep(1);
|
||||
|
||||
while (1)
|
||||
{
|
||||
@@ -2171,14 +2101,15 @@ static void *miner_thread( void *userdata )
|
||||
int64_t max64 = 1000;
|
||||
int nonce_found = 0;
|
||||
|
||||
if ( likely( algo_gate.do_this_thread( thr_id ) ) )
|
||||
if ( algo_gate.do_this_thread( thr_id ) )
|
||||
{
|
||||
if ( have_stratum )
|
||||
{
|
||||
pthread_mutex_lock( &g_work_lock );
|
||||
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
|
||||
algo_gate.stratum_gen_work( &stratum, &g_work );
|
||||
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
|
||||
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce,
|
||||
stratum.job.clean );
|
||||
pthread_mutex_unlock( &g_work_lock );
|
||||
}
|
||||
else
|
||||
@@ -2198,17 +2129,17 @@ static void *miner_thread( void *userdata )
|
||||
}
|
||||
g_work_time = time(NULL);
|
||||
}
|
||||
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
|
||||
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce, true );
|
||||
|
||||
pthread_mutex_unlock( &g_work_lock );
|
||||
}
|
||||
} // do_this_thread
|
||||
algo_gate.resync_threads( &work );
|
||||
|
||||
if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) )
|
||||
if ( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) )
|
||||
continue;
|
||||
// conditional mining
|
||||
if ( unlikely( !wanna_mine( thr_id ) ) )
|
||||
if (!wanna_mine(thr_id))
|
||||
{
|
||||
sleep(5);
|
||||
continue;
|
||||
@@ -2220,7 +2151,7 @@ static void *miner_thread( void *userdata )
|
||||
max64 = g_work_time + ( have_longpoll ? LP_SCANTIME : opt_scantime )
|
||||
- time(NULL);
|
||||
// time limit
|
||||
if ( unlikely( opt_time_limit && firstwork_time ) )
|
||||
if ( opt_time_limit && firstwork_time )
|
||||
{
|
||||
int passed = (int)( time(NULL) - firstwork_time );
|
||||
int remain = (int)( opt_time_limit - passed );
|
||||
@@ -2278,17 +2209,15 @@ static void *miner_thread( void *userdata )
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
}
|
||||
// If unsubmiited nonce(s) found, submit now.
|
||||
if ( unlikely( nonce_found && !opt_benchmark ) )
|
||||
if ( nonce_found && !opt_benchmark )
|
||||
{
|
||||
applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.",
|
||||
algo_names[ opt_algo ] );
|
||||
if ( !submit_work( mythr, &work ) )
|
||||
{
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
break;
|
||||
}
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_NOTICE, "%d: submitted by thread %d.",
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
mythr->id );
|
||||
|
||||
@@ -2303,7 +2232,7 @@ static void *miner_thread( void *userdata )
|
||||
}
|
||||
}
|
||||
// display hashrate
|
||||
if ( unlikely( opt_hash_meter ) )
|
||||
if ( opt_hash_meter )
|
||||
{
|
||||
char hr[16];
|
||||
char hr_units[2] = {0,0};
|
||||
@@ -2321,8 +2250,8 @@ static void *miner_thread( void *userdata )
|
||||
|
||||
// Display benchmark total
|
||||
// Update hashrate for API if no shares accepted yet.
|
||||
if ( unlikely( ( opt_benchmark || !accepted_share_count )
|
||||
&& thr_id == opt_n_threads - 1 ) )
|
||||
if ( ( opt_benchmark || !accepted_share_count )
|
||||
&& thr_id == opt_n_threads - 1 )
|
||||
{
|
||||
double hashrate = 0.;
|
||||
for ( i = 0; i < opt_n_threads; i++ )
|
||||
@@ -2591,14 +2520,13 @@ out:
|
||||
|
||||
// used by stratum and gbt
|
||||
void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_tree, uint32_t ntime,
|
||||
uint32_t nbits, unsigned char *final_sapling_hash )
|
||||
uint32_t *prevhash, uint32_t *merkle_tree,
|
||||
uint32_t ntime, uint32_t nbits )
|
||||
{
|
||||
int i;
|
||||
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = version;
|
||||
g_work->sapling = be32dec( &version ) == 5 ? true : false;
|
||||
|
||||
if ( have_stratum )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
@@ -2612,27 +2540,8 @@ void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
|
||||
g_work->data[ algo_gate.ntime_index ] = ntime;
|
||||
g_work->data[ algo_gate.nbits_index ] = nbits;
|
||||
if ( g_work->sapling )
|
||||
{
|
||||
if ( have_stratum )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[20 + i] = le32dec( (uint32_t*)final_sapling_hash + i );
|
||||
else
|
||||
{
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[27 - i] = le32dec( (uint32_t*)final_sapling_hash + i );
|
||||
g_work->data[19] = 0;
|
||||
}
|
||||
g_work->data[28] = 0x80000000;
|
||||
g_work->data[29] = 0x00000000;
|
||||
g_work->data[30] = 0x00000000;
|
||||
g_work->data[31] = 0x00000380;
|
||||
}
|
||||
else
|
||||
{
|
||||
g_work->data[20] = 0x80000000;
|
||||
g_work->data[31] = 0x00000280;
|
||||
}
|
||||
g_work->data[20] = 0x80000000;
|
||||
g_work->data[31] = 0x00000280;
|
||||
}
|
||||
|
||||
void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
@@ -2646,8 +2555,7 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
// Assemble block header
|
||||
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
|
||||
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
|
||||
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits),
|
||||
sctx->job.final_sapling_hash );
|
||||
le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
|
||||
}
|
||||
|
||||
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
@@ -2662,11 +2570,12 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
algo_gate.build_extraheader( g_work, sctx );
|
||||
net_diff = algo_gate.calc_network_diff( g_work );
|
||||
algo_gate.set_work_data_endian( g_work );
|
||||
work_set_target( g_work, sctx->job.diff
|
||||
/ ( opt_target_factor * opt_diff_factor ) );
|
||||
|
||||
pthread_mutex_unlock( &sctx->work_lock );
|
||||
|
||||
work_set_target( g_work, sctx->job.diff
|
||||
/ ( opt_target_factor * opt_diff_factor ) );
|
||||
|
||||
if ( opt_debug )
|
||||
{
|
||||
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
|
||||
@@ -2676,72 +2585,60 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
free( xnonce2str );
|
||||
}
|
||||
|
||||
double hr = 0.;
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
for ( int i = 0; i < opt_n_threads; i++ )
|
||||
hr += thr_hashrates[i];
|
||||
global_hashrate = hr;
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
if ( stratum_diff != sctx->job.diff )
|
||||
applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
|
||||
sctx->job.diff, sctx->block_height, g_work->job_id );
|
||||
else if ( last_block_height != sctx->block_height )
|
||||
applog( LOG_BLUE, "New block %d, job %s",
|
||||
sctx->block_height, g_work->job_id );
|
||||
else
|
||||
applog( LOG_BLUE,"New job %s", g_work->job_id );
|
||||
|
||||
// Update data and calculate new estimates.
|
||||
// Log new block and/or stratum difficulty change.
|
||||
if ( ( stratum_diff != sctx->job.diff )
|
||||
|| ( last_block_height != sctx->block_height ) )
|
||||
|| ( last_block_height != sctx->block_height ) )
|
||||
{
|
||||
static bool multipool = false;
|
||||
if ( stratum.block_height < last_block_height ) multipool = true;
|
||||
if ( unlikely( !session_first_block ) )
|
||||
session_first_block = stratum.block_height;
|
||||
last_block_height = stratum.block_height;
|
||||
stratum_diff = sctx->job.diff;
|
||||
last_targetdiff = g_work->targetdiff;
|
||||
double hr = 0.;
|
||||
new_job = false;
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "%s: %s", algo_names[opt_algo], short_url );
|
||||
applog2( LOG_INFO, "Diff: Net %.3g, Stratum %.3g, Target %.3g",
|
||||
net_diff, stratum_diff, last_targetdiff );
|
||||
if ( likely( hr > 0. ) )
|
||||
{
|
||||
char hr_units[4] = {0};
|
||||
char block_ttf[32];
|
||||
char share_ttf[32];
|
||||
for ( int i = 0; i < opt_n_threads; i++ )
|
||||
hr += thr_hashrates[i];
|
||||
global_hashrate = hr;
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
sprintf_et( block_ttf, net_diff * diff_to_hash / hr );
|
||||
sprintf_et( share_ttf, last_targetdiff * diff_to_hash / hr );
|
||||
scale_hash_for_display ( &hr, hr_units );
|
||||
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
|
||||
hr, hr_units, block_ttf, share_ttf );
|
||||
if ( !multipool && net_diff > 0. )
|
||||
{
|
||||
struct timeval now, et;
|
||||
gettimeofday( &now, NULL );
|
||||
timeval_subtract( &et, &now, &session_start );
|
||||
double net_hr = net_diff * diff_to_hash;
|
||||
char net_ttf[32];
|
||||
char net_hr_units[4] = {0};
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
if ( stratum_diff != sctx->job.diff )
|
||||
applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
|
||||
sctx->job.diff, sctx->block_height, g_work->job_id );
|
||||
else if ( last_block_height != sctx->block_height )
|
||||
applog( LOG_BLUE, "New block %d, job %s", sctx->block_height,
|
||||
g_work->job_id );
|
||||
else
|
||||
applog( LOG_BLUE,"New job %s.", g_work->job_id );
|
||||
}
|
||||
|
||||
sprintf_et( net_ttf,
|
||||
( last_block_height - session_first_block ) == 0 ? 0 :
|
||||
et.tv_sec / ( last_block_height - session_first_block ) );
|
||||
// Update data and calculate new estimates.
|
||||
stratum_diff = sctx->job.diff;
|
||||
last_block_height = stratum.block_height;
|
||||
last_targetdiff = g_work->targetdiff;
|
||||
|
||||
scale_hash_for_display ( &net_hr, net_hr_units );
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "%s %s block %d", short_url,
|
||||
algo_names[opt_algo], stratum.block_height );
|
||||
applog2( LOG_INFO, "Diff: net %g, stratum %g, target %g",
|
||||
net_diff, stratum_diff, last_targetdiff );
|
||||
}
|
||||
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: %s",
|
||||
net_hr, net_hr_units, net_ttf );
|
||||
}
|
||||
} // hr > 0
|
||||
} // !quiet
|
||||
if ( hr > 0. )
|
||||
{
|
||||
char hr_units[4] = {0};
|
||||
char block_ttf[32];
|
||||
char share_ttf[32];
|
||||
|
||||
sprintf_et( block_ttf, net_diff * diff_to_hash / hr );
|
||||
sprintf_et( share_ttf, last_targetdiff * diff_to_hash / hr );
|
||||
scale_hash_for_display ( &hr, hr_units );
|
||||
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: block %s, share %s",
|
||||
hr, hr_units, block_ttf, share_ttf );
|
||||
}
|
||||
}
|
||||
} // new diff/block
|
||||
}
|
||||
|
||||
@@ -2769,7 +2666,7 @@ static void *stratum_thread(void *userdata )
|
||||
{
|
||||
int failures = 0;
|
||||
|
||||
if ( unlikely( stratum_need_reset ) )
|
||||
if ( stratum_need_reset )
|
||||
{
|
||||
stratum_need_reset = false;
|
||||
stratum_disconnect( &stratum );
|
||||
@@ -2806,7 +2703,8 @@ static void *stratum_thread(void *userdata )
|
||||
applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
|
||||
sleep(opt_fail_pause);
|
||||
}
|
||||
if ( unlikely( jsonrpc_2 ) )
|
||||
|
||||
if (jsonrpc_2)
|
||||
{
|
||||
work_free(&g_work);
|
||||
work_copy(&g_work, &stratum.work);
|
||||
@@ -2819,12 +2717,28 @@ static void *stratum_thread(void *userdata )
|
||||
if ( stratum.job.job_id
|
||||
&& ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
|
||||
{
|
||||
new_job = true;
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
algo_gate.stratum_gen_work( &stratum, &g_work );
|
||||
time(&g_work_time);
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
restart_threads();
|
||||
}
|
||||
|
||||
if ( stratum.job.clean || jsonrpc_2 )
|
||||
{
|
||||
if ( !opt_quiet && last_block_height && new_job
|
||||
&& ( last_block_height == stratum.block_height ) )
|
||||
{
|
||||
new_job = false;
|
||||
applog( LOG_BLUE,"New job %s", g_work.job_id );
|
||||
}
|
||||
}
|
||||
else if (opt_debug && !opt_quiet)
|
||||
{
|
||||
applog( LOG_BLUE, "%s asks job %d for block %d", short_url,
|
||||
strtoul( stratum.job.job_id, NULL, 16 ), stratum.block_height );
|
||||
}
|
||||
} // stratum.job.job_id
|
||||
|
||||
if ( stratum_socket_full( &stratum, opt_timeout ) )
|
||||
{
|
||||
@@ -2850,6 +2764,25 @@ static void *stratum_thread(void *userdata )
|
||||
// check if this redundant
|
||||
stratum_disconnect( &stratum );
|
||||
}
|
||||
/*
|
||||
if ( !stratum_socket_full( &stratum, opt_timeout ) )
|
||||
{
|
||||
stratum_errors++;
|
||||
applog(LOG_ERR, "Stratum connection timeout");
|
||||
s = NULL;
|
||||
}
|
||||
else
|
||||
s = stratum_recv_line(&stratum);
|
||||
if ( !s )
|
||||
{
|
||||
stratum_disconnect(&stratum);
|
||||
applog(LOG_WARNING, "Stratum connection interrupted");
|
||||
continue;
|
||||
}
|
||||
if (!stratum_handle_method(&stratum, s))
|
||||
stratum_handle_response(s);
|
||||
free(s);
|
||||
*/
|
||||
} // loop
|
||||
out:
|
||||
return NULL;
|
||||
@@ -3777,7 +3710,6 @@ int main(int argc, char *argv[])
|
||||
applog(LOG_WARNING,"available on Linux. Using default affinity.");
|
||||
opt_affinity = -1;
|
||||
}
|
||||
/*
|
||||
else
|
||||
{
|
||||
affine_to_cpu_mask( -1, opt_affinity );
|
||||
@@ -3796,12 +3728,8 @@ int main(int argc, char *argv[])
|
||||
#endif
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
applog( LOG_INFO, "Extranonce subscribe: %s",
|
||||
opt_extranonce ? "YES" : "NO" );
|
||||
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
if (use_syslog)
|
||||
openlog("cpuminer", LOG_PID, LOG_USER);
|
||||
@@ -3831,7 +3759,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
/* start work I/O thread */
|
||||
if (thread_create(thr, workio_thread))
|
||||
{
|
||||
{
|
||||
applog(LOG_ERR, "work thread create failed");
|
||||
return 1;
|
||||
}
|
||||
@@ -3897,7 +3825,7 @@ int main(int argc, char *argv[])
|
||||
thr->q = tq_new();
|
||||
if (!thr->q)
|
||||
return 1;
|
||||
err = thread_create(thr, miner_thread);
|
||||
err = thread_create(thr, miner_thread);
|
||||
if (err) {
|
||||
applog(LOG_ERR, "thread %d create failed", i);
|
||||
return 1;
|
||||
|
27
miner.h
27
miner.h
@@ -313,14 +313,12 @@ size_t address_to_script( unsigned char *out, size_t outsz, const char *addr );
|
||||
int timeval_subtract( struct timeval *result, struct timeval *x,
|
||||
struct timeval *y);
|
||||
bool fulltest( const uint32_t *hash, const uint32_t *target );
|
||||
bool valid_hash( const void*, const void* );
|
||||
|
||||
void work_set_target( struct work* work, double diff );
|
||||
double target_to_diff( uint32_t* target );
|
||||
extern void diff_to_target( uint64_t *target, double diff );
|
||||
extern void diff_to_target(uint32_t *target, double diff);
|
||||
|
||||
double hash_target_ratio( uint32_t* hash, uint32_t* target );
|
||||
void work_set_target_ratio( struct work* work, const void *hash );
|
||||
void work_set_target_ratio( struct work* work, uint32_t* hash );
|
||||
|
||||
struct thr_info {
|
||||
int id;
|
||||
@@ -332,10 +330,10 @@ struct thr_info {
|
||||
|
||||
//struct thr_info *thr_info;
|
||||
|
||||
bool submit_solution( struct work *work, const void *hash,
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr );
|
||||
bool submit_lane_solution( struct work *work, const void *hash,
|
||||
struct thr_info *thr, const int lane );
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane );
|
||||
|
||||
|
||||
//bool submit_work( struct thr_info *thr, const struct work *work_in );
|
||||
@@ -363,7 +361,7 @@ float cpu_temp( int core );
|
||||
|
||||
struct work {
|
||||
uint32_t data[48] __attribute__ ((aligned (64)));
|
||||
uint32_t target[8] __attribute__ ((aligned (64)));
|
||||
uint32_t target[8];
|
||||
|
||||
double targetdiff;
|
||||
// double shareratio;
|
||||
@@ -376,8 +374,6 @@ struct work {
|
||||
char *job_id;
|
||||
size_t xnonce2_len;
|
||||
unsigned char *xnonce2;
|
||||
bool sapling;
|
||||
|
||||
// x16rt
|
||||
uint32_t merkleroothash[8];
|
||||
uint32_t witmerkleroothash[8];
|
||||
@@ -389,9 +385,8 @@ struct work {
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
struct stratum_job {
|
||||
char *job_id;
|
||||
unsigned char prevhash[32];
|
||||
unsigned char final_sapling_hash[32];
|
||||
char *job_id;
|
||||
size_t coinbase_size;
|
||||
unsigned char *coinbase;
|
||||
unsigned char *xnonce2;
|
||||
@@ -574,7 +569,6 @@ enum algos {
|
||||
ALGO_SHA256D,
|
||||
ALGO_SHA256Q,
|
||||
ALGO_SHA256T,
|
||||
ALGO_SHA3D,
|
||||
ALGO_SHAVITE3,
|
||||
ALGO_SKEIN,
|
||||
ALGO_SKEIN2,
|
||||
@@ -608,7 +602,6 @@ enum algos {
|
||||
ALGO_XEVAN,
|
||||
ALGO_YESCRYPT,
|
||||
ALGO_YESCRYPTR8,
|
||||
ALGO_YESCRYPTR8G,
|
||||
ALGO_YESCRYPTR16,
|
||||
ALGO_YESCRYPTR32,
|
||||
ALGO_YESPOWER,
|
||||
@@ -674,7 +667,6 @@ static const char* const algo_names[] = {
|
||||
"sha256d",
|
||||
"sha256q",
|
||||
"sha256t",
|
||||
"sha3d",
|
||||
"shavite3",
|
||||
"skein",
|
||||
"skein2",
|
||||
@@ -708,7 +700,6 @@ static const char* const algo_names[] = {
|
||||
"xevan",
|
||||
"yescrypt",
|
||||
"yescryptr8",
|
||||
"yescryptr8g",
|
||||
"yescryptr16",
|
||||
"yescryptr32",
|
||||
"yespower",
|
||||
@@ -841,8 +832,7 @@ Options:\n\
|
||||
sha256d Double SHA-256\n\
|
||||
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||
sha3d Double Keccak256 (BSHA3)\n\
|
||||
shavite3 Shavite3\n\
|
||||
shavite3 Shavite3\n\
|
||||
skein Skein+Sha (Skeincoin)\n\
|
||||
skein2 Double Skein (Woodcoin)\n\
|
||||
skunk Signatum (SIGT)\n\
|
||||
@@ -875,7 +865,6 @@ Options:\n\
|
||||
xevan Bitsend (BSD)\n\
|
||||
yescrypt Globalboost-Y (BSTY)\n\
|
||||
yescryptr8 BitZeny (ZNY)\n\
|
||||
yescryptr8g Koto (KOTO)\n\
|
||||
yescryptr16 Eli\n\
|
||||
yescryptr32 WAVI\n\
|
||||
yespower Cryply\n\
|
||||
|
@@ -129,8 +129,8 @@ static inline __m512i m512_const_64( const uint64_t i7, const uint64_t i6,
|
||||
}
|
||||
|
||||
// Equivalent of set1, broadcast 64 bit constant to all 64 bit elements.
|
||||
#define m512_const1_256( v ) _mm512_broadcast_i64x4( v )
|
||||
#define m512_const1_128( v ) _mm512_broadcast_i64x2( v )
|
||||
#define m512_const1_256( i ) _mm512_broadcast_i64x4( i )
|
||||
#define m512_const1_128( i ) _mm512_broadcast_i64x2( i )
|
||||
#define m512_const1_64( i ) _mm512_broadcastq_epi64( mm128_mov64_128( i ) )
|
||||
#define m512_const1_32( i ) _mm512_broadcastd_epi32( mm128_mov32_128( i ) )
|
||||
#define m512_const1_16( i ) _mm512_broadcastw_epi16( mm128_mov32_128( i ) )
|
||||
@@ -547,6 +547,8 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
|
||||
//
|
||||
// Rotate elements from 2 512 bit vectors in place, source arguments
|
||||
// are overwritten.
|
||||
// These can all be done with 2 permutex2var instructions but they are
|
||||
// slower than either xor or alignr and require AVX512VBMI.
|
||||
|
||||
#define mm512_swap1024_512(v1, v2) \
|
||||
v1 = _mm512_xor_si512(v1, v2); \
|
||||
|
23
sysinfos.c
23
sysinfos.c
@@ -41,16 +41,15 @@
|
||||
"/sys/devices/platform/coretemp.0/hwmon/hwmon1/temp1_input"
|
||||
|
||||
#define HWMON_PATH3 \
|
||||
"/sys/devices/platform/coretemp.0/hwmon/hwmon2/temp1_input"
|
||||
"/sys/class/hwmon/hwmon0/temp1_input"
|
||||
|
||||
#define HWMON_PATH \
|
||||
"/sys/class/hwmon/hwmon2/temp1_input"
|
||||
|
||||
// need this for Ryzen
|
||||
/*
|
||||
#define HWMON_ALT \
|
||||
"/sys/class/hwmon/hwmon0/temp1_input"
|
||||
|
||||
/*
|
||||
#define HWMON_ALT1 \
|
||||
"/sys/devices/platform/coretemp.0/hwmon/hwmon1/temp1_input"
|
||||
*/
|
||||
@@ -85,9 +84,21 @@ static inline float linux_cputemp(int core)
|
||||
if (!fd)
|
||||
fd = fopen(HWMON_PATH, "r");
|
||||
|
||||
if (!fd)
|
||||
fd = fopen(HWMON_ALT, "r");
|
||||
|
||||
if (!fd)
|
||||
// fd = fopen(HWMON_ALT1, "r");
|
||||
|
||||
// if (!fd)
|
||||
fd = fopen(HWMON_ALT2, "r");
|
||||
|
||||
if (!fd)
|
||||
fd = fopen(HWMON_ALT3, "r");
|
||||
|
||||
if (!fd)
|
||||
fd = fopen(HWMON_ALT4, "r");
|
||||
|
||||
if (!fd)
|
||||
fd = fopen(HWMON_ALT5, "r");
|
||||
|
||||
if (!fd)
|
||||
return tc;
|
||||
|
||||
|
167
util.c
167
util.c
@@ -923,7 +923,7 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
|
||||
|
||||
size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
|
||||
{
|
||||
unsigned char addrbin[26];
|
||||
unsigned char addrbin[25];
|
||||
int addrver;
|
||||
size_t rv;
|
||||
|
||||
@@ -982,89 +982,64 @@ int timeval_subtract(struct timeval *result, struct timeval *x,
|
||||
return x->tv_sec < y->tv_sec;
|
||||
}
|
||||
|
||||
// Use this when deinterleaved
|
||||
// do 64 bit test 4 iterations
|
||||
inline bool valid_hash( const void *hash, const void *target )
|
||||
{
|
||||
const uint64_t *h = (const uint64_t*)hash;
|
||||
const uint64_t *t = (const uint64_t*)target;
|
||||
if ( h[3] > t[3] ) return false;
|
||||
if ( h[3] < t[3] ) return true;
|
||||
if ( h[2] > t[2] ) return false;
|
||||
if ( h[2] < t[2] ) return true;
|
||||
if ( h[1] > t[1] ) return false;
|
||||
if ( h[1] < t[1] ) return true;
|
||||
if ( h[0] > t[0] ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool fulltest( const uint32_t *hash, const uint32_t *target )
|
||||
bool fulltest(const uint32_t *hash, const uint32_t *target)
|
||||
{
|
||||
int i;
|
||||
bool rc = true;
|
||||
|
||||
for ( i = 7; i >= 0; i-- )
|
||||
{
|
||||
if ( hash[i] > target[i] )
|
||||
{
|
||||
for (i = 7; i >= 0; i--) {
|
||||
if (hash[i] > target[i]) {
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
if ( hash[i] < target[i] )
|
||||
{
|
||||
if (hash[i] < target[i]) {
|
||||
rc = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( opt_debug )
|
||||
{
|
||||
if (opt_debug) {
|
||||
uint32_t hash_be[8], target_be[8];
|
||||
char hash_str[65], target_str[65];
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
{
|
||||
be32enc( hash_be + i, hash[7 - i] );
|
||||
be32enc( target_be + i, target[7 - i] );
|
||||
for (i = 0; i < 8; i++) {
|
||||
be32enc(hash_be + i, hash[7 - i]);
|
||||
be32enc(target_be + i, target[7 - i]);
|
||||
}
|
||||
bin2hex( hash_str, (unsigned char *)hash_be, 32 );
|
||||
bin2hex( target_str, (unsigned char *)target_be, 32 );
|
||||
bin2hex(hash_str, (unsigned char *)hash_be, 32);
|
||||
bin2hex(target_str, (unsigned char *)target_be, 32);
|
||||
|
||||
applog( LOG_DEBUG, "DEBUG: %s\nHash: %s\nTarget: %s",
|
||||
rc ? "hash <= target"
|
||||
: "hash > target (false positive)",
|
||||
hash_str, target_str );
|
||||
applog(LOG_DEBUG, "DEBUG: %s\nHash: %s\nTarget: %s",
|
||||
rc ? "hash <= target"
|
||||
: "hash > target (false positive)",
|
||||
hash_str,
|
||||
target_str);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
void diff_to_target(uint64_t *target, double diff)
|
||||
void diff_to_target(uint32_t *target, double diff)
|
||||
{
|
||||
uint64_t m;
|
||||
int k;
|
||||
|
||||
const double exp64 = (double)0xffffffffffffffff + 1.;
|
||||
for ( k = 3; k > 0 && diff > 1.0; k-- )
|
||||
diff /= exp64;
|
||||
|
||||
// for (k = 6; k > 0 && diff > 1.0; k--)
|
||||
// diff /= 4294967296.0;
|
||||
m = (uint64_t)( 0xffff0000 / diff );
|
||||
if unlikely( m == 0 && k == 3 )
|
||||
memset( target, 0xff, 32 );
|
||||
else
|
||||
{
|
||||
memset( target, 0, 32 );
|
||||
target[k] = m;
|
||||
// target[k] = (uint32_t)m;
|
||||
// target[k + 1] = (uint32_t)(m >> 32);
|
||||
for (k = 6; k > 0 && diff > 1.0; k--)
|
||||
diff /= 4294967296.0;
|
||||
m = (uint64_t)(4294901760.0 / diff);
|
||||
if (m == 0 && k == 6)
|
||||
memset(target, 0xff, 32);
|
||||
else {
|
||||
memset(target, 0, 32);
|
||||
target[k] = (uint32_t)m;
|
||||
target[k + 1] = (uint32_t)(m >> 32);
|
||||
}
|
||||
}
|
||||
|
||||
// Only used by stratum pools
|
||||
void work_set_target(struct work* work, double diff)
|
||||
{
|
||||
diff_to_target( (uint64_t*)work->target, diff );
|
||||
diff_to_target(work->target, diff);
|
||||
work->targetdiff = diff;
|
||||
}
|
||||
|
||||
@@ -1836,7 +1811,6 @@ static uint32_t getblocheight(struct stratum_ctx *sctx)
|
||||
static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
{
|
||||
const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime;
|
||||
const char *finalsaplinghash = NULL;
|
||||
const char *denom10 = NULL, *denom100 = NULL, *denom1000 = NULL,
|
||||
*denom10000 = NULL, *prooffullnode = NULL;
|
||||
const char *extradata = NULL;
|
||||
@@ -1897,18 +1871,6 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
goto out;
|
||||
}
|
||||
|
||||
hex2bin( sctx->job.version, version, 4 );
|
||||
int ver = be32dec( sctx->job.version );
|
||||
if ( ver == 5 )
|
||||
{
|
||||
finalsaplinghash = json_string_value( json_array_get( params, 9 ) );
|
||||
if ( !finalsaplinghash || strlen(finalsaplinghash) != 64 )
|
||||
{
|
||||
applog( LOG_ERR, "Stratum notify: invalid version 5 parameters" );
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if ( is_veil )
|
||||
{
|
||||
if ( !denom10 || !denom100 || !denom1000 || !denom10000
|
||||
@@ -1922,69 +1884,66 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
}
|
||||
|
||||
if ( merkle_count )
|
||||
merkle = (uchar**) malloc( merkle_count * sizeof(char *) );
|
||||
merkle = (uchar**) malloc(merkle_count * sizeof(char *));
|
||||
for ( i = 0; i < merkle_count; i++ )
|
||||
{
|
||||
const char *s = json_string_value( json_array_get( merkle_arr, i ) );
|
||||
if ( !s || strlen(s) != 64 )
|
||||
{
|
||||
while ( i-- ) free( merkle[i] );
|
||||
free( merkle );
|
||||
applog( LOG_ERR, "Stratum notify: invalid Merkle branch" );
|
||||
const char *s = json_string_value(json_array_get(merkle_arr, i));
|
||||
if (!s || strlen(s) != 64) {
|
||||
while (i--)
|
||||
free(merkle[i]);
|
||||
free(merkle);
|
||||
applog(LOG_ERR, "Stratum notify: invalid Merkle branch");
|
||||
goto out;
|
||||
}
|
||||
merkle[i] = (uchar*) malloc( 32 );
|
||||
hex2bin( merkle[i], s, 32 );
|
||||
merkle[i] = (uchar*) malloc(32);
|
||||
hex2bin(merkle[i], s, 32);
|
||||
}
|
||||
|
||||
pthread_mutex_lock( &sctx->work_lock );
|
||||
pthread_mutex_lock(&sctx->work_lock);
|
||||
|
||||
coinb1_size = strlen( coinb1 ) / 2;
|
||||
coinb2_size = strlen( coinb2 ) / 2;
|
||||
coinb1_size = strlen(coinb1) / 2;
|
||||
coinb2_size = strlen(coinb2) / 2;
|
||||
sctx->job.coinbase_size = coinb1_size + sctx->xnonce1_size +
|
||||
sctx->xnonce2_size + coinb2_size;
|
||||
sctx->job.coinbase = (uchar*) realloc( sctx->job.coinbase,
|
||||
sctx->job.coinbase_size );
|
||||
sctx->job.coinbase = (uchar*) realloc(sctx->job.coinbase, sctx->job.coinbase_size);
|
||||
sctx->job.xnonce2 = sctx->job.coinbase + coinb1_size + sctx->xnonce1_size;
|
||||
hex2bin( sctx->job.coinbase, coinb1, coinb1_size );
|
||||
memcpy( sctx->job.coinbase + coinb1_size,
|
||||
sctx->xnonce1, sctx->xnonce1_size );
|
||||
if ( !sctx->job.job_id || strcmp( sctx->job.job_id, job_id ) )
|
||||
hex2bin(sctx->job.coinbase, coinb1, coinb1_size);
|
||||
memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size);
|
||||
if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id))
|
||||
memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
|
||||
hex2bin( sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size );
|
||||
free( sctx->job.job_id );
|
||||
sctx->job.job_id = strdup( job_id );
|
||||
hex2bin( sctx->job.prevhash, prevhash, 32 );
|
||||
if ( has_claim ) hex2bin( sctx->job.extra, extradata, 32 );
|
||||
if ( has_roots ) hex2bin( sctx->job.extra, extradata, 64 );
|
||||
if ( ver == 5 )
|
||||
hex2bin( sctx->job.final_sapling_hash, finalsaplinghash, 32 );
|
||||
hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size);
|
||||
free(sctx->job.job_id);
|
||||
sctx->job.job_id = strdup(job_id);
|
||||
hex2bin(sctx->job.prevhash, prevhash, 32);
|
||||
if (has_claim) hex2bin(sctx->job.extra, extradata, 32);
|
||||
if (has_roots) hex2bin(sctx->job.extra, extradata, 64);
|
||||
|
||||
if ( is_veil )
|
||||
{
|
||||
hex2bin( sctx->job.denom10, denom10, 32 );
|
||||
hex2bin( sctx->job.denom100, denom100, 32 );
|
||||
hex2bin( sctx->job.denom1000, denom1000, 32 );
|
||||
hex2bin( sctx->job.denom10000, denom10000, 32 );
|
||||
hex2bin( sctx->job.proofoffullnode, prooffullnode, 32 );
|
||||
hex2bin(sctx->job.denom10, denom10, 32);
|
||||
hex2bin(sctx->job.denom100, denom100, 32);
|
||||
hex2bin(sctx->job.denom1000, denom1000, 32);
|
||||
hex2bin(sctx->job.denom10000, denom10000, 32);
|
||||
hex2bin(sctx->job.proofoffullnode, prooffullnode, 32);
|
||||
}
|
||||
|
||||
sctx->block_height = getblocheight( sctx );
|
||||
sctx->block_height = getblocheight(sctx);
|
||||
|
||||
for ( i = 0; i < sctx->job.merkle_count; i++ )
|
||||
free( sctx->job.merkle[i] );
|
||||
for (i = 0; i < sctx->job.merkle_count; i++)
|
||||
free(sctx->job.merkle[i]);
|
||||
|
||||
free( sctx->job.merkle );
|
||||
free(sctx->job.merkle);
|
||||
sctx->job.merkle = merkle;
|
||||
sctx->job.merkle_count = merkle_count;
|
||||
|
||||
hex2bin( sctx->job.nbits, nbits, 4 );
|
||||
hex2bin( sctx->job.ntime, stime, 4 );
|
||||
hex2bin(sctx->job.version, version, 4);
|
||||
hex2bin(sctx->job.nbits, nbits, 4);
|
||||
hex2bin(sctx->job.ntime, stime, 4);
|
||||
sctx->job.clean = clean;
|
||||
|
||||
sctx->job.diff = sctx->next_diff;
|
||||
|
||||
pthread_mutex_unlock( &sctx->work_lock );
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
|
||||
ret = true;
|
||||
|
||||
|
Reference in New Issue
Block a user