Mirror of https://github.com/JayDDee/cpuminer-opt.git, synced 2025-09-17 23:44:27 +00:00

Compare commits: 4 commits

  91ec6f1771
  a52c5eccf7
  86b889e1b0
  72330eb5a7
Makefile.am (11 changed lines)
@@ -117,6 +117,7 @@ cpuminer_SOURCES = \
  algo/keccak/keccak-4way.c\
  algo/keccak/keccak-gate.c \
  algo/keccak/sse2/keccak.c \
  algo/lanehash/lane.c \
  algo/luffa/sph_luffa.c \
  algo/luffa/luffa.c \
  algo/luffa/luffa_for_sse2.c \
@@ -200,6 +201,7 @@ cpuminer_SOURCES = \
  algo/skein/skein2-gate.c \
  algo/sm3/sm3.c \
  algo/sm3/sm3-hash-4way.c \
  algo/swifftx/swifftx.c \
  algo/tiger/sph_tiger.c \
  algo/whirlpool/sph_whirlpool.c \
  algo/whirlpool/whirlpool-hash-4way.c \
@@ -279,10 +281,17 @@ cpuminer_SOURCES = \
  algo/x17/sonoa-4way.c \
  algo/x17/sonoa.c \
  algo/x20/x20r.c \
  algo/x22/x22i-4way.c \
  algo/x22/x22i.c \
  algo/x22/x22i-gate.c \
  algo/x22/x25x.c \
  algo/x22/x25x-4way.c \
  algo/yescrypt/yescrypt.c \
  algo/yescrypt/sha256_Y.c \
  algo/yescrypt/yescrypt-best.c \
  algo/yespower/yespower.c \
  algo/yespower/yespower-gate.c \
  algo/yespower/yespower-blake2b.c \
  algo/yespower/crypto/blake2b-yp.c \
  algo/yespower/sha256_p.c \
  algo/yespower/yespower-opt.c
README.md (18 changed lines)
@@ -92,6 +92,7 @@ Supported Algorithms
  phi2-lux      identical to phi2
  pluck         Pluck:128 (Supcoin)
  polytimos     Ninja
  power2b       MicroBitcoin (MBC)
  quark         Quark
  qubit         Qubit
  scrypt        scrypt(1024, 1, 1) (default)
@@ -121,13 +122,15 @@ Supported Algorithms
  x13sm3        hsr (Hshare)
  x14           X14
  x15           X15
  x16r          Ravencoin (RVN) (original algo)
  x16rv2        Ravencoin (RVN) (new algo)
  x16r
  x16rv2        Ravencoin (RVN)
  x16rt         Gincoin (GIN)
  x16rt_veil    Veil (VEIL)
  x16rt-veil    Veil (VEIL)
  x16s          Pigeoncoin (PGN)
  x17
  x21s
  x22i
  x25x
  xevan         Bitsend (BSD)
  yescrypt      Globalboost-Y (BSTY)
  yescryptr8    BitZeny (ZNY)
@@ -135,6 +138,7 @@ Supported Algorithms
  yescryptr32   WAVI
  yespower      Cryply
  yespowerr16   Yenten (YTN)
  yespower-b2b  generic yespower + blake2b
  zr5           Ziftr

Errata
@@ -158,10 +162,12 @@ Bugs
----

Users are encouraged to post their bug reports using git issues or on the
Bitcoin Talk forum at:
Bitcoin Talk forum or opening an issue in git:

https://bitcointalk.org/index.php?topic=1326803.0

https://github.com/JayDDee/cpuminer-opt/issues

All problem reports must be accompanied by a proper problem definition.
This should include how the problem occurred, the command line and
output from the miner showing the startup messages and any errors.
@@ -173,10 +179,6 @@ Donations

cpuminer-opt has no fees of any kind but donations are accepted.

BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ

Happy mining!
RELEASE_NOTES

@@ -1,14 +1,6 @@
cpuminer-opt is a console program run from the command line using the
keyboard, not the mouse.

cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
This feature requires recent SW including GCC version 5 or higher and
openssl version 1.1 or higher. It may also require using "-march=znver1"
compile flag.

cpuminer-opt is a console program, if you're using a mouse you're doing it
wrong.
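A typical invocation uses the standard cpuminer options; the pool URL and
wallet address below are placeholders only, shown to illustrate the command
line, not taken from this commit:

    ./cpuminer -a x22i -o stratum+tcp://pool.example.com:3333 -u YOUR_WALLET_ADDRESS -p x -t 8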
Security warning
----------------

@@ -34,10 +26,43 @@ Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
supported.

64 bit Linux or Windows operating system. Apple and Android are not supported.
FreeBSD YMMV.

Change Log
----------

v3.9.11

Added x22i & x25x algos.
Blake2s 2% faster AVX2 with Intel CPU, slower with Ryzen v1, v2 ?

v3.9.10

Faster X* algos with AVX2.
Small improvements to summary stats report.

v3.9.9.1

Fixed a day1 bug that could cause the miner to idle for up to 2 minutes
under certain circumstances.

Redesigned summary stats report now includes session statistics.

More robust handling of statistics to reduce corruption.

Removed --hide-diff option.

Better handling of cpu-affinity with more than 64 CPUs.

v3.9.9

Added power2b algo for MicroBitcoin.
Added generic yespower-b2b (yespower + blake2b) algo to be used with
the parameters introduced in v3.9.7 for yespower & yescrypt.
Display additional info when a share is rejected.
Some low level enhancements and minor tweaking of log output.
RELEASE_NOTES (this file) and README.md added to Windows release package.

v3.9.8.1

Summary log report will be generated on stratum diff change or after 5 minutes,
algo-gate-api.c

@@ -116,8 +116,6 @@ void init_algo_gate( algo_gate_t* gate )
  gate->get_nonceptr = (void*)&std_get_nonceptr;
  gate->work_decode = (void*)&std_le_work_decode;
  gate->decode_extra_data = (void*)&do_nothing;
  gate->wait_for_diff = (void*)&std_wait_for_diff;
  gate->get_max64 = (void*)&get_max64_0x1fffffLL;
  gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
  gate->stratum_gen_work = (void*)&std_stratum_gen_work;
  gate->build_stratum_request = (void*)&std_le_build_stratum_request;
@@ -204,6 +202,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
  case ALGO_PHI2: register_phi2_algo ( gate ); break;
  case ALGO_PLUCK: register_pluck_algo ( gate ); break;
  case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
  case ALGO_POWER2B: register_power2b_algo ( gate ); break;
  case ALGO_QUARK: register_quark_algo ( gate ); break;
  case ALGO_QUBIT: register_qubit_algo ( gate ); break;
  case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
@@ -239,6 +238,8 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
  case ALGO_X16S: register_x16s_algo ( gate ); break;
  case ALGO_X17: register_x17_algo ( gate ); break;
  case ALGO_X21S: register_x21s_algo ( gate ); break;
  case ALGO_X22I: register_x22i_algo ( gate ); break;
  case ALGO_X25X: register_x25x_algo ( gate ); break;
  case ALGO_XEVAN: register_xevan_algo ( gate ); break;
/* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
  case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
@@ -251,6 +252,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
  case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
  case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
  case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break;
  case ALGO_YESPOWER_B2B: register_yespower_b2b_algo ( gate ); break;
  case ALGO_ZR5: register_zr5_algo ( gate ); break;
  default:
  applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
@@ -276,7 +278,7 @@ bool register_json_rpc2( algo_gate_t *gate )
  applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
  applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");

  gate->wait_for_diff = (void*)&do_nothing;
// gate->wait_for_diff = (void*)&do_nothing;
  gate->get_new_work = (void*)&jr2_get_new_work;
  gate->get_nonceptr = (void*)&jr2_get_nonceptr;
  gate->stratum_gen_work = (void*)&jr2_stratum_gen_work;
algo-gate-api.h

@@ -35,7 +35,7 @@
// 6. Determine if other non existant functions are required.
// That is determined by the need to add code in cpu-miner.c
// that applies only to the new algo. That is forbidden. All
// algo specific code must be in theh algo's file.
// algo specific code must be in the algo's file.
//
// 7. If new functions need to be added to the gate add the type
// to the structure, declare a null instance in this file and define
@@ -48,10 +48,10 @@
// instances as they are defined by default, or unsafe functions that
// are not needed by the algo.
//
// 9. Add an case entry to the switch/case in function register_gate
// 9. Add a case entry to the switch/case in function register_gate
// in file algo-gate-api.c for the new algo.
//
// 10 If a new function type was defined add an entry to ini talgo_gate
// 10 If a new function type was defined add an entry to init algo_gate
// to initialize the new function to its null instance described in step 7.
//
// 11. If the new algo has aliases add them to the alias array in
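As an illustration of the steps described in these comments, a registration
function for a hypothetical algo "foo" would look roughly like the sketch
below. The names scanhash_foo and foohash are placeholders, not code from
this commit; real algos follow the same pattern (see the register_*_algo
cases in algo-gate-api.c earlier in this diff).

   // Sketch only: "foo" is a hypothetical algo used to illustrate the steps above.
   bool register_foo_algo( algo_gate_t* gate )
   {
      gate->optimizations = SSE2_OPT | AVX2_OPT;   // CPU features the algo can exploit
      gate->scanhash      = (void*)&scanhash_foo;  // mandatory override
      gate->hash          = (void*)&foohash;       // mandatory override
      // Members not set here keep the defaults installed by init_algo_gate().
      return true;
   }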
@@ -85,14 +85,16 @@

typedef uint32_t set_t;

#define EMPTY_SET 0
#define SSE2_OPT 1
#define AES_OPT 2
#define SSE42_OPT 4
#define AVX_OPT 8
#define AVX2_OPT 0x10
#define SHA_OPT 0x20
#define AVX512_OPT 0x40
#define EMPTY_SET 0
#define SSE2_OPT 1
#define AES_OPT 2
#define SSE42_OPT 4
#define AVX_OPT 8          // Sandybridge
#define AVX2_OPT 0x10      // Haswell
#define SHA_OPT 0x20       // sha256 (Ryzen, Ice Lake)
#define AVX512_OPT 0x40    // AVX512- F, VL, DQ, BW (Skylake-X)
#define VAES_OPT 0x80      // VAES (Ice Lake)


// return set containing all elements from sets a & b
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
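These flags form a simple bit set: an algo declares which instruction-set
extensions it can exploit and the miner compares that against what the CPU
actually supports. A small illustrative fragment (cpu_features here is a
hypothetical value from CPU detection, not code taken from this diff):

   set_t algo_features = set_union( AES_OPT, AVX2_OPT );  // algo uses AES + AVX2
   set_t cpu_features  = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
   if ( set_excl( algo_features, cpu_features ) )          // no features in common
      applog( LOG_WARNING, "This algo's optimizations are not supported by this CPU." );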
@@ -108,14 +110,7 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }

typedef struct
{
// special case, only one target, provides a callback for scanhash to
// submit work with less overhead.
// bool (*submit_work ) ( struct thr_info*, const struct work* );

// mandatory functions, must be overwritten
// Added a 5th arg for the thread_info structure to replace the int thr id
// in the first arg. Both will co-exist during the trasition.
//int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );

// optional unsafe, must be overwritten if algo uses function
@@ -123,27 +118,55 @@ void ( *hash ) ( void*, const void*, uint32_t ) ;
void ( *hash_suw ) ( void*, const void* );

//optional, safe to use default in most cases

// Allocate thread local buffers and other initialization specific to miner
// threads.
bool ( *miner_thread_init ) ( int );

// Generate global blockheader from stratum data.
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );

// Get thread local copy of blockheader with unique nonce.
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t*,
                         bool );

// Return pointer to nonce in blockheader.
uint32_t *( *get_nonceptr ) ( uint32_t* );
void ( *decode_extra_data ) ( struct work*, uint64_t* );
void ( *wait_for_diff ) ( struct stratum_ctx* );
int64_t ( *get_max64 ) ();

// Decode getwork blockheader
bool ( *work_decode ) ( const json_t*, struct work* );

// Extra getwork data
void ( *decode_extra_data ) ( struct work*, uint64_t* );

bool ( *submit_getwork_result ) ( CURL*, struct work* );

void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );

// Increment extranonce
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );

void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
                               uint32_t*, uint32_t, uint32_t );
                               uint32_t*, uint32_t, uint32_t );
// Build mining.submit message
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );

char* ( *malloc_txs_request ) ( struct work* );

// Big or little
void ( *set_work_data_endian ) ( struct work* );

double ( *calc_network_diff ) ( struct work* );

// Wait for first work
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
void ( *resync_threads ) ( struct work* );

// Diverge mining threads
bool ( *do_this_thread ) ( int );

// After do_this_thread
void ( *resync_threads ) ( struct work* );

json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
bool ( *stratum_handle_response )( json_t* );
set_t optimizations;
@@ -198,8 +221,6 @@ void null_hash_suw();
|
||||
|
||||
// optional safe targets, default listed first unless noted.
|
||||
|
||||
void std_wait_for_diff();
|
||||
|
||||
uint32_t *std_get_nonceptr( uint32_t *work_data );
|
||||
uint32_t *jr2_get_nonceptr( uint32_t *work_data );
|
||||
|
||||
@@ -214,21 +235,13 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
|
||||
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
|
||||
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
|
||||
|
||||
// pick your favorite or define your own
|
||||
int64_t get_max64_0x1fffffLL(); // default
|
||||
int64_t get_max64_0x40LL();
|
||||
int64_t get_max64_0x3ffff();
|
||||
int64_t get_max64_0x3fffffLL();
|
||||
int64_t get_max64_0x1ffff();
|
||||
int64_t get_max64_0xffffLL();
|
||||
|
||||
bool std_le_work_decode( const json_t *val, struct work *work );
|
||||
bool std_be_work_decode( const json_t *val, struct work *work );
|
||||
bool jr2_work_decode( const json_t *val, struct work *work );
|
||||
bool jr2_work_decode( const json_t *val, struct work *work );
|
||||
|
||||
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
|
||||
|
||||
void std_le_build_stratum_request( char *req, struct work *work );
|
||||
void std_be_build_stratum_request( char *req, struct work *work );
|
||||
@@ -242,8 +255,8 @@ void set_work_data_big_endian( struct work *work );
|
||||
double std_calc_network_diff( struct work *work );
|
||||
|
||||
void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits );
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits );
|
||||
|
||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||
|
||||
@@ -264,8 +277,8 @@ int std_get_work_data_size();
|
||||
// by calling the algo's register function.
|
||||
bool register_algo_gate( int algo, algo_gate_t *gate );
|
||||
|
||||
// Override any default gate functions that are applicable and do any other
|
||||
// algo-specific initialization.
|
||||
// Called by algos toverride any default gate functions that are applicable
|
||||
// and do any other algo-specific initialization.
|
||||
// The register functions for all the algos can be declared here to reduce
|
||||
// compiler warnings but that's just more work for devs adding new algos.
|
||||
bool register_algo( algo_gate_t *gate );
|
||||
@@ -278,5 +291,7 @@ bool register_json_rpc2( algo_gate_t *gate );
|
||||
// use this to call the hash function of an algo directly, ie util.c test.
|
||||
void exec_hash_function( int algo, void *output, const void *pdata );
|
||||
|
||||
void get_algo_alias( char** algo_or_alias );
|
||||
// Validate a string as a known algo and alias, updates arg to proper
|
||||
// algo name if valid alias, NULL if invalid alias or algo.
|
||||
void get_algo_alias( char **algo_or_alias );
|
||||
|
||||
|
@@ -74,18 +74,12 @@ int scanhash_argon2( struct work* work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t argon2_get_max64 ()
|
||||
{
|
||||
return 0x1ffLL;
|
||||
}
|
||||
|
||||
bool register_argon2_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_argon2;
|
||||
gate->hash = (void*)&argon2hash;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->get_max64 = (void*)&argon2_get_max64;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
return true;
|
||||
|
@@ -179,12 +179,9 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t get_max64_0x1ff() { return 0x1ff; }
|
||||
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d4096;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ff;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
|
@@ -1,18 +1,8 @@
|
||||
#include "blake-gate.h"
|
||||
|
||||
int64_t blake_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
bool register_blake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blake_get_max64;
|
||||
//#if defined (__AVX2__) && defined (FOUR_WAY)
|
||||
// gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_blake_8way;
|
||||
// gate->hash = (void*)&blakehash_8way;
|
||||
#if defined(BLAKE_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_blake_4way;
|
||||
|
@@ -1,13 +1,5 @@
|
||||
#include "blake2b-gate.h"
|
||||
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake2s_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
*/
|
||||
|
||||
bool register_blake2b_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKE2B_4WAY)
|
||||
@@ -17,7 +9,6 @@ bool register_blake2b_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_blake2b;
|
||||
gate->hash = (void*)&blake2b_hash;
|
||||
#endif
|
||||
// gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
gate->optimizations = AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
@@ -1,12 +1,5 @@
|
||||
#include "blake2s-gate.h"
|
||||
|
||||
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake2s_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
bool register_blake2s_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKE2S_8WAY)
|
||||
@@ -19,7 +12,6 @@ bool register_blake2s_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_blake2s;
|
||||
gate->hash = (void*)&blake2s_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
@@ -20,12 +20,13 @@
|
||||
//#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
|
||||
|
||||
/*
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
||||
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
|
||||
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
|
||||
};
|
||||
*/
|
||||
|
||||
static const uint8_t blake2s_sigma[10][16] =
|
||||
{
|
||||
@@ -41,6 +42,7 @@ static const uint8_t blake2s_sigma[10][16] =
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||||
};
|
||||
|
||||
|
||||
// define a constant for initial param.
|
||||
|
||||
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen )
|
||||
@@ -88,41 +90,45 @@ int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
|
||||
memcpy_128( m, block, 16 );
|
||||
memcpy_128( v, S->h, 8 );
|
||||
|
||||
v[ 8] = _mm_set1_epi32( blake2s_IV[0] );
|
||||
v[ 9] = _mm_set1_epi32( blake2s_IV[1] );
|
||||
v[10] = _mm_set1_epi32( blake2s_IV[2] );
|
||||
v[11] = _mm_set1_epi32( blake2s_IV[3] );
|
||||
v[ 8] = m128_const1_64( 0x6A09E6676A09E667ULL );
|
||||
v[ 9] = m128_const1_64( 0xBB67AE85BB67AE85ULL );
|
||||
v[10] = m128_const1_64( 0x3C6EF3723C6EF372ULL );
|
||||
v[11] = m128_const1_64( 0xA54FF53AA54FF53AULL );
|
||||
v[12] = _mm_xor_si128( _mm_set1_epi32( S->t[0] ),
|
||||
_mm_set1_epi32( blake2s_IV[4] ) );
|
||||
m128_const1_64( 0x510E527F510E527FULL ) );
|
||||
v[13] = _mm_xor_si128( _mm_set1_epi32( S->t[1] ),
|
||||
_mm_set1_epi32( blake2s_IV[5] ) );
|
||||
m128_const1_64( 0x9B05688C9B05688CULL ) );
|
||||
v[14] = _mm_xor_si128( _mm_set1_epi32( S->f[0] ),
|
||||
_mm_set1_epi32( blake2s_IV[6] ) );
|
||||
m128_const1_64( 0x1F83D9AB1F83D9ABULL ) );
|
||||
v[15] = _mm_xor_si128( _mm_set1_epi32( S->f[1] ),
|
||||
_mm_set1_epi32( blake2s_IV[7] ) );
|
||||
m128_const1_64( 0x5BE0CD195BE0CD19ULL ) );
|
||||
|
||||
#define G4W(r,i,a,b,c,d) \
|
||||
#define G4W( sigma0, sigma1, a, b, c, d ) \
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
uint8_t s0 = sigma0; \
|
||||
uint8_t s1 = sigma1; \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ s0 ] ); \
|
||||
d = mm128_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm128_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ s1 ] ); \
|
||||
d = mm128_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm128_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define ROUND4W(r) \
|
||||
do { \
|
||||
G4W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G4W( r, 1, v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G4W( r, 2, v[ 2], v[ 6], v[10], v[14] ); \
|
||||
G4W( r, 3, v[ 3], v[ 7], v[11], v[15] ); \
|
||||
G4W( r, 4, v[ 0], v[ 5], v[10], v[15] ); \
|
||||
G4W( r, 5, v[ 1], v[ 6], v[11], v[12] ); \
|
||||
G4W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
|
||||
G4W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
|
||||
uint8_t *sigma = (uint8_t*)&blake2s_sigma[r]; \
|
||||
G4W( sigma[ 0], sigma[ 1], v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G4W( sigma[ 2], sigma[ 3], v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G4W( sigma[ 4], sigma[ 5], v[ 2], v[ 6], v[10], v[14] ); \
|
||||
G4W( sigma[ 6], sigma[ 7], v[ 3], v[ 7], v[11], v[15] ); \
|
||||
G4W( sigma[ 8], sigma[ 9], v[ 0], v[ 5], v[10], v[15] ); \
|
||||
G4W( sigma[10], sigma[11], v[ 1], v[ 6], v[11], v[12] ); \
|
||||
G4W( sigma[12], sigma[13], v[ 2], v[ 7], v[ 8], v[13] ); \
|
||||
G4W( sigma[14], sigma[15], v[ 3], v[ 4], v[ 9], v[14] ); \
|
||||
} while(0)
|
||||
|
||||
ROUND4W( 0 );
|
||||
@@ -144,26 +150,47 @@ do { \
|
||||
return 0;
|
||||
}
|
||||
|
||||
// There is a problem that can't be resolved internally.
|
||||
// If the last block is a full 64 bytes it should not be compressed in
|
||||
// update but left for final. However, when streaming, it isn't known
|
||||
// which block is last. There may be a subsequent call to update to add
|
||||
// more data.
|
||||
//
|
||||
// The reference code handled this by juggling 2 blocks at a time at
|
||||
// a significant performance penalty.
|
||||
//
|
||||
// Instead a new function is introduced called full_blocks which combines
|
||||
// update and final and is to be used in non-streaming mode where the data
|
||||
// is a multiple of 64 bytes.
|
||||
//
|
||||
// Supported:
|
||||
// 64 + 16 bytes (blake2s with midstate optimization)
|
||||
// 80 bytes without midstate (blake2s without midstate optimization)
|
||||
// Any multiple of 64 bytes in one shot (x25x)
|
||||
//
|
||||
// Unsupported:
|
||||
// Stream of 64 byte blocks one at a time.
|
||||
//
|
||||
// use for part blocks or when streaming more data
|
||||
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
|
||||
uint64_t inlen )
|
||||
{
|
||||
__m128i *input = (__m128i*)in;
|
||||
__m128i *buf = (__m128i*)S->buf;
|
||||
const int bsize = BLAKE2S_BLOCKBYTES;
|
||||
__m128i *input = (__m128i*)in;
|
||||
__m128i *buf = (__m128i*)S->buf;
|
||||
|
||||
while( inlen > 0 )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
if( inlen >= bsize - left )
|
||||
if( inlen >= BLAKE2S_BLOCKBYTES - left )
|
||||
{
|
||||
memcpy_128( buf + (left>>2), input, (bsize - left) >> 2 );
|
||||
S->buflen += bsize - left;
|
||||
memcpy_128( buf + (left>>2), input, (BLAKE2S_BLOCKBYTES - left) >> 2 );
|
||||
S->buflen += BLAKE2S_BLOCKBYTES - left;
|
||||
S->t[0] += BLAKE2S_BLOCKBYTES;
|
||||
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
|
||||
blake2s_4way_compress( S, buf );
|
||||
S->buflen = 0;
|
||||
input += ( bsize >> 2 );
|
||||
inlen -= bsize;
|
||||
input += ( BLAKE2S_BLOCKBYTES >> 2 );
|
||||
inlen -= BLAKE2S_BLOCKBYTES;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -195,8 +222,45 @@ int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen )
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Update and final when inlen is a multiple of 64 bytes
|
||||
int blake2s_4way_full_blocks( blake2s_4way_state *S, void *out,
|
||||
const void *input, uint64_t inlen )
|
||||
{
|
||||
__m128i *in = (__m128i*)input;
|
||||
__m128i *buf = (__m128i*)S->buf;
|
||||
|
||||
while( inlen > BLAKE2S_BLOCKBYTES )
|
||||
{
|
||||
memcpy_128( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
|
||||
S->buflen = BLAKE2S_BLOCKBYTES;
|
||||
inlen -= BLAKE2S_BLOCKBYTES;
|
||||
S->t[0] += BLAKE2S_BLOCKBYTES;
|
||||
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
|
||||
blake2s_4way_compress( S, buf );
|
||||
S->buflen = 0;
|
||||
in += ( BLAKE2S_BLOCKBYTES >> 2 );
|
||||
}
|
||||
|
||||
// last block
|
||||
memcpy_128( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
|
||||
S->buflen = BLAKE2S_BLOCKBYTES;
|
||||
S->t[0] += S->buflen;
|
||||
S->t[1] += ( S->t[0] < S->buflen );
|
||||
if ( S->last_node ) S->f[1] = ~0U;
|
||||
S->f[0] = ~0U;
|
||||
blake2s_4way_compress( S, buf );
|
||||
|
||||
for ( int i = 0; i < 8; ++i )
|
||||
casti_m128i( out, i ) = S->h[ i ];
|
||||
return 0;
|
||||
}
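A usage sketch of the one-shot path described in the comments above (vdata
and vhash stand in for 4-way interleaved input and output buffers; they are
illustrative names, not code from this commit):

   blake2s_4way_state ctx;
   blake2s_4way_init( &ctx, 32 );                      // 32 byte digest per lane
   blake2s_4way_full_blocks( &ctx, vhash, vdata, 64 ); // inlen must be a multiple of 64 bytes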
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// The commented code below is slower on Intel but faster on
|
||||
// Zen1 AVX2. It's also faster than Zen1 AVX.
|
||||
// Ryzen gen2 is unknown at this time.
|
||||
|
||||
int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
|
||||
{
|
||||
__m256i m[16];
|
||||
@@ -205,6 +269,23 @@ int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
|
||||
memcpy_256( m, block, 16 );
|
||||
memcpy_256( v, S->h, 8 );
|
||||
|
||||
v[ 8] = m256_const1_64( 0x6A09E6676A09E667ULL );
|
||||
v[ 9] = m256_const1_64( 0xBB67AE85BB67AE85ULL );
|
||||
v[10] = m256_const1_64( 0x3C6EF3723C6EF372ULL );
|
||||
v[11] = m256_const1_64( 0xA54FF53AA54FF53AULL );
|
||||
v[12] = _mm256_xor_si256( _mm256_set1_epi32( S->t[0] ),
|
||||
m256_const1_64( 0x510E527F510E527FULL ) );
|
||||
|
||||
v[13] = _mm256_xor_si256( _mm256_set1_epi32( S->t[1] ),
|
||||
m256_const1_64( 0x9B05688C9B05688CULL ) );
|
||||
|
||||
v[14] = _mm256_xor_si256( _mm256_set1_epi32( S->f[0] ),
|
||||
m256_const1_64( 0x1F83D9AB1F83D9ABULL ) );
|
||||
|
||||
v[15] = _mm256_xor_si256( _mm256_set1_epi32( S->f[1] ),
|
||||
m256_const1_64( 0x5BE0CD195BE0CD19ULL ) );
|
||||
|
||||
/*
|
||||
v[ 8] = _mm256_set1_epi32( blake2s_IV[0] );
|
||||
v[ 9] = _mm256_set1_epi32( blake2s_IV[1] );
|
||||
v[10] = _mm256_set1_epi32( blake2s_IV[2] );
|
||||
@@ -218,6 +299,7 @@ int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
|
||||
v[15] = _mm256_xor_si256( _mm256_set1_epi32( S->f[1] ),
|
||||
_mm256_set1_epi32( blake2s_IV[7] ) );
|
||||
|
||||
|
||||
#define G8W(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
|
||||
@@ -231,7 +313,36 @@ do { \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while(0)
|
||||
*/
|
||||
|
||||
#define G8W( sigma0, sigma1, a, b, c, d) \
|
||||
do { \
|
||||
uint8_t s0 = sigma0; \
|
||||
uint8_t s1 = sigma1; \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), m[ s0 ] ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), m[ s1 ] ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND8W(r) \
|
||||
do { \
|
||||
uint8_t *sigma = (uint8_t*)&blake2s_sigma[r]; \
|
||||
G8W( sigma[ 0], sigma[ 1], v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G8W( sigma[ 2], sigma[ 3], v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G8W( sigma[ 4], sigma[ 5], v[ 2], v[ 6], v[10], v[14] ); \
|
||||
G8W( sigma[ 6], sigma[ 7], v[ 3], v[ 7], v[11], v[15] ); \
|
||||
G8W( sigma[ 8], sigma[ 9], v[ 0], v[ 5], v[10], v[15] ); \
|
||||
G8W( sigma[10], sigma[11], v[ 1], v[ 6], v[11], v[12] ); \
|
||||
G8W( sigma[12], sigma[13], v[ 2], v[ 7], v[ 8], v[13] ); \
|
||||
G8W( sigma[14], sigma[15], v[ 3], v[ 4], v[ 9], v[14] ); \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
#define ROUND8W(r) \
|
||||
do { \
|
||||
G8W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
@@ -243,6 +354,7 @@ do { \
|
||||
G8W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
|
||||
G8W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
|
||||
} while(0)
|
||||
*/
|
||||
|
||||
ROUND8W( 0 );
|
||||
ROUND8W( 1 );
|
||||
|
@@ -64,7 +64,7 @@ typedef struct __blake2s_nway_param
|
||||
ALIGN( 64 ) typedef struct __blake2s_4way_state
|
||||
{
|
||||
__m128i h[8];
|
||||
uint8_t buf[ BLAKE2S_BLOCKBYTES * 4 ];
|
||||
uint8_t buf[ 2 * BLAKE2S_BLOCKBYTES * 4 ];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
size_t buflen;
|
||||
@@ -81,7 +81,7 @@ int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen );
|
||||
ALIGN( 64 ) typedef struct __blake2s_8way_state
|
||||
{
|
||||
__m256i h[8];
|
||||
uint8_t buf[ BLAKE2S_BLOCKBYTES * 8 ];
|
||||
uint8_t buf[ 2 * BLAKE2S_BLOCKBYTES * 8 ];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
size_t buflen;
|
||||
@@ -92,6 +92,9 @@ int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen );
|
||||
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
|
||||
uint64_t inlen );
|
||||
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
|
||||
int blake2s_4way_full_blocks( blake2s_4way_state *S, void *out,
|
||||
const void *input, uint64_t inlen );
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -70,18 +70,3 @@ int scanhash_blake2s( struct work *work,
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake2s_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
bool register_blake2s_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_blake2s;
|
||||
gate->hash = (void*)&blake2s_hash;
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
return true;
|
||||
};
|
||||
*/
|
||||
|
@@ -403,7 +403,9 @@ static const sph_u64 CB[16] = {
|
||||
__m256i M[16]; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
unsigned r; \
|
||||
const __m256i shuff_bswap64 = m256_const2_64( 0x08090a0b0c0d0e0f, \
|
||||
0x0001020304050607 ) \
|
||||
unsigned r; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
@@ -412,53 +414,53 @@ static const sph_u64 CB[16] = {
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set_epi64x( CB0, CB0, CB0, CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set_epi64x( CB1, CB1, CB1, CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set_epi64x( CB2, CB2, CB2, CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set_epi64x( CB3, CB3, CB3, CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB4, CB4, CB4, CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB5, CB5, CB5, CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
|
||||
M[0x0] = mm256_bswap_64( *(buf+0) ); \
|
||||
M[0x1] = mm256_bswap_64( *(buf+1) ); \
|
||||
M[0x2] = mm256_bswap_64( *(buf+2) ); \
|
||||
M[0x3] = mm256_bswap_64( *(buf+3) ); \
|
||||
M[0x4] = mm256_bswap_64( *(buf+4) ); \
|
||||
M[0x5] = mm256_bswap_64( *(buf+5) ); \
|
||||
M[0x6] = mm256_bswap_64( *(buf+6) ); \
|
||||
M[0x7] = mm256_bswap_64( *(buf+7) ); \
|
||||
M[0x8] = mm256_bswap_64( *(buf+8) ); \
|
||||
M[0x9] = mm256_bswap_64( *(buf+9) ); \
|
||||
M[0xA] = mm256_bswap_64( *(buf+10) ); \
|
||||
M[0xB] = mm256_bswap_64( *(buf+11) ); \
|
||||
M[0xC] = mm256_bswap_64( *(buf+12) ); \
|
||||
M[0xD] = mm256_bswap_64( *(buf+13) ); \
|
||||
M[0xE] = mm256_bswap_64( *(buf+14) ); \
|
||||
M[0xF] = mm256_bswap_64( *(buf+15) ); \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set1_epi64x( CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set1_epi64x( CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set1_epi64x( CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set1_epi64x( CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB7, CB7, CB7, CB7 ) ); \
|
||||
M[0x0] = _mm256_shuffle_epi8( *(buf+ 0), shuff_bswap64 ); \
|
||||
M[0x1] = _mm256_shuffle_epi8( *(buf+ 1), shuff_bswap64 ); \
|
||||
M[0x2] = _mm256_shuffle_epi8( *(buf+ 2), shuff_bswap64 ); \
|
||||
M[0x3] = _mm256_shuffle_epi8( *(buf+ 3), shuff_bswap64 ); \
|
||||
M[0x4] = _mm256_shuffle_epi8( *(buf+ 4), shuff_bswap64 ); \
|
||||
M[0x5] = _mm256_shuffle_epi8( *(buf+ 5), shuff_bswap64 ); \
|
||||
M[0x6] = _mm256_shuffle_epi8( *(buf+ 6), shuff_bswap64 ); \
|
||||
M[0x7] = _mm256_shuffle_epi8( *(buf+ 7), shuff_bswap64 ); \
|
||||
M[0x8] = _mm256_shuffle_epi8( *(buf+ 8), shuff_bswap64 ); \
|
||||
M[0x9] = _mm256_shuffle_epi8( *(buf+ 9), shuff_bswap64 ); \
|
||||
M[0xA] = _mm256_shuffle_epi8( *(buf+10), shuff_bswap64 ); \
|
||||
M[0xB] = _mm256_shuffle_epi8( *(buf+11), shuff_bswap64 ); \
|
||||
M[0xC] = _mm256_shuffle_epi8( *(buf+12), shuff_bswap64 ); \
|
||||
M[0xD] = _mm256_shuffle_epi8( *(buf+13), shuff_bswap64 ); \
|
||||
M[0xE] = _mm256_shuffle_epi8( *(buf+14), shuff_bswap64 ); \
|
||||
M[0xF] = _mm256_shuffle_epi8( *(buf+15), shuff_bswap64 ); \
|
||||
for (r = 0; r < 16; r ++) \
|
||||
ROUND_B_4WAY(r); \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S0, V0 ), V8 ), H0 ); \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S1, V1 ), V9 ), H1 ); \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S2, V2 ), VA ), H2 ); \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S3, V3 ), VB ), H3 ); \
|
||||
H4 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H4 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S0, V4 ), VC ), H4 ); \
|
||||
H5 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H5 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S1, V5 ), VD ), H5 ); \
|
||||
H6 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H6 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S2, V6 ), VE ), H6 ); \
|
||||
H7 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
H7 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S3, V7 ), VF ), H7 ); \
|
||||
} while (0)
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
@@ -491,8 +493,7 @@ static const sph_u64 CB[16] = {
|
||||
m256_const1_64( CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
m256_const1_64( CB7 ) ); \
|
||||
shuf_bswap64 = m256_const_64( 0x08090a0b0c0d0e0f, 0x0001020304050607, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
shuf_bswap64 = m256_const2_64( 0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
M0 = _mm256_shuffle_epi8( *(buf+ 0), shuf_bswap64 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap64 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap64 ); \
|
||||
@@ -620,7 +621,7 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
z = 0x80 >> n;
|
||||
zz = ((ub & -z) | z) & 0xFF;
|
||||
buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
|
||||
buf[ptr>>3] = _mm256_set1_epi64x( zz );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
if (ptr == 0 )
|
||||
|
@@ -1,13 +1,6 @@
|
||||
#include "blakecoin-gate.h"
|
||||
#include <memory.h>
|
||||
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blakecoin_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
// return 0x3fffffLL;
|
||||
}
|
||||
|
||||
// vanilla uses default gen merkle root, otherwise identical to blakecoin
|
||||
bool register_vanilla_algo( algo_gate_t* gate )
|
||||
{
|
||||
@@ -23,7 +16,6 @@ bool register_vanilla_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -93,33 +93,3 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
|
||||
{
|
||||
SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
|
||||
}
|
||||
*/
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blakecoin_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
// vanilla uses default gen merkle root, otherwise identical to blakecoin
|
||||
bool register_vanilla_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_blakecoin;
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
blakecoin_init( &blake_init_ctx );
|
||||
return true;
|
||||
}
|
||||
|
||||
bool register_blakecoin_algo( algo_gate_t* gate )
|
||||
{
|
||||
register_vanilla_algo( gate );
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
@@ -38,7 +38,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
|
||||
if (!have_longpoll && work->height > *net_blocks + 1)
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
if (opt_showdiff && net_diff > 0.)
|
||||
if ( net_diff > 0. )
|
||||
{
|
||||
if (net_diff != work->targetdiff)
|
||||
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
|
||||
@@ -116,7 +116,7 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
// block header suffix from coinb2 (stake version)
|
||||
memcpy( &g_work->data[44],
|
||||
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
|
||||
sctx->bloc_height = g_work->data[32];
|
||||
sctx->block_height = g_work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
}
|
||||
@@ -154,7 +154,6 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
|
@@ -143,7 +143,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
|
||||
if (!have_longpoll && work->height > *net_blocks + 1)
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
if (opt_showdiff && net_diff > 0.)
|
||||
if (net_diff > 0.)
|
||||
{
|
||||
if (net_diff != work->targetdiff)
|
||||
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
|
||||
@@ -269,7 +269,6 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_decred;
|
||||
gate->hash = (void*)&decred_hash;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
|
@@ -10,7 +10,6 @@ bool register_pentablake_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -1,11 +1,8 @@
|
||||
#include "bmw512-gate.h"
|
||||
|
||||
int64_t bmw512_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_bmw512_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&bmw512_get_max64;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined (BMW512_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_bmw512_4way;
|
||||
|
@@ -363,7 +363,6 @@ bool register_cryptolight_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_cryptolight;
|
||||
gate->hash = (void*)&cryptolight_hash;
|
||||
gate->hash_suw = (void*)&cryptolight_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -111,7 +111,6 @@ bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -123,7 +122,6 @@ bool register_cryptonightv7_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -7,7 +7,7 @@
|
||||
|
||||
// 2x128
|
||||
|
||||
/*
|
||||
|
||||
// The result of hashing 10 rounds of initial data which consists of params
|
||||
// zero padded.
|
||||
static const uint64_t IV256[] =
|
||||
@@ -25,7 +25,7 @@ static const uint64_t IV512[] =
|
||||
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
|
||||
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
|
||||
};
|
||||
*/
|
||||
|
||||
|
||||
static void transform_2way( cube_2way_context *sp )
|
||||
{
|
||||
@@ -97,39 +97,30 @@ static void transform_2way( cube_2way_context *sp )
|
||||
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
|
||||
int blockbytes )
|
||||
{
|
||||
__m128i* h = (__m128i*)sp->h;
|
||||
__m256i *h = (__m256i*)sp->h;
|
||||
__m128i *iv = (__m128i*)( hashbitlen == 512 ? (__m128i*)IV512
|
||||
: (__m128i*)IV256 );
|
||||
sp->hashlen = hashbitlen/128;
|
||||
sp->blocksize = blockbytes/16;
|
||||
sp->rounds = rounds;
|
||||
sp->pos = 0;
|
||||
|
||||
if ( hashbitlen == 512 )
|
||||
{
|
||||
|
||||
h[ 0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
|
||||
h[ 2] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
|
||||
h[ 4] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
|
||||
h[ 6] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
|
||||
h[ 8] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
|
||||
h[10] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
|
||||
h[12] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
|
||||
h[14] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
|
||||
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
|
||||
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
|
||||
}
|
||||
else
|
||||
{
|
||||
h[ 0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
|
||||
h[ 2] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
|
||||
h[ 4] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
|
||||
h[ 6] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
|
||||
h[ 8] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
|
||||
h[10] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
|
||||
h[12] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
|
||||
h[14] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
|
||||
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
|
||||
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
|
||||
}
|
||||
h[ 0] = m256_const1_128( iv[0] );
|
||||
h[ 1] = m256_const1_128( iv[1] );
|
||||
h[ 2] = m256_const1_128( iv[2] );
|
||||
h[ 3] = m256_const1_128( iv[3] );
|
||||
h[ 4] = m256_const1_128( iv[4] );
|
||||
h[ 5] = m256_const1_128( iv[5] );
|
||||
h[ 6] = m256_const1_128( iv[6] );
|
||||
h[ 7] = m256_const1_128( iv[7] );
|
||||
h[ 0] = m256_const1_128( iv[0] );
|
||||
h[ 1] = m256_const1_128( iv[1] );
|
||||
h[ 2] = m256_const1_128( iv[2] );
|
||||
h[ 3] = m256_const1_128( iv[3] );
|
||||
h[ 4] = m256_const1_128( iv[4] );
|
||||
h[ 5] = m256_const1_128( iv[5] );
|
||||
h[ 6] = m256_const1_128( iv[6] );
|
||||
h[ 7] = m256_const1_128( iv[7] );
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -164,11 +155,11 @@ int cube_2way_close( cube_2way_context *sp, void *output )
|
||||
|
||||
// pos is zero for 64 byte data, 1 for 80 byte data.
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
|
||||
_mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 ) );
|
||||
m256_const2_64( 0, 0x0000000000000080 ) );
|
||||
transform_2way( sp );
|
||||
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7],
|
||||
_mm256_set_epi32( 1,0,0,0, 1,0,0,0 ) );
|
||||
m256_const2_64( 0x0000000100000000, 0 ) );
|
||||
|
||||
for ( i = 0; i < 10; ++i ) transform_2way( sp );
|
||||
|
||||
@@ -197,13 +188,13 @@ int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
|
||||
// pos is zero for 64 byte data, 1 for 80 byte data.
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
|
||||
_mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 ) );
|
||||
m256_const2_64( 0, 0x0000000000000080 ) );
|
||||
transform_2way( sp );
|
||||
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
|
||||
1,0,0,0 ) );
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7],
|
||||
m256_const2_64( 0x0000000100000000, 0 ) );
|
||||
|
||||
for ( i = 0; i < 10; ++i ) transform_2way( sp );
|
||||
for ( i = 0; i < 10; ++i ) transform_2way( sp );
|
||||
|
||||
memcpy( hash, sp->h, sp->hashlen<<5 );
|
||||
return 0;
|
||||
|
@@ -100,7 +100,6 @@ bool register_dmd_gr_algo( algo_gate_t* gate )
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_groestl;
|
||||
gate->hash = (void*)&groestlhash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -88,15 +88,3 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
bool register_myriad_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
init_myrgr_ctx();
|
||||
gate->scanhash = (void*)&scanhash_myriad;
|
||||
gate->hash = (void*)&myriadhash;
|
||||
// gate->hash_alt = (void*)&myriadhash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
*/
|
||||
|
@@ -12,7 +12,6 @@ bool register_myriad_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&myriad_hash;
|
||||
#endif
|
||||
gate->optimizations = AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -32,8 +32,6 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
//#include "miner.h"
|
||||
#include "hamsi-hash-4way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
@@ -100,7 +98,7 @@ extern "C"{
|
||||
#endif
|
||||
|
||||
//#include "hamsi-helper-4way.c"
|
||||
|
||||
/*
|
||||
static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
|
||||
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
|
||||
@@ -109,7 +107,7 @@ static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
|
||||
SPH_C32(0x6769756d)
|
||||
};
|
||||
|
||||
*/
|
||||
static const sph_u32 alpha_n[] = {
|
||||
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
|
||||
@@ -138,6 +136,7 @@ static const sph_u32 alpha_f[] = {
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
|
||||
};
|
||||
|
||||
|
||||
// imported from hamsi helper
|
||||
|
||||
/* Note: this table lists bits within each byte from least
|
||||
@@ -529,49 +528,34 @@ static const sph_u32 T512[64][16] = {
|
||||
SPH_C32(0xe7e00a94) }
|
||||
};
|
||||
|
||||
|
||||
#define INPUT_BIG \
|
||||
do { \
|
||||
const __m256i zero = _mm256_setzero_si256(); \
|
||||
__m256i db = *buf; \
|
||||
const sph_u32 *tp = &T512[0][0]; \
|
||||
m0 = zero; \
|
||||
m1 = zero; \
|
||||
m2 = zero; \
|
||||
m3 = zero; \
|
||||
m4 = zero; \
|
||||
m5 = zero; \
|
||||
m6 = zero; \
|
||||
m7 = zero; \
|
||||
const uint64_t *tp = (uint64_t*)&T512[0][0]; \
|
||||
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m256_zero; \
|
||||
for ( int u = 0; u < 64; u++ ) \
|
||||
{ \
|
||||
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
|
||||
dm = mm256_negate_32( _mm256_or_si256( dm, \
|
||||
_mm256_slli_epi64( dm, 32 ) ) ); \
|
||||
m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x1], tp[0x0], tp[0x1], tp[0x0], \
|
||||
tp[0x1], tp[0x0], tp[0x1], tp[0x0] ) ) ); \
|
||||
m256_const1_64( tp[0] ) ) ); \
|
||||
m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x3], tp[0x2], tp[0x3], tp[0x2], \
|
||||
tp[0x3], tp[0x2], tp[0x3], tp[0x2] ) ) ); \
|
||||
m256_const1_64( tp[1] ) ) ); \
|
||||
m2 = _mm256_xor_si256( m2, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x5], tp[0x4], tp[0x5], tp[0x4], \
|
||||
tp[0x5], tp[0x4], tp[0x5], tp[0x4] ) ) ); \
|
||||
m256_const1_64( tp[2] ) ) ); \
|
||||
m3 = _mm256_xor_si256( m3, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x7], tp[0x6], tp[0x7], tp[0x6], \
|
||||
tp[0x7], tp[0x6], tp[0x7], tp[0x6] ) ) ); \
|
||||
m256_const1_64( tp[3] ) ) ); \
|
||||
m4 = _mm256_xor_si256( m4, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x9], tp[0x8], tp[0x9], tp[0x8], \
|
||||
tp[0x9], tp[0x8], tp[0x9], tp[0x8] ) ) ); \
|
||||
m256_const1_64( tp[4] ) ) ); \
|
||||
m5 = _mm256_xor_si256( m5, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0xB], tp[0xA], tp[0xB], tp[0xA], \
|
||||
tp[0xB], tp[0xA], tp[0xB], tp[0xA] ) ) ); \
|
||||
m256_const1_64( tp[5] ) ) ); \
|
||||
m6 = _mm256_xor_si256( m6, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0xD], tp[0xC], tp[0xD], tp[0xC], \
|
||||
tp[0xD], tp[0xC], tp[0xD], tp[0xC] ) ) ); \
|
||||
m256_const1_64( tp[6] ) ) ); \
|
||||
m7 = _mm256_xor_si256( m7, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0xF], tp[0xE], tp[0xF], tp[0xE], \
|
||||
tp[0xF], tp[0xE], tp[0xF], tp[0xE] ) ) ); \
|
||||
tp += 0x10; \
|
||||
m256_const1_64( tp[7] ) ) ); \
|
||||
tp += 8; \
|
||||
db = _mm256_srli_epi64( db, 1 ); \
|
||||
} \
|
||||
} while (0)
|
||||
@@ -662,55 +646,39 @@ do { \
|
||||
|
||||
#define ROUND_BIG(rc, alpha) \
|
||||
do { \
|
||||
__m256i t0, t1, t2, t3; \
|
||||
s0 = _mm256_xor_si256( s0, _mm256_set_epi32( \
|
||||
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00], \
|
||||
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00] ) ); \
|
||||
s1 = _mm256_xor_si256( s1, _mm256_set_epi32( \
|
||||
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02], \
|
||||
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02] ) ); \
|
||||
s2 = _mm256_xor_si256( s2, _mm256_set_epi32( \
|
||||
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04], \
|
||||
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04] ) ); \
|
||||
s3 = _mm256_xor_si256( s3, _mm256_set_epi32( \
|
||||
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06], \
|
||||
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06] ) ); \
|
||||
s4 = _mm256_xor_si256( s4, _mm256_set_epi32( \
|
||||
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08], \
|
||||
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08] ) ); \
|
||||
s5 = _mm256_xor_si256( s5, _mm256_set_epi32( \
|
||||
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A], \
|
||||
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A] ) ); \
|
||||
s6 = _mm256_xor_si256( s6, _mm256_set_epi32( \
|
||||
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C], \
|
||||
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C] ) ); \
|
||||
s7 = _mm256_xor_si256( s7, _mm256_set_epi32( \
|
||||
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E], \
|
||||
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E] ) ); \
|
||||
s8 = _mm256_xor_si256( s8, _mm256_set_epi32( \
|
||||
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10], \
|
||||
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10] ) ); \
|
||||
s9 = _mm256_xor_si256( s9, _mm256_set_epi32( \
|
||||
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12], \
|
||||
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12] ) ); \
|
||||
sA = _mm256_xor_si256( sA, _mm256_set_epi32( \
|
||||
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14], \
|
||||
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14] ) ); \
|
||||
sB = _mm256_xor_si256( sB, _mm256_set_epi32( \
|
||||
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16], \
|
||||
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16] ) ); \
|
||||
sC = _mm256_xor_si256( sC, _mm256_set_epi32( \
|
||||
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18], \
|
||||
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18] ) ); \
|
||||
sD = _mm256_xor_si256( sD, _mm256_set_epi32( \
|
||||
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A], \
|
||||
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A] ) ); \
|
||||
sE = _mm256_xor_si256( sE, _mm256_set_epi32( \
|
||||
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C], \
|
||||
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C] ) ); \
|
||||
sF = _mm256_xor_si256( sF, _mm256_set_epi32( \
|
||||
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E], \
|
||||
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E] ) ); \
|
||||
__m256i t0, t1, t2, t3; \
|
||||
s0 = _mm256_xor_si256( s0, m256_const1_64( \
|
||||
( ( (uint64_t)( (rc) ^ alpha[1] ) << 32 ) ) | (uint64_t)alpha[0] ) ); \
|
||||
s1 = _mm256_xor_si256( s1, m256_const1_64( \
|
||||
( (uint64_t)alpha[ 3] << 32 ) | (uint64_t)alpha[ 2] ) ); \
|
||||
s2 = _mm256_xor_si256( s2, m256_const1_64( \
|
||||
( (uint64_t)alpha[ 5] << 32 ) | (uint64_t)alpha[ 4] ) ); \
|
||||
s3 = _mm256_xor_si256( s3, m256_const1_64( \
|
||||
( (uint64_t)alpha[ 7] << 32 ) | (uint64_t)alpha[ 6] ) ); \
|
||||
s4 = _mm256_xor_si256( s4, m256_const1_64( \
|
||||
( (uint64_t)alpha[ 9] << 32 ) | (uint64_t)alpha[ 8] ) ); \
|
||||
s5 = _mm256_xor_si256( s5, m256_const1_64( \
|
||||
( (uint64_t)alpha[11] << 32 ) | (uint64_t)alpha[10] ) ); \
|
||||
s6 = _mm256_xor_si256( s6, m256_const1_64( \
|
||||
( (uint64_t)alpha[13] << 32 ) | (uint64_t)alpha[12] ) ); \
|
||||
s7 = _mm256_xor_si256( s7, m256_const1_64( \
|
||||
( (uint64_t)alpha[15] << 32 ) | (uint64_t)alpha[14] ) ); \
|
||||
s8 = _mm256_xor_si256( s8, m256_const1_64( \
|
||||
( (uint64_t)alpha[17] << 32 ) | (uint64_t)alpha[16] ) ); \
|
||||
s9 = _mm256_xor_si256( s9, m256_const1_64( \
|
||||
( (uint64_t)alpha[19] << 32 ) | (uint64_t)alpha[18] ) ); \
|
||||
sA = _mm256_xor_si256( sA, m256_const1_64( \
|
||||
( (uint64_t)alpha[21] << 32 ) | (uint64_t)alpha[20] ) ); \
|
||||
sB = _mm256_xor_si256( sB, m256_const1_64( \
|
||||
( (uint64_t)alpha[23] << 32 ) | (uint64_t)alpha[22] ) ); \
|
||||
sC = _mm256_xor_si256( sC, m256_const1_64( \
|
||||
( (uint64_t)alpha[25] << 32 ) | (uint64_t)alpha[24] ) ); \
|
||||
sD = _mm256_xor_si256( sD, m256_const1_64( \
|
||||
( (uint64_t)alpha[27] << 32 ) | (uint64_t)alpha[26] ) ); \
|
||||
sE = _mm256_xor_si256( sE, m256_const1_64( \
|
||||
( (uint64_t)alpha[29] << 32 ) | (uint64_t)alpha[28] ) ); \
|
||||
sF = _mm256_xor_si256( sF, m256_const1_64( \
|
||||
( (uint64_t)alpha[31] << 32 ) | (uint64_t)alpha[30] ) ); \
|
||||
\
|
||||
SBOX( s0, s4, s8, sC ); \
|
||||
SBOX( s1, s5, s9, sD ); \
|
||||
@@ -864,47 +832,22 @@ void hamsi_big_final( hamsi_4way_big_context *sc, __m256i *buf )
|
||||
void hamsi512_4way_init( hamsi_4way_big_context *sc )
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
sph_u32 lo, hi;
|
||||
sc->count_high = sc->count_low = 0;
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
lo = 2*i;
|
||||
hi = 2*i + 1;
|
||||
sc->h[i] = _mm256_set_epi32( IV512[hi], IV512[lo], IV512[hi], IV512[lo],
|
||||
IV512[hi], IV512[lo], IV512[hi], IV512[lo] );
|
||||
}
|
||||
|
||||
sc->h[0] = m256_const1_64( 0x6c70617273746565 );
|
||||
sc->h[1] = m256_const1_64( 0x656e62656b204172 );
|
||||
sc->h[2] = m256_const1_64( 0x302c206272672031 );
|
||||
sc->h[3] = m256_const1_64( 0x3434362c75732032 );
|
||||
sc->h[4] = m256_const1_64( 0x3030312020422d33 );
|
||||
sc->h[5] = m256_const1_64( 0x656e2d484c657576 );
|
||||
sc->h[6] = m256_const1_64( 0x6c65652c65766572 );
|
||||
sc->h[7] = m256_const1_64( 0x6769756d2042656c );
|
||||
}
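
The conversion used throughout this commit replaces duplicated 32-bit element constants with one broadcast 64-bit constant. A minimal sketch of the equivalence, assuming m256_const1_64 behaves like _mm256_set1_epi64x (the real helper lives in this repo's simd-utils and is not shown here):

#include <immintrin.h>
#include <stdint.h>

// Packs a (hi:lo) pair of 32-bit words into one 64-bit value and broadcasts
// it to all four 64-bit lanes; this yields the same register contents as
// _mm256_set_epi32( hi, lo, hi, lo, hi, lo, hi, lo ).
static inline __m256i broadcast_pair_64( uint32_t hi, uint32_t lo )
{
    return _mm256_set1_epi64x( (int64_t)( ( (uint64_t)hi << 32 ) | lo ) );
}
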
|
||||
|
||||
void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
|
||||
// The only way to reach this code is if the core was previously called
// with a very small len. That is unlikely even with an 80 byte input,
// so the partial-len handling is deprecated.
|
||||
/*
|
||||
if ( sc->partial_len != 0 )
|
||||
{
|
||||
size_t mlen;
|
||||
|
||||
mlen = 8 - sc->partial_len;
|
||||
if ( len < mlen )
|
||||
{
|
||||
memcpy_256( sc->partial + (sc->partial_len >> 3), data, len>>3 );
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_256( sc->partial + (sc->partial_len >> 3), data, mlen>>3 );
|
||||
len -= mlen;
|
||||
vdata += mlen>>3;
|
||||
hamsi_big( sc, sc->partial, 1 );
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
hamsi_big( sc, vdata, len>>3 );
|
||||
vdata += ( (len& ~(size_t)7) >> 3 );
|
||||
len &= (size_t)7;
|
||||
@@ -920,8 +863,9 @@ void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
|
||||
sph_enc32be( &ch, sc->count_high );
|
||||
sph_enc32be( &cl, sc->count_low + ( sc->partial_len << 3 ) );
|
||||
pad[0] = _mm256_set_epi32( cl, ch, cl, ch, cl, ch, cl, ch );
|
||||
sc->buf[0] = _mm256_set_epi32( 0UL, 0x80UL, 0UL, 0x80UL,
|
||||
0UL, 0x80UL, 0UL, 0x80UL );
|
||||
sc->buf[0] = m256_const1_64( 0x80 );
|
||||
// sc->buf[0] = _mm256_set_epi32( 0UL, 0x80UL, 0UL, 0x80UL,
|
||||
// 0UL, 0x80UL, 0UL, 0x80UL );
|
||||
hamsi_big( sc, sc->buf, 1 );
|
||||
hamsi_big_final( sc, pad );
|
||||
|
||||
|
@@ -94,7 +94,7 @@ extern "C"{
|
||||
|
||||
#define Sb(x0, x1, x2, x3, c) \
|
||||
do { \
|
||||
__m256i cc = _mm256_set_epi64x( c, c, c, c ); \
|
||||
__m256i cc = _mm256_set1_epi64x( c ); \
|
||||
x3 = mm256_not( x3 ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_andnot_si256( x2, cc ) ); \
|
||||
tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \
|
||||
|
@@ -1,12 +1,10 @@
|
||||
#include "keccak-gate.h"
|
||||
|
||||
int64_t keccak_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_keccak_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
opt_target_factor = 128.0;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
@@ -22,7 +20,6 @@ bool register_keccakc_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
|
2156  algo/lanehash/lane.c  Normal file
File diff suppressed because it is too large
50  algo/lanehash/lane.h  Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Sebastiaan Indesteege
|
||||
* <sebastiaan.indesteege@esat.kuleuven.be>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Optimised ANSI-C implementation of LANE
|
||||
*/
|
||||
|
||||
#ifndef LANE_H
|
||||
#define LANE_H
|
||||
|
||||
#include <string.h>
|
||||
//#include "algo/sha/sha3-defs.h"
|
||||
#include <stdint.h>
|
||||
|
||||
typedef unsigned char BitSequence;
|
||||
typedef unsigned long long DataLength;
|
||||
|
||||
//typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2, BAD_DATABITLEN = 3 } HashReturn;
|
||||
|
||||
//typedef unsigned char u8;
|
||||
//typedef unsigned int u32;
|
||||
//typedef unsigned long long u64;
|
||||
|
||||
typedef struct {
|
||||
int hashbitlen;
|
||||
uint64_t ctr;
|
||||
uint32_t h[16];
|
||||
uint8_t buffer[128];
|
||||
} hashState;
|
||||
|
||||
void laneInit (hashState *state, int hashbitlen);
|
||||
void laneUpdate (hashState *state, const BitSequence *data, DataLength databitlen);
|
||||
void laneFinal (hashState *state, BitSequence *hashval);
|
||||
void laneHash (int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
|
||||
|
||||
#endif /* LANE_H */
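
A short usage sketch of the interface declared above (illustrative only, not code from the commit; note that databitlen is a length in bits, and the 512-bit digest size is an assumption for this example):

#include "lane.h"

// Hash an 80-byte block header with LANE-512 using the streaming interface.
static void lane512_example( const unsigned char header[80],
                             unsigned char digest[64] )
{
    hashState ctx;
    laneInit( &ctx, 512 );               // select the 512-bit variant
    laneUpdate( &ctx, header, 80 * 8 );  // length is given in bits
    laneFinal( &ctx, digest );
}
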
|
@@ -1,23 +1,3 @@
|
||||
/*
|
||||
* luffa_for_sse2.c
|
||||
* Version 2.0 (Sep 15th 2009)
|
||||
*
|
||||
* Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
|
||||
*
|
||||
* Hitachi, Ltd. is the owner of this software and hereby grant
|
||||
* the U.S. Government and any interested party the right to use
|
||||
* this software for the purposes of the SHA-3 evaluation process,
|
||||
* notwithstanding that this software is copyrighted.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <immintrin.h>
|
||||
#include "luffa-hash-2way.h"
|
||||
@@ -26,31 +6,30 @@
|
||||
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define MASK _mm256_set_epi32( 0UL, 0UL, 0UL, 0xffffffffUL, \
|
||||
0UL, 0UL, 0UL, 0xffffffffUL )
|
||||
#define cns(i) m256_const1_128( ( (__m128i*)CNS_INIT)[i] )
|
||||
|
||||
#define ADD_CONSTANT(a,b,c0,c1)\
|
||||
a = _mm256_xor_si256(a,c0);\
|
||||
b = _mm256_xor_si256(b,c1);\
|
||||
|
||||
#define MULT2(a0,a1) \
|
||||
#define MULT2( a0, a1, mask ) \
|
||||
do { \
|
||||
register __m256i b = _mm256_xor_si256( a0, \
|
||||
_mm256_shuffle_epi32( _mm256_and_si256(a1,MASK), 16 ) ); \
|
||||
__m256i b = _mm256_xor_si256( a0, \
|
||||
_mm256_shuffle_epi32( _mm256_and_si256(a1,mask), 16 ) ); \
|
||||
a0 = _mm256_or_si256( _mm256_srli_si256(b,4), _mm256_slli_si256(a1,12) ); \
|
||||
a1 = _mm256_or_si256( _mm256_srli_si256(a1,4), _mm256_slli_si256(b,12) ); \
|
||||
} while(0)
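
For readers following the shuffles, here is a scalar model of what MULT2 does to one 256-bit chaining value, written from my reading of the intrinsics above (w[0..3] are a0's four 32-bit words and w[4..7] are a1's); a sketch only, not code from the commit:

#include <stdint.h>

// Multiply one Luffa chaining word by 2 in the message-injection ring:
// (w0..w7) -> (w1^w4, w2, w3^w4, w4, w5, w6, w7, w0^w4).
static void mult2_scalar( uint32_t w[8] )
{
    uint32_t x0 = w[0], x4 = w[4];
    w[0] = w[1] ^ x4;
    w[1] = w[2];
    w[2] = w[3] ^ x4;
    w[3] = x4;
    w[4] = w[5];
    w[5] = w[6];
    w[6] = w[7];
    w[7] = x0 ^ x4;
}
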
|
||||
|
||||
// Pointer arithmetic confirmed correct, but array indexing would be clearer.
|
||||
#define STEP_PART(x,c,t)\
|
||||
#define STEP_PART(x,c0,c1,t)\
|
||||
SUBCRUMB(*x,*(x+1),*(x+2),*(x+3),*t);\
|
||||
SUBCRUMB(*(x+5),*(x+6),*(x+7),*(x+4),*t);\
|
||||
MIXWORD(*x,*(x+4),*t,*(t+1));\
|
||||
MIXWORD(*(x+1),*(x+5),*t,*(t+1));\
|
||||
MIXWORD(*(x+2),*(x+6),*t,*(t+1));\
|
||||
MIXWORD(*(x+3),*(x+7),*t,*(t+1));\
|
||||
ADD_CONSTANT(*x, *(x+4), *c, *(c+1));
|
||||
ADD_CONSTANT(*x, *(x+4), c0, c1);
|
||||
|
||||
#define SUBCRUMB(a0,a1,a2,a3,t)\
|
||||
t = _mm256_load_si256(&a0);\
|
||||
@@ -245,7 +224,7 @@ static const uint32 CNS_INIT[128] __attribute((aligned(32))) = {
|
||||
0x00000000,0x00000000,0x00000000,0xfc053c31
|
||||
};
|
||||
|
||||
__m256i CNS[32];
|
||||
|
||||
|
||||
/***************************************************/
|
||||
/* Round function */
|
||||
@@ -258,6 +237,7 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
__m256i msg0, msg1;
|
||||
__m256i tmp[2];
|
||||
__m256i x[8];
|
||||
const __m256i MASK = m256_const2_64( 0, 0x00000000ffffffff );
|
||||
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
@@ -271,7 +251,7 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
t0 = _mm256_xor_si256( t0, chainv[8] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[9] );
|
||||
|
||||
MULT2( t0, t1 );
|
||||
MULT2( t0, t1, MASK );
|
||||
|
||||
msg0 = _mm256_shuffle_epi32( msg[0], 27 );
|
||||
msg1 = _mm256_shuffle_epi32( msg[1], 27 );
|
||||
@@ -290,66 +270,66 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
|
||||
MULT2( chainv[0], chainv[1]);
|
||||
MULT2( chainv[0], chainv[1], MASK );
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], chainv[2] );
|
||||
chainv[1] = _mm256_xor_si256( chainv[1], chainv[3] );
|
||||
|
||||
MULT2( chainv[2], chainv[3]);
|
||||
MULT2( chainv[2], chainv[3], MASK );
|
||||
chainv[2] = _mm256_xor_si256(chainv[2], chainv[4]);
|
||||
chainv[3] = _mm256_xor_si256(chainv[3], chainv[5]);
|
||||
|
||||
MULT2( chainv[4], chainv[5]);
|
||||
MULT2( chainv[4], chainv[5], MASK );
|
||||
chainv[4] = _mm256_xor_si256(chainv[4], chainv[6]);
|
||||
chainv[5] = _mm256_xor_si256(chainv[5], chainv[7]);
|
||||
|
||||
MULT2( chainv[6], chainv[7]);
|
||||
MULT2( chainv[6], chainv[7], MASK );
|
||||
chainv[6] = _mm256_xor_si256(chainv[6], chainv[8]);
|
||||
chainv[7] = _mm256_xor_si256(chainv[7], chainv[9]);
|
||||
|
||||
MULT2( chainv[8], chainv[9]);
|
||||
MULT2( chainv[8], chainv[9], MASK );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t1 );
|
||||
|
||||
t0 = chainv[8];
|
||||
t1 = chainv[9];
|
||||
|
||||
MULT2( chainv[8], chainv[9]);
|
||||
MULT2( chainv[8], chainv[9], MASK );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], chainv[6] );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], chainv[7] );
|
||||
|
||||
MULT2( chainv[6], chainv[7]);
|
||||
MULT2( chainv[6], chainv[7], MASK );
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], chainv[4] );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], chainv[5] );
|
||||
|
||||
MULT2( chainv[4], chainv[5]);
|
||||
MULT2( chainv[4], chainv[5], MASK );
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], chainv[2] );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], chainv[3] );
|
||||
|
||||
MULT2( chainv[2], chainv[3] );
|
||||
MULT2( chainv[2], chainv[3], MASK );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], chainv[0] );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], chainv[1] );
|
||||
|
||||
MULT2( chainv[0], chainv[1] );
|
||||
MULT2( chainv[0], chainv[1], MASK );
|
||||
chainv[0] = _mm256_xor_si256( _mm256_xor_si256( chainv[0], t0 ), msg0 );
|
||||
chainv[1] = _mm256_xor_si256( _mm256_xor_si256( chainv[1], t1 ), msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
MULT2( msg0, msg1, MASK );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], msg0 );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
MULT2( msg0, msg1, MASK );
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], msg0 );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
MULT2( msg0, msg1, MASK );
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], msg0 );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
MULT2( msg0, msg1, MASK );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], msg0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
MULT2( msg0, msg1, MASK );
|
||||
|
||||
chainv[3] = _mm256_or_si256( _mm256_slli_epi32( chainv[3], 1 ),
|
||||
_mm256_srli_epi32( chainv[3], 31 ) );
|
||||
@@ -365,14 +345,14 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
chainv[1],chainv[3],chainv[5],chainv[7],
|
||||
x[4], x[5], x[6], x[7] );
|
||||
|
||||
STEP_PART( &x[0], &CNS[ 0], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 2], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 4], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 6], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 8], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[10], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[12], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[14], &tmp[0] );
|
||||
STEP_PART( &x[0], cns( 0), cns( 1), &tmp[0] );
|
||||
STEP_PART( &x[0], cns( 2), cns( 3), &tmp[0] );
|
||||
STEP_PART( &x[0], cns( 4), cns( 5), &tmp[0] );
|
||||
STEP_PART( &x[0], cns( 6), cns( 7), &tmp[0] );
|
||||
STEP_PART( &x[0], cns( 8), cns( 9), &tmp[0] );
|
||||
STEP_PART( &x[0], cns(10), cns(11), &tmp[0] );
|
||||
STEP_PART( &x[0], cns(12), cns(13), &tmp[0] );
|
||||
STEP_PART( &x[0], cns(14), cns(15), &tmp[0] );
|
||||
|
||||
MIXTON1024( x[0], x[1], x[2], x[3],
|
||||
chainv[0], chainv[2], chainv[4],chainv[6],
|
||||
@@ -380,25 +360,24 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
chainv[1],chainv[3],chainv[5],chainv[7]);
|
||||
|
||||
/* Process last 256-bit block */
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[16], CNS[17],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(16), cns(17),
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[18], CNS[19],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(18), cns(19),
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[20], CNS[21],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(20), cns(21),
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[22], CNS[23],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(22), cns(23),
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[24], CNS[25],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(24), cns(25),
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[26], CNS[27],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(26), cns(27),
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[28], CNS[29],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(28), cns(29),
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[30], CNS[31],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(30), cns(31),
|
||||
tmp[0], tmp[1] );
|
||||
}
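
The cns(i) lookups above replace the precomputed CNS[32] array; the idea, as I read it, is to broadcast the i-th 128-bit round constant from CNS_INIT into both 128-bit lanes so the same constant feeds both interleaved ways. A sketch of such a broadcast with plain AVX2 intrinsics (m256_const1_128 is the repo's own helper and may differ in detail):

#include <immintrin.h>

// Load a 128-bit constant and replicate it into both halves of a 256-bit
// register, so two interleaved hash lanes see the same round constant.
static inline __m256i broadcast_128( const void *p )
{
    return _mm256_broadcastsi128_si256( _mm_loadu_si128( (const __m128i*)p ) );
}
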
|
||||
|
||||
|
||||
/***************************************************/
|
||||
/* Finalization function */
|
||||
/* state: hash context */
|
||||
@@ -410,8 +389,9 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
__m256i* chainv = state->chainv;
|
||||
__m256i t[2];
|
||||
__m256i zero[2];
|
||||
zero[0] = zero[1] = _mm256_setzero_si256();
|
||||
|
||||
zero[0] = zero[1] = m256_zero;
|
||||
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
/*---- blank round with m=0 ----*/
|
||||
rnd512_2way( state, zero );
|
||||
|
||||
@@ -433,8 +413,10 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
_mm256_store_si256( (__m256i*)&hash[0], t[0] );
|
||||
_mm256_store_si256( (__m256i*)&hash[8], t[1] );
|
||||
|
||||
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
|
||||
casti_m256i( b, 0 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 0 ), shuff_bswap32 );
|
||||
casti_m256i( b, 1 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 1 ), shuff_bswap32 );
|
||||
|
||||
rnd512_2way( state, zero );
|
||||
|
||||
@@ -455,26 +437,27 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
_mm256_store_si256( (__m256i*)&hash[0], t[0] );
|
||||
_mm256_store_si256( (__m256i*)&hash[8], t[1] );
|
||||
|
||||
casti_m256i( b, 2 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 3 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
|
||||
casti_m256i( b, 2 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 0 ), shuff_bswap32 );
|
||||
casti_m256i( b, 3 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 1 ), shuff_bswap32 );
|
||||
}
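
The mm256_bswap_32 calls are replaced by a single _mm256_shuffle_epi8 with a precomputed mask. A standalone check of that mask (my own sketch with plain intrinsics instead of the repo's m256_const2_64; compile with -mavx2), printing each word next to __builtin_bswap32 of the input to show they match:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    // Per-lane byte indices 3,2,1,0 / 7,6,5,4 / ... : swaps the bytes inside
    // every 32-bit word, i.e. an endian swap of each element.
    const __m256i bswap32_mask = _mm256_set_epi64x(
        0x0c0d0e0f08090a0bLL, 0x0405060700010203LL,
        0x0c0d0e0f08090a0bLL, 0x0405060700010203LL );

    uint32_t in[8] = { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff,
                       0x01234567, 0x89abcdef, 0xdeadbeef, 0xcafebabe };
    uint32_t out[8];

    __m256i v = _mm256_loadu_si256( (const __m256i*)in );
    _mm256_storeu_si256( (__m256i*)out,
                         _mm256_shuffle_epi8( v, bswap32_mask ) );

    for ( int i = 0; i < 8; i++ )
        printf( "%08x -> %08x (bswap32 %08x)\n",
                in[i], out[i], __builtin_bswap32( in[i] ) );
    return 0;
}
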
|
||||
|
||||
int luffa_2way_init( luffa_2way_context *state, int hashbitlen )
|
||||
{
|
||||
int i;
|
||||
state->hashbitlen = hashbitlen;
|
||||
|
||||
for ( i=0; i<32; i++ ) CNS[i] =
|
||||
_mm256_set_epi32( CNS_INIT[ (i<<2) + 3 ], CNS_INIT[ (i<<2) +2 ],
|
||||
CNS_INIT[ (i<<2) + 1 ], CNS_INIT[ (i<<2) ],
|
||||
CNS_INIT[ (i<<2) + 3 ], CNS_INIT[ (i<<2) +2 ],
|
||||
CNS_INIT[ (i<<2) + 1 ], CNS_INIT[ (i<<2) ] );
|
||||
|
||||
for ( i=0; i<10; i++ ) state->chainv[i] =
|
||||
_mm256_set_epi32( IV[ (i<<2) +3 ], IV[ (i<<2) +2 ],
|
||||
IV[ (i<<2) +1 ], IV[ (i<<2) ],
|
||||
IV[ (i<<2) +3 ], IV[ (i<<2) +2 ],
|
||||
IV[ (i<<2) +1 ], IV[ (i<<2) ] );
|
||||
__m128i *iv = (__m128i*)IV;
|
||||
|
||||
state->chainv[0] = m256_const1_128( iv[0] );
|
||||
state->chainv[1] = m256_const1_128( iv[1] );
|
||||
state->chainv[2] = m256_const1_128( iv[2] );
|
||||
state->chainv[3] = m256_const1_128( iv[3] );
|
||||
state->chainv[4] = m256_const1_128( iv[4] );
|
||||
state->chainv[5] = m256_const1_128( iv[5] );
|
||||
state->chainv[6] = m256_const1_128( iv[6] );
|
||||
state->chainv[7] = m256_const1_128( iv[7] );
|
||||
state->chainv[8] = m256_const1_128( iv[8] );
|
||||
state->chainv[9] = m256_const1_128( iv[9] );
|
||||
|
||||
((__m256i*)state->buffer)[0] = m256_zero;
|
||||
((__m256i*)state->buffer)[1] = m256_zero;
|
||||
@@ -492,13 +475,15 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
int blocks = (int)len >> 5;
|
||||
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
state-> rembytes = (int)len & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = mm256_bswap_32( vdata[ 0] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
@@ -507,9 +492,8 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// remaining data bytes
|
||||
buffer[0] = mm256_bswap_32( vdata[0] );
|
||||
buffer[1] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
buffer[0] = _mm256_shuffle_epi8( vdata[0], shuff_bswap32 );
|
||||
buffer[1] = m256_const2_64( 0, 0x0000000080000000 );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -525,8 +509,7 @@ int luffa_2way_close( luffa_2way_context *state, void *hashval )
|
||||
rnd512_2way( state, buffer );
|
||||
else
|
||||
{ // empty pad block, constant data
|
||||
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
msg[0] = m256_const2_64( 0, 0x0000000080000000 );
|
||||
msg[1] = m256_zero;
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
@@ -545,13 +528,16 @@ int luffa_2way_update_close( luffa_2way_context *state,
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
const int blocks = (int)( inlen >> 5 );
|
||||
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
|
||||
state->rembytes = inlen & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
@@ -559,16 +545,14 @@ int luffa_2way_update_close( luffa_2way_context *state,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// padding of partial block
|
||||
msg[0] = mm256_bswap_32( vdata[0] );
|
||||
msg[1] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
|
||||
msg[1] = m256_const2_64( 0, 0x0000000080000000 );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// empty pad block
|
||||
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
msg[0] = m256_const2_64( 0, 0x0000000080000000 );
|
||||
msg[1] = m256_zero;
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
@@ -541,7 +541,9 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
uint32 hash[8] __attribute((aligned(64)));
|
||||
__m256i* chainv = (__m256i*)state->chainv;
|
||||
__m256i t;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i zero = m128_zero;
|
||||
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
|
||||
rnd512( state, zero, zero );
|
||||
|
||||
@@ -555,7 +557,9 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
|
||||
_mm256_store_si256( (__m256i*)hash, t );
|
||||
|
||||
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 0 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 0 ), shuff_bswap32 );
|
||||
// casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
|
||||
rnd512( state, zero, zero );
|
||||
|
||||
@@ -568,7 +572,9 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
|
||||
_mm256_store_si256( (__m256i*)hash, t );
|
||||
|
||||
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 1 ) = _mm256_shuffle_epi8(
|
||||
casti_m256i( hash, 0 ), shuff_bswap32 );
|
||||
// casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
}
|
||||
|
||||
#else
|
||||
|
@@ -127,7 +127,6 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -147,15 +146,12 @@ bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&lyra2h_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
/////////////////////////////////
|
||||
|
||||
int64_t allium_get_max64_0xFFFFLL() { return 0xFFFFLL; }
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ALLIUM_4WAY)
|
||||
@@ -168,7 +164,6 @@ bool register_allium_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&allium_get_max64_0xFFFFLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -214,7 +209,6 @@ bool register_phi2_algo( algo_gate_t* gate )
|
||||
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
|
||||
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
|
||||
gate->build_extraheader = (void*)&phi2_build_extraheader;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined(PHI2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_4way;
|
||||
|
@@ -113,18 +113,12 @@ int scanhash_lyra2re( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t lyra2re_get_max64 ()
|
||||
{
|
||||
return 0xffffLL;
|
||||
}
|
||||
|
||||
bool register_lyra2re_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_lyra2re_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_lyra2re;
|
||||
gate->hash = (void*)&lyra2re_hash;
|
||||
gate->get_max64 = (void*)&lyra2re_get_max64;
|
||||
opt_target_factor = 128.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -113,17 +113,18 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
lyra2rev3_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
} while ( likely( (n < max_nonce-8) && !work_restart[thr_id].restart ) );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
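
The scanhash loop above gains likely()/unlikely() branch hints. For reference, such hints conventionally expand to __builtin_expect so the compiler lays out the rare share-found path out of line; this is the usual GCC idiom and an assumption here, since the project's own definitions live in its headers:

#ifndef likely
#define likely(expr)   __builtin_expect( !!(expr), 1 )
#endif
#ifndef unlikely
#define unlikely(expr) __builtin_expect( !!(expr), 0 )
#endif
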
|
||||
|
@@ -70,7 +70,6 @@ bool register_lyra2z330_algo( algo_gate_t* gate )
|
||||
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z330;
|
||||
gate->hash = (void*)&lyra2z330_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -263,10 +263,9 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In,
|
||||
#if defined (__AVX2__)
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
const __m256i zero = m256_zero;
|
||||
|
||||
state0 = zero;
|
||||
state1 = zero;
|
||||
state0 =
|
||||
state1 = m256_zero;
|
||||
state2 = m256_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL,
|
||||
0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
|
||||
state3 = m256_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL,
|
||||
@@ -290,12 +289,11 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In,
|
||||
#elif defined (__SSE2__)
|
||||
|
||||
__m128i state0, state1, state2, state3, state4, state5, state6, state7;
|
||||
const __m128i zero = m128_zero;
|
||||
|
||||
state0 = zero;
|
||||
state1 = zero;
|
||||
state2 = zero;
|
||||
state3 = zero;
|
||||
state0 =
|
||||
state1 =
|
||||
state2 =
|
||||
state3 = m128_zero;
|
||||
state4 = m128_const_64( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
|
||||
state5 = m128_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL );
|
||||
state6 = m128_const_64( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL );
|
||||
|
@@ -296,8 +296,6 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
// can this be skipped after finding a share? Seems to work ok.
|
||||
//out:
|
||||
mpf_set_prec_raw(magifpi, prec0);
|
||||
mpf_set_prec_raw(magifpi0, prec0);
|
||||
mpf_set_prec_raw(mptmp, prec0);
|
||||
@@ -323,7 +321,6 @@ bool register_m7m_algo( algo_gate_t *gate )
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ffff;
|
||||
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
|
@@ -208,12 +208,6 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
int64_t zr5_get_max64 ()
|
||||
{
|
||||
// return 0x1ffffLL;
|
||||
return 0x1fffffLL;
|
||||
}
|
||||
|
||||
void zr5_display_pok( struct work* work )
|
||||
{
|
||||
if ( work->data[0] & 0x00008000 )
|
||||
@@ -229,7 +223,6 @@ bool register_zr5_algo( algo_gate_t* gate )
|
||||
gate->get_new_work = (void*)&zr5_get_new_work;
|
||||
gate->scanhash = (void*)&scanhash_zr5;
|
||||
gate->hash = (void*)&zr5hash;
|
||||
gate->get_max64 = (void*)&zr5_get_max64;
|
||||
gate->decode_extra_data = (void*)&zr5_display_pok;
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
|
@@ -94,8 +94,6 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
|
||||
int64_t lbry_get_max64() { return 0x1ffffLL; }
|
||||
|
||||
int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate )
|
||||
@@ -112,7 +110,6 @@ bool register_lbry_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&lbry_hash;
|
||||
#endif
|
||||
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
|
||||
gate->get_max64 = (void*)&lbry_get_max64;
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
// gate->build_block_header = (void*)&build_block_header;
|
||||
gate->build_extraheader = (void*)&lbry_build_extraheader;
|
||||
|
@@ -1070,17 +1070,6 @@ int scanhash_neoscrypt( struct work *work,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t get_neoscrypt_max64() { return 0x3ffff; }
|
||||
|
||||
void neoscrypt_wait_for_diff( struct stratum_ctx *stratum )
|
||||
{
|
||||
while ( !stratum->job.diff )
|
||||
{
|
||||
// applog(LOG_DEBUG, "Waiting for Stratum to set the job difficulty");
|
||||
sleep(1);
|
||||
}
|
||||
}
|
||||
|
||||
int neoscrypt_get_work_data_size () { return 80; }
|
||||
|
||||
bool register_neoscrypt_algo( algo_gate_t* gate )
|
||||
@@ -1088,8 +1077,6 @@ bool register_neoscrypt_algo( algo_gate_t* gate )
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_neoscrypt;
|
||||
gate->hash = (void*)&neoscrypt;
|
||||
gate->get_max64 = (void*)&get_neoscrypt_max64;
|
||||
gate->wait_for_diff = (void*)&neoscrypt_wait_for_diff;
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
|
@@ -483,11 +483,6 @@ int scanhash_pluck( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t pluck_get_max64 ()
|
||||
{
|
||||
return 0x1ffLL;
|
||||
}
|
||||
|
||||
bool pluck_miner_thread_init( int thr_id )
|
||||
{
|
||||
scratchbuf = malloc( 128 * 1024 );
|
||||
@@ -503,7 +498,6 @@ bool register_pluck_algo( algo_gate_t* gate )
|
||||
gate->miner_thread_init = (void*)&pluck_miner_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_pluck;
|
||||
gate->hash = (void*)&pluck_hash;
|
||||
gate->get_max64 = (void*)&pluck_get_max64;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -766,8 +766,6 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t scrypt_get_max64() { return 0xfff; }
|
||||
|
||||
bool scrypt_miner_thread_init( int thr_id )
|
||||
{
|
||||
scratchbuf = scrypt_buffer_alloc( scratchbuf_size );
|
||||
@@ -783,10 +781,8 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
||||
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_scrypt;
|
||||
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
|
||||
gate->get_max64 = (void*)&scrypt_get_max64;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
|
||||
if ( !opt_param_n )
|
||||
{
|
||||
opt_param_n = 1024;
|
||||
|
@@ -240,7 +240,6 @@ bool register_scryptjane_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_scryptjane;
|
||||
gate->hash = (void*)&scryptjanehash;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
// figure out if arg in N or Nfactor
|
||||
|
@@ -305,9 +305,11 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
low = low << 3;
|
||||
|
||||
sc->buf[ pad >> 2 ] =
|
||||
mm128_bswap_32( _mm_set1_epi32( high ) );
|
||||
mm128_bswap_32( m128_const1_32( high ) );
|
||||
// mm128_bswap_32( _mm_set1_epi32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm128_bswap_32( _mm_set1_epi32( low ) );
|
||||
mm128_bswap_32( m128_const1_32( low ) );
|
||||
// mm128_bswap_32( _mm_set1_epi32( low ) );
|
||||
sha256_4way_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm128_block_bswap_32( dst, sc->val );
|
||||
@@ -538,9 +540,9 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
low = low << 3;
|
||||
|
||||
sc->buf[ pad >> 2 ] =
|
||||
mm256_bswap_32( _mm256_set1_epi32( high ) );
|
||||
mm256_bswap_32( m256_const1_32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm256_bswap_32( _mm256_set1_epi32( low ) );
|
||||
mm256_bswap_32( m256_const1_32( low ) );
|
||||
|
||||
sha256_8way_round( sc, sc->buf, sc->val );
|
||||
|
||||
|
@@ -15,7 +15,6 @@ bool register_sha256t_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -34,7 +33,6 @@ bool register_sha256q_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_sha256q;
|
||||
gate->hash = (void*)&sha256q_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
|
||||
}
|
||||
|
@@ -252,16 +252,6 @@ void sha512_4way_init( sha512_4way_context *sc )
|
||||
{
|
||||
sc->initialized = false;
|
||||
sc->count = 0;
|
||||
/*
|
||||
sc->val[0] = _mm256_set1_epi64x( H512[0] );
|
||||
sc->val[1] = _mm256_set1_epi64x( H512[1] );
|
||||
sc->val[2] = _mm256_set1_epi64x( H512[2] );
|
||||
sc->val[3] = _mm256_set1_epi64x( H512[3] );
|
||||
sc->val[4] = _mm256_set1_epi64x( H512[4] );
|
||||
sc->val[5] = _mm256_set1_epi64x( H512[5] );
|
||||
sc->val[6] = _mm256_set1_epi64x( H512[6] );
|
||||
sc->val[7] = _mm256_set1_epi64x( H512[7] );
|
||||
*/
|
||||
}
|
||||
|
||||
void sha512_4way( sha512_4way_context *sc, const void *data, size_t len )
|
||||
@@ -295,6 +285,8 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
unsigned ptr;
|
||||
const int buf_size = 128;
|
||||
const int pad = buf_size - 16;
|
||||
const __m256i shuff_bswap64 = m256_const2_64( 0x08090a0b0c0d0e0f,
|
||||
0x0001020304050607 );
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );
|
||||
@@ -308,10 +300,10 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
else
|
||||
memset_zero_256( sc->buf + (ptr>>3), (pad - ptr) >> 3 );
|
||||
|
||||
sc->buf[ pad >> 3 ] =
|
||||
mm256_bswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] =
|
||||
mm256_bswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
sc->buf[ pad >> 3 ] = _mm256_shuffle_epi8(
|
||||
_mm256_set1_epi64x( sc->count >> 61 ), shuff_bswap64 );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] = _mm256_shuffle_epi8(
|
||||
_mm256_set1_epi64x( sc->count << 3 ), shuff_bswap64 );
|
||||
sha512_4way_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm256_block_bswap_64( dst, sc->val );
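
The two padded words above encode the SHA-512 message length. A scalar sketch of the split (names are illustrative): sc->count holds the byte count, and the 128-bit bit length appended by SHA-512 is count*8, carried as a high and a low 64-bit word.

#include <stdint.h>

// Split a byte count into the two 64-bit words of the SHA-512 length field.
static void sha512_length_words( uint64_t count_bytes,
                                 uint64_t *hi_bits, uint64_t *lo_bits )
{
    *hi_bits = count_bytes >> 61;   // top bits of count_bytes * 8
    *lo_bits = count_bytes << 3;    // low 64 bits of count_bytes * 8
}
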
|
||||
|
@@ -5,6 +5,7 @@
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
|
||||
static const uint32_t IV512[] =
|
||||
{
|
||||
0x72FCCDD8, 0x79CA4727, 0x128A077B, 0x40D55AEC,
|
||||
@@ -13,6 +14,7 @@ static const uint32_t IV512[] =
|
||||
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
|
||||
};
|
||||
|
||||
|
||||
#define mm256_ror2x256hi_1x32( a, b ) \
|
||||
_mm256_blend_epi32( mm256_ror1x32_128( a ), \
|
||||
mm256_ror1x32_128( b ), 0x88 )
|
||||
@@ -232,18 +234,14 @@ c512_2way( shavite512_2way_context *ctx, const void *msg )
|
||||
|
||||
void shavite512_2way_init( shavite512_2way_context *ctx )
|
||||
{
|
||||
casti_m256i( ctx->h, 0 ) =
|
||||
_mm256_set_epi32( IV512[ 3], IV512[ 2], IV512[ 1], IV512[ 0],
|
||||
IV512[ 3], IV512[ 2], IV512[ 1], IV512[ 0] );
|
||||
casti_m256i( ctx->h, 1 ) =
|
||||
_mm256_set_epi32( IV512[ 7], IV512[ 6], IV512[ 5], IV512[ 4],
|
||||
IV512[ 7], IV512[ 6], IV512[ 5], IV512[ 4] );
|
||||
casti_m256i( ctx->h, 2 ) =
|
||||
_mm256_set_epi32( IV512[11], IV512[10], IV512[ 9], IV512[ 8],
|
||||
IV512[11], IV512[10], IV512[ 9], IV512[ 8] );
|
||||
casti_m256i( ctx->h, 3 ) =
|
||||
_mm256_set_epi32( IV512[15], IV512[14], IV512[13], IV512[12],
|
||||
IV512[15], IV512[14], IV512[13], IV512[12] );
|
||||
__m256i *h = (__m256i*)ctx->h;
|
||||
__m128i *iv = (__m128i*)IV512;
|
||||
|
||||
h[0] = m256_const1_128( iv[0] );
|
||||
h[1] = m256_const1_128( iv[1] );
|
||||
h[2] = m256_const1_128( iv[2] );
|
||||
h[3] = m256_const1_128( iv[3] );
|
||||
|
||||
ctx->ptr = 0;
|
||||
ctx->count0 = 0;
|
||||
ctx->count1 = 0;
|
||||
@@ -251,6 +249,7 @@ void shavite512_2way_init( shavite512_2way_context *ctx )
|
||||
ctx->count3 = 0;
|
||||
}
|
||||
|
||||
// not tested, use update_close
|
||||
void shavite512_2way_update( shavite512_2way_context *ctx, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
@@ -287,6 +286,7 @@ void shavite512_2way_update( shavite512_2way_context *ctx, const void *data,
|
||||
ctx->ptr = ptr;
|
||||
}
|
||||
|
||||
// not tested
|
||||
void shavite512_2way_close( shavite512_2way_context *ctx, void *dst )
|
||||
{
|
||||
unsigned char *buf;
|
||||
@@ -300,7 +300,7 @@ void shavite512_2way_close( shavite512_2way_context *ctx, void *dst )
|
||||
uint32_t vp = ctx->ptr>>5;
|
||||
|
||||
// Terminating byte then zero pad
|
||||
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||
casti_m256i( buf, vp++ ) = m256_const2_64( 0, 0x0000000000000080 );
|
||||
|
||||
// Zero pad full vectors up to count
|
||||
for ( ; vp < 6; vp++ )
|
||||
@@ -314,14 +314,12 @@ void shavite512_2way_close( shavite512_2way_context *ctx, void *dst )
|
||||
count.u32[2] = ctx->count2;
|
||||
count.u32[3] = ctx->count3;
|
||||
|
||||
casti_m256i( buf, 6 ) = _mm256_set_epi16( count.u16[0], 0,0,0,0,0,0,0,
|
||||
count.u16[0], 0,0,0,0,0,0,0 );
|
||||
casti_m256i( buf, 7 ) = _mm256_set_epi16(
|
||||
0x0200 , count.u16[7], count.u16[6], count.u16[5],
|
||||
count.u16[4], count.u16[3], count.u16[2], count.u16[1],
|
||||
0x0200 , count.u16[7], count.u16[6], count.u16[5],
|
||||
count.u16[4], count.u16[3], count.u16[2], count.u16[1] );
|
||||
|
||||
casti_m256i( buf, 6 ) = m256_const1_128(
|
||||
_mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
|
||||
casti_m256i( buf, 7 ) = m256_const1_128( _mm_set_epi16(
|
||||
0x0200, count.u16[7], count.u16[6], count.u16[5],
|
||||
count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );
|
||||
|
||||
c512_2way( ctx, buf);
|
||||
|
||||
casti_m256i( dst, 0 ) = casti_m256i( ctx->h, 0 );
|
||||
@@ -382,23 +380,21 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
||||
|
||||
if ( vp == 0 ) // empty buf, xevan.
|
||||
{
|
||||
casti_m256i( buf, 0 ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||
casti_m256i( buf, 0 ) = m256_const2_64( 0, 0x0000000000000080 );
|
||||
memset_zero_256( (__m256i*)buf + 1, 5 );
|
||||
ctx->count0 = ctx->count1 = ctx->count2 = ctx->count3 = 0;
|
||||
}
|
||||
else // half full buf, everyone else.
|
||||
{
|
||||
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||
casti_m256i( buf, vp++ ) = m256_const2_64( 0, 0x0000000000000080 );
|
||||
memset_zero_256( (__m256i*)buf + vp, 6 - vp );
|
||||
}
|
||||
|
||||
casti_m256i( buf, 6 ) = _mm256_set_epi16( count.u16[0], 0,0,0,0,0,0,0,
|
||||
count.u16[0], 0,0,0,0,0,0,0 );
|
||||
casti_m256i( buf, 7 ) = _mm256_set_epi16(
|
||||
0x0200 , count.u16[7], count.u16[6], count.u16[5],
|
||||
count.u16[4], count.u16[3], count.u16[2], count.u16[1],
|
||||
0x0200 , count.u16[7], count.u16[6], count.u16[5],
|
||||
count.u16[4], count.u16[3], count.u16[2], count.u16[1] );
|
||||
casti_m256i( buf, 6 ) = m256_const1_128(
|
||||
_mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
|
||||
casti_m256i( buf, 7 ) = m256_const1_128( _mm_set_epi16(
|
||||
0x0200, count.u16[7], count.u16[6], count.u16[5],
|
||||
count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );
|
||||
|
||||
c512_2way( ctx, buf);
|
||||
|
||||
|
@@ -110,14 +110,26 @@ static const m256_v16 FFT256_Twiddle[] =
|
||||
|
||||
// imported from vector.c
|
||||
|
||||
#define REDUCE(x) \
|
||||
_mm256_sub_epi16( _mm256_and_si256( x, m256_const1_64( \
|
||||
0x00ff00ff00ff00ff ) ), _mm256_srai_epi16( x, 8 ) )
|
||||
/*
|
||||
#define REDUCE(x) \
|
||||
_mm256_sub_epi16( _mm256_and_si256( x, _mm256_set1_epi16( 255 ) ), \
|
||||
_mm256_srai_epi16( x, 8 ) )
|
||||
*/
|
||||
|
||||
#define EXTRA_REDUCE_S(x)\
|
||||
_mm256_sub_epi16( x, _mm256_and_si256( \
|
||||
m256_const1_64( 0x0101010101010101 ), \
|
||||
_mm256_cmpgt_epi16( x, m256_const1_64( 0x0080008000800080 ) ) ) )
|
||||
|
||||
/*
|
||||
#define EXTRA_REDUCE_S(x)\
|
||||
_mm256_sub_epi16( x, \
|
||||
_mm256_and_si256( _mm256_set1_epi16( 257 ), \
|
||||
_mm256_cmpgt_epi16( x, _mm256_set1_epi16( 128 ) ) ) )
|
||||
*/
|
||||
|
||||
#define REDUCE_FULL_S( x ) EXTRA_REDUCE_S( REDUCE (x ) )
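
REDUCE and EXTRA_REDUCE_S implement reduction modulo 257 on packed 16-bit lanes, using 256 ≡ -1 (mod 257) and then folding values above 128 into a small signed range. A scalar model of the same arithmetic, written as a sketch to check the congruence (it assumes an arithmetic right shift on signed values, as the vector code does with _mm256_srai_epi16):

#include <stdint.h>
#include <stdio.h>

static int16_t reduce_partial( int16_t x )   // REDUCE: low byte minus high byte
{
    return (int16_t)( ( x & 0xff ) - ( x >> 8 ) );
}

static int16_t reduce_full( int16_t x )      // REDUCE_FULL_S
{
    int16_t r = reduce_partial( x );
    return ( r > 128 ) ? (int16_t)( r - 257 ) : r;   // EXTRA_REDUCE_S
}

int main(void)
{
    int16_t samples[] = { 0, 128, 129, 255, 256, 300, -300, 12345 };
    for ( unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++ )
    {
        int16_t r = reduce_full( samples[i] );
        // r is congruent to the input modulo 257 and lies in [-128, 128].
        printf( "%6d -> %4d  congruent: %s\n", samples[i], r,
                ( ( samples[i] - r ) % 257 ) == 0 ? "yes" : "no" );
    }
    return 0;
}
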
|
||||
|
||||
|
@@ -2,8 +2,6 @@
|
||||
#include "sph_skein.h"
|
||||
#include "skein-hash-4way.h"
|
||||
|
||||
int64_t skein_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_skein_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT | SHA_OPT;
|
||||
@@ -14,7 +12,6 @@ bool register_skein_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_skein;
|
||||
gate->hash = (void*)&skeinhash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&skein_get_max64;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -2,11 +2,6 @@
|
||||
#include <stdint.h>
|
||||
#include "sph_skein.h"
|
||||
|
||||
int64_t skein2_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
bool register_skein2_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
@@ -17,7 +12,6 @@ bool register_skein2_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_skein2;
|
||||
gate->hash = (void*)&skein2hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&skein2_get_max64;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -181,7 +181,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
for( j =0; j < 16; j++ )
|
||||
{
|
||||
SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
|
||||
mm128_rol_32( T, j ) ), 7 );
|
||||
mm128_rol_var_32( T, j ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
|
||||
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ),
|
||||
SS2 ), W1[j] );
|
||||
@@ -201,7 +201,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
for( j =16; j < 64; j++ )
|
||||
{
|
||||
SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
|
||||
mm128_rol_32( T, j&31 ) ), 7 );
|
||||
mm128_rol_var_32( T, j&31 ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
|
||||
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ),
|
||||
SS2 ), W1[j] );
|
||||
|
369  algo/swifftx/Swifftx_sha3.cpp  Normal file
@@ -0,0 +1,369 @@
|
||||
#include "Swifftx_sha3.h"
|
||||
extern "C" {
|
||||
#include "SWIFFTX.h"
|
||||
}
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// The default salt value.
|
||||
// This is the expansion of e (Euler's number) - the 19 digits after 2.71:
|
||||
// 8281828459045235360.
|
||||
// The above in base 256, from MSB to LSB:
|
||||
BitSequence SWIF_saltValueChar[SWIF_HAIFA_SALT_SIZE] = {114, 238, 247, 26, 192, 28, 170, 160};
|
||||
|
||||
// All the IVs here below were produced from the decimal digits of e's expansion.
|
||||
// The code can be found in 'ProduceRandomIV.c'.
|
||||
// The initial value for 224 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{37, 242, 132, 2, 167, 81, 158, 237, 113, 77, 162, 60, 65, 236, 108, 246,
|
||||
101, 72, 190, 109, 58, 205, 99, 6, 114, 169, 104, 114, 38, 146, 121, 142,
|
||||
59, 98, 233, 84, 72, 227, 22, 199, 17, 102, 198, 145, 24, 178, 37, 1,
|
||||
215, 245, 66, 120, 230, 193, 113, 253, 165, 218, 66, 134, 49, 231, 124, 204,
|
||||
0};
|
||||
|
||||
// The initial value for 256 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{250, 50, 42, 40, 14, 233, 53, 48, 227, 42, 237, 187, 211, 120, 209, 234,
|
||||
27, 144, 4, 61, 243, 244, 29, 247, 37, 162, 70, 11, 231, 196, 53, 6,
|
||||
193, 240, 94, 126, 204, 132, 104, 46, 114, 29, 3, 104, 118, 184, 201, 3,
|
||||
57, 77, 91, 101, 31, 155, 84, 199, 228, 39, 198, 42, 248, 198, 201, 178,
|
||||
8};
|
||||
|
||||
// The initial value for 384 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{40, 145, 193, 100, 205, 171, 47, 76, 254, 10, 196, 41, 165, 207, 200, 79,
|
||||
109, 13, 75, 201, 17, 172, 64, 162, 217, 22, 88, 39, 51, 30, 220, 151,
|
||||
133, 73, 216, 233, 184, 203, 77, 0, 248, 13, 28, 199, 30, 147, 232, 242,
|
||||
227, 124, 169, 174, 14, 45, 27, 87, 254, 73, 68, 136, 135, 159, 83, 152,
|
||||
0};
|
||||
|
||||
// The initial value for 512 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{195, 126, 197, 167, 157, 114, 99, 126, 208, 105, 200, 90, 71, 195, 144, 138,
|
||||
142, 122, 123, 116, 24, 214, 168, 173, 203, 183, 194, 210, 102, 117, 138, 42,
|
||||
114, 118, 132, 33, 35, 149, 143, 163, 163, 183, 243, 175, 72, 22, 201, 255,
|
||||
102, 243, 22, 187, 211, 167, 239, 76, 164, 70, 80, 182, 181, 212, 9, 185,
|
||||
0};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// NIST API implementation portion.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int Swifftx::Init(int hashbitlen)
|
||||
{
|
||||
switch(hashbitlen)
|
||||
{
|
||||
case 224:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
// Initializes h_0 in HAIFA:
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_224, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
case 256:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_256, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
case 384:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_384, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
case 512:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_512, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
default:
|
||||
return BAD_HASHBITLEN;
|
||||
}
|
||||
|
||||
swifftxState.wasUpdated = false;
|
||||
swifftxState.remainingSize = 0;
|
||||
memset(swifftxState.remaining, 0, SWIF_HAIFA_INPUT_BLOCK_SIZE);
|
||||
memset(swifftxState.numOfBitsChar, 0, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// Initialize the salt with the default value.
|
||||
memcpy(swifftxState.salt, SWIF_saltValueChar, SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int Swifftx::Update(const BitSequence *data, DataLength databitlen)
|
||||
{
|
||||
// The size of input in bytes after putting the remaining data from previous invocation.
|
||||
int sizeOfInputAfterRemaining = 0;
|
||||
// The input block to compression function of SWIFFTX:
|
||||
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
|
||||
// Whether we handled a single block.
|
||||
bool wasSingleBlockHandled = false;
|
||||
|
||||
swifftxState.wasUpdated = true;
|
||||
|
||||
// Handle an empty message as required by NIST. Since 'Final()' is oblivious to the input
|
||||
// (but of course uses the output of the compression function from the previous round,
|
||||
    // which is called h_{i-1} in the HAIFA article), we have to do nothing here.
|
||||
if (databitlen == 0)
|
||||
return SUCCESS;
|
||||
|
||||
// If we had before an input with unaligned length, return an error
|
||||
if (swifftxState.remainingSize % 8)
|
||||
{
|
||||
return INPUT_DATA_NOT_ALIGNED;
|
||||
}
|
||||
|
||||
// Convert remaining size to bytes.
|
||||
swifftxState.remainingSize /= 8;
|
||||
|
||||
// As long as we have enough data combined from (remaining + data) to fill input block
|
||||
   // Round setup
|
||||
while (((databitlen / 8) + swifftxState.remainingSize) >= SWIF_HAIFA_INPUT_BLOCK_SIZE)
|
||||
{
|
||||
// Fill the input block with data:
|
||||
// 1. The output of the previous block:
|
||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
// 2. The input part of the block:
|
||||
// 2a. The remaining data from the previous 'Update()' call:
|
||||
if (swifftxState.remainingSize)
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
|
||||
swifftxState.remainingSize);
|
||||
// 2b. The input data that we have place for after the 'remaining':
|
||||
sizeOfInputAfterRemaining = SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
|
||||
- ((int) swifftxState.remainingSize) - SWIF_HAIFA_NUM_OF_BITS_SIZE
|
||||
- SWIF_HAIFA_SALT_SIZE;
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize,
|
||||
data, sizeOfInputAfterRemaining);
|
||||
|
||||
// 3. The #bits part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
|
||||
+ sizeOfInputAfterRemaining,
|
||||
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 4. The salt part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
|
||||
+ sizeOfInputAfterRemaining + SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
||||
swifftxState.salt, SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, false);
|
||||
|
||||
// Update the #bits field with SWIF_HAIFA_INPUT_BLOCK_SIZE.
|
||||
AddToCurrInBase256(swifftxState.numOfBitsChar, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
|
||||
wasSingleBlockHandled = true;
|
||||
data += sizeOfInputAfterRemaining;
|
||||
databitlen -= (sizeOfInputAfterRemaining * 8);
|
||||
swifftxState.remainingSize = 0;
|
||||
}
|
||||
|
||||
// Update the swifftxState.remaining and swifftxState.remainingSize.
|
||||
// remainingSize will be in bits after exiting 'Update()'.
|
||||
if (wasSingleBlockHandled)
|
||||
{
|
||||
swifftxState.remainingSize = (unsigned int) databitlen; // now remaining size is in bits.
|
||||
if (swifftxState.remainingSize)
|
||||
memcpy(swifftxState.remaining, data, (swifftxState.remainingSize + 7) / 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(swifftxState.remaining + swifftxState.remainingSize, data,
|
||||
(size_t) (databitlen + 7) / 8);
|
||||
swifftxState.remainingSize = (swifftxState.remainingSize * 8) + (unsigned short) databitlen;
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
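
To summarize the block assembled in the loop above, this is my reading of the HAIFA input-block layout; the helper below is a sketch (not code from the commit) that mirrors the memcpy arithmetic in Update(), with the SWIFFTX_*/SWIF_HAIFA_* sizes passed in as parameters:

#include <stddef.h>

// Field offsets inside one HAIFA input block.  in_sz/out_sz/bits_sz/salt_sz
// stand for SWIFFTX_INPUT_BLOCK_SIZE, SWIFFTX_OUTPUT_BLOCK_SIZE,
// SWIF_HAIFA_NUM_OF_BITS_SIZE and SWIF_HAIFA_SALT_SIZE.
static void haifa_block_offsets( size_t in_sz, size_t out_sz,
                                 size_t bits_sz, size_t salt_sz,
                                 size_t off[4] )
{
    off[0] = 0;                          // h_{i-1}, the previous output block
    off[1] = out_sz;                     // message bytes (remaining + new data)
    off[2] = in_sz - bits_sz - salt_sz;  // the #bits counter (big-endian)
    off[3] = in_sz - salt_sz;            // the salt
}
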
|
||||
|
||||
int Swifftx::Final(BitSequence *hashval)
|
||||
{
|
||||
int i;
|
||||
// Whether to add one last block. True if the padding appended to the last block overflows
|
||||
// the block size.
|
||||
bool toAddFinalBlock = false;
|
||||
bool toPutOneInFinalBlock = false;
|
||||
unsigned short oneShift = 0;
|
||||
// The size of the last input block before the zeroes padding. We add 1 here because we
|
||||
// include the final '1' bit in the calculation and 7 as we round the length to bytes.
|
||||
unsigned short sizeOfLastInputBlock = (swifftxState.remainingSize + 1 + 7) / 8;
|
||||
// The number of bytes of zero in the padding part.
|
||||
// The padding contains:
|
||||
// 1. A single 1 bit.
|
||||
// 2. As many zeroes as needed.
|
||||
// 3. The message length in bits. Occupies SWIF_HAIFA_NUM_OF_BITS_SIZE bytes.
|
||||
// 4. The digest size. Maximum is 512, so we need 2 bytes.
|
||||
// If the total number achieved is negative, add an additional block, as HAIFA specifies.
|
||||
short numOfZeroBytesInPadding = (short) SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
|
||||
- sizeOfLastInputBlock - (2 * SWIF_HAIFA_NUM_OF_BITS_SIZE) - 2
|
||||
- SWIF_HAIFA_SALT_SIZE;
|
||||
// The input block to compression function of SWIFFTX:
|
||||
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
|
||||
// The message length in base 256.
|
||||
BitSequence messageLengthChar[SWIF_HAIFA_NUM_OF_BITS_SIZE] = {0};
|
||||
// The digest size used for padding:
|
||||
unsigned char digestSizeLSB = swifftxState.hashbitlen % 256;
|
||||
unsigned char digestSizeMSB = (swifftxState.hashbitlen - digestSizeLSB) / 256;
|
||||
|
||||
if (numOfZeroBytesInPadding < 1)
|
||||
toAddFinalBlock = true;
|
||||
|
||||
// Fill the input block with data:
|
||||
// 1. The output of the previous block:
|
||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
// 2a. The input part of the block, which is the remaining data from the previous 'Update()'
|
||||
// call, if exists and an extra '1' bit (maybe all we have is this extra 1):
|
||||
|
||||
// Add the last 1 in big-endian convention ...
|
||||
if (swifftxState.remainingSize % 8 == 0)
|
||||
{
|
||||
swifftxState.remaining[sizeOfLastInputBlock - 1] = 0x80;
|
||||
}
|
||||
else
|
||||
{
|
||||
swifftxState.remaining[sizeOfLastInputBlock - 1] |= (1 << (7 - (swifftxState.remainingSize % 8)));
|
||||
}
|
||||
|
||||
if (sizeOfLastInputBlock)
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
|
||||
sizeOfLastInputBlock);
|
||||
|
||||
// Compute the message length in base 256:
|
||||
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
|
||||
messageLengthChar[i] = swifftxState.numOfBitsChar[i];
|
||||
if (sizeOfLastInputBlock)
|
||||
AddToCurrInBase256(messageLengthChar, sizeOfLastInputBlock * 8);
|
||||
|
||||
if (!toAddFinalBlock)
|
||||
{
|
||||
// 2b. Put the zeroes:
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
|
||||
0, numOfZeroBytesInPadding);
|
||||
// 2c. Pad the message length:
|
||||
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
||||
+ numOfZeroBytesInPadding + i] = messageLengthChar[i];
|
||||
// 2d. Pad the digest size:
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
||||
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE] = digestSizeMSB;
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
||||
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE + 1] = digestSizeLSB;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2b. Put the zeroes, if at all:
|
||||
if ((SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock) > 0)
|
||||
{
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
|
||||
0, SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. The #bits part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
|
||||
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 4. The salt part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
||||
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
||||
swifftxState.salt,
|
||||
SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, !toAddFinalBlock);
|
||||
|
||||
// If we have to add one more block, it is now:
|
||||
if (toAddFinalBlock)
|
||||
{
|
||||
// 1. The previous output block, as usual.
|
||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
|
||||
// 2a. Instead of the input, zeroes:
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE , 0,
|
||||
SWIF_HAIFA_INPUT_BLOCK_SIZE - SWIF_HAIFA_NUM_OF_BITS_SIZE - 2);
|
||||
// 2b. Instead of the input, the message length:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
||||
- SWIF_HAIFA_NUM_OF_BITS_SIZE - 2,
|
||||
messageLengthChar,
|
||||
SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 2c. Instead of the input, the digest size:
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 2] = digestSizeMSB;
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 1] = digestSizeLSB;
|
||||
// 3. The #bits part of the block, which is zero in case of additional block:
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
|
||||
0,
|
||||
SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 4. The salt part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
||||
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
||||
swifftxState.salt,
|
||||
SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, true);
|
||||
}
|
||||
|
||||
// Finally, copy the result into 'hashval'. If the digest size is less than 512 bits, copy
// only the first hashbitlen/8 bytes:
|
||||
for (i = 0; i < (swifftxState.hashbitlen / 8); ++i)
|
||||
hashval[i] = swifftxState.currOutputBlock[i];
|
||||
|
||||
return SUCCESS;
|
||||
}
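// For reference (summary added here, derived from the code above): the 256-byte
// compression-function input assembled above is laid out as (byte offsets):
//    [  0.. 64]  previous output block        (SWIFFTX_OUTPUT_BLOCK_SIZE = 65)
//    [ 65..239]  message part                 (SWIF_HAIFA_INPUT_BLOCK_SIZE = 175)
//    [240..247]  HAIFA '#bits hashed so far'  (SWIF_HAIFA_NUM_OF_BITS_SIZE = 8)
//    [248..255]  salt                         (SWIF_HAIFA_SALT_SIZE = 8)
// In the last data block the message part holds the remaining data (with the appended
// '1' bit), zero padding, the 8-byte message length and the 2-byte digest size. If a
// separate final block is needed, its message part holds only zeroes, the message length
// and the digest size, and its '#bits' field is zero.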
|
||||
|
||||
int Swifftx::Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
|
||||
BitSequence *hashval)
|
||||
{
|
||||
int result;
|
||||
//hashState state;
|
||||
// The pointer to the current place in the input we take into the compression function.
|
||||
DataLength currInputIndex = 0;
|
||||
|
||||
result = Swifftx::Init(hashbitlen);
|
||||
|
||||
if (result != SUCCESS)
|
||||
return result;
|
||||
|
||||
for ( ; (databitlen / 8) > SWIF_HAIFA_INPUT_BLOCK_SIZE;
|
||||
currInputIndex += SWIF_HAIFA_INPUT_BLOCK_SIZE, databitlen -= (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8))
|
||||
{
|
||||
result = Swifftx::Update(data + currInputIndex, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
|
||||
if (result != SUCCESS)
|
||||
return result;
|
||||
}
|
||||
|
||||
// The length of the last block may be shorter than (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8)
|
||||
result = Swifftx::Update(data + currInputIndex, databitlen);
|
||||
if (result != SUCCESS)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
return Swifftx::Final(hashval);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper function implementation portion.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Swifftx::AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE],
|
||||
unsigned short toAdd)
|
||||
{
|
||||
unsigned char remainder = 0;
|
||||
short i;
|
||||
BitSequence currValueInBase256[8] = {0};
|
||||
unsigned short currIndex = 7;
|
||||
unsigned short temp = 0;
|
||||
|
||||
do
|
||||
{
|
||||
remainder = toAdd % 256;
|
||||
currValueInBase256[currIndex--] = remainder;
|
||||
toAdd -= remainder;
|
||||
toAdd /= 256;
|
||||
}
|
||||
while(toAdd != 0);
|
||||
|
||||
for (i = 7; i >= 0; --i)
|
||||
{
|
||||
temp = value[i] + currValueInBase256[i];
|
||||
if (temp > 255)
|
||||
{
|
||||
value[i] = temp % 256;
|
||||
if (i > 0)   // guard: do not write before the array if the most significant byte overflows
   currValueInBase256[i - 1]++;
|
||||
}
|
||||
else
|
||||
value[i] = (unsigned char) temp;
|
||||
}
|
||||
}
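// Illustration only (not part of the submission): the HAIFA '#bits hashed so far'
// counter kept in swifftxState.numOfBitsChar is a big-endian, base-256 number of
// SWIF_HAIFA_NUM_OF_BITS_SIZE bytes, and AddToCurrInBase256() above is its increment
// primitive. A typical update after compressing one full input block would be:
//
//    AddToCurrInBase256( swifftxState.numOfBitsChar, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8 );
//
// so after two full blocks the counter holds 2800 (= 2 * 175 * 8) in its two
// least significant (right-most) bytes.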
|
79
algo/swifftx/Swifftx_sha3.h
Normal file
79
algo/swifftx/Swifftx_sha3.h
Normal file
@@ -0,0 +1,79 @@
|
||||
#ifndef SWIFFTX_SHA3_H
|
||||
#define SWIFFTX_SHA3_H
|
||||
|
||||
#include "sha3_interface.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
|
||||
class Swifftx : public SHA3 {
|
||||
|
||||
#define SWIFFTX_INPUT_BLOCK_SIZE 256
|
||||
#define SWIFFTX_OUTPUT_BLOCK_SIZE 65
|
||||
#define SWIF_HAIFA_SALT_SIZE 8
|
||||
#define SWIF_HAIFA_NUM_OF_BITS_SIZE 8
|
||||
#define SWIF_HAIFA_INPUT_BLOCK_SIZE (SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE \
|
||||
- SWIF_HAIFA_NUM_OF_BITS_SIZE - SWIF_HAIFA_SALT_SIZE)
|
||||
|
||||
typedef unsigned char BitSequence;
|
||||
//const DataLength SWIF_SALT_VALUE;
|
||||
|
||||
#define SWIF_HAIFA_IV 0
|
||||
|
||||
/*const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE];*/
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SUCCESS = 0,
|
||||
FAIL = 1,
|
||||
BAD_HASHBITLEN = 2,
|
||||
BAD_SALT_SIZE = 3,
|
||||
SET_SALT_VALUE_FAILED = 4,
|
||||
INPUT_DATA_NOT_ALIGNED = 5
|
||||
} HashReturn;
|
||||
|
||||
typedef struct hashState {
|
||||
unsigned short hashbitlen;
|
||||
|
||||
// The data remained after the recent call to 'Update()'.
|
||||
BitSequence remaining[SWIF_HAIFA_INPUT_BLOCK_SIZE + 1];
|
||||
|
||||
// The size of the remaining data in bits.
|
||||
// Is 0 in case there is no remaining data at all.
|
||||
unsigned int remainingSize;
|
||||
|
||||
// The current output of the compression function. At the end will contain the final digest
|
||||
// (which may need to be truncated, depending on hashbitlen).
|
||||
BitSequence currOutputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
|
||||
// The value of '#bits hashed so far' field in HAIFA, in base 256.
|
||||
BitSequence numOfBitsChar[SWIF_HAIFA_NUM_OF_BITS_SIZE];
|
||||
|
||||
// The salt value currently in use:
|
||||
BitSequence salt[SWIF_HAIFA_SALT_SIZE];
|
||||
|
||||
// Indicates whether a single 'Update()' occurred.
// After a call to 'Update()' the key and the salt values cannot be changed.
|
||||
bool wasUpdated;
|
||||
} hashState;
|
||||
|
||||
private:
|
||||
int swifftxNumRounds;
|
||||
hashState swifftxState;
|
||||
|
||||
|
||||
public:
|
||||
int Init(int hashbitlen);
|
||||
int Update(const BitSequence *data, DataLength databitlen);
|
||||
int Final(BitSequence *hashval);
|
||||
int Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
|
||||
BitSequence *hashval);
|
||||
|
||||
private:
|
||||
static void AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE], unsigned short toAdd);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
21
algo/swifftx/hash_interface.h
Normal file
21
algo/swifftx/hash_interface.h
Normal file
@@ -0,0 +1,21 @@
#pragma once

#include <cstdint>

namespace hash {

using BitSequence = unsigned char;
using DataLength = unsigned long long;

struct hash_interface {
  virtual ~hash_interface() = default;

  virtual int Init(int hash_bitsize) = 0;
  virtual int Update(const BitSequence *data, DataLength data_bitsize) = 0;
  virtual int Final(BitSequence *hash) = 0;

  virtual int
  Hash(int hash_bitsize, const BitSequence *data, DataLength data_bitsize, BitSequence *hash) = 0;
};

} // namespace hash
39
algo/swifftx/inttypes.h
Normal file
39
algo/swifftx/inttypes.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
inttypes.h
|
||||
|
||||
Contributors:
|
||||
Created by Marek Michalkiewicz <marekm@linux.org.pl>
|
||||
|
||||
THIS SOFTWARE IS NOT COPYRIGHTED
|
||||
|
||||
This source code is offered for use in the public domain. You may
|
||||
use, modify or distribute it freely.
|
||||
|
||||
This code is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY
|
||||
DISCLAIMED. This includes but is not limited to warranties of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef __INTTYPES_H_
|
||||
#define __INTTYPES_H_
|
||||
|
||||
/* Use [u]intN_t if you need exactly N bits.
|
||||
XXX - doesn't handle the -mint8 option. */
|
||||
|
||||
typedef signed char swift_int8_t;
|
||||
typedef unsigned char swift_uint8_t;
|
||||
|
||||
typedef int swift_int16_t;
|
||||
typedef unsigned int swift_uint16_t;
|
||||
|
||||
typedef long swift_int32_t;
|
||||
typedef unsigned long swift_uint32_t;
|
||||
|
||||
typedef long long swift_int64_t;
|
||||
typedef unsigned long long swift_uint64_t;
|
||||
|
||||
//typedef swift_int16_t intptr_t;
|
||||
//typedef swift_uint16_t uintptr_t;
|
||||
|
||||
#endif
|
14
algo/swifftx/sha3_interface.h
Normal file
14
algo/swifftx/sha3_interface.h
Normal file
@@ -0,0 +1,14 @@
#pragma once

#include <cstdint>
//#include <streams/hash/hash_interface.h>
#include "hash_interface.h"

namespace sha3 {

using BitSequence = hash::BitSequence;
using DataLength = hash::DataLength;

struct sha3_interface : hash::hash_interface {};

} // namespace sha3
47
algo/swifftx/stdbool.h
Normal file
47
algo/swifftx/stdbool.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2000 Jeroen Ruigrok van der Werven <asmodai@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD: src/include/stdbool.h,v 1.6 2002/08/16 07:33:14 alfred Exp $
|
||||
*/
|
||||
|
||||
#ifndef _STDBOOL_H_
|
||||
#define _STDBOOL_H_
|
||||
|
||||
#define __bool_true_false_are_defined 1
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
//#define bool _Bool
|
||||
//#if __STDC_VERSION__ < 199901L && __GNUC__ < 3
|
||||
//typedef int _Bool;
|
||||
//#endif
|
||||
typedef int bool;
|
||||
|
||||
#endif /* !__cplusplus */
|
||||
|
||||
#endif /* !_STDBOOL_H_ */
|
54
algo/swifftx/stdint.h
Normal file
54
algo/swifftx/stdint.h
Normal file
@@ -0,0 +1,54 @@
|
||||
#ifndef _SWIFFT_STDINT_H
|
||||
#define _SWIFFT_STDINT_H
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// A note from SWIFFTX implementers:
|
||||
//
|
||||
// Although the submission was targeted for Microsoft Visual Studio 2005 compiler, we strived
|
||||
// to make the code as portable as possible. This is why we preferred to use the types defined
|
||||
// here, instead of Microsoft-specific types. We compiled the code with gcc to make this sure.
|
||||
// However, we couldn't use this header as is, due to VS2005 compiler objections. This is why
|
||||
// we commented out certain defines and clearly marked it.
|
||||
// To compile our code on gcc you may define SYS_STDINT.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef SYS_STDINT
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#else
|
||||
|
||||
#include "inttypes.h"
|
||||
// The following was commented out by SWIFFTX implementers:
|
||||
// __BEGIN_DECLS
|
||||
|
||||
typedef swift_int8_t swifftx_int_least8_t;
|
||||
typedef swift_int16_t swifftx_int_least16_t;
|
||||
typedef swift_int32_t swifftx_int_least32_t;
|
||||
typedef swift_uint8_t swifftx_uint_least8_t;
|
||||
typedef swift_uint16_t swifftx_uint_least16_t;
|
||||
typedef swift_uint32_t swifftx_uint_least32_t;
|
||||
|
||||
#ifndef __STRICT_ANSI__
|
||||
typedef swift_int64_t swifftx_int_least64_t;
|
||||
typedef swift_uint64_t swifftx_uint_least64_t;
|
||||
#endif
|
||||
|
||||
/*typedef signed char int_fast8_t;
|
||||
typedef signed long int int_fast16_t;
|
||||
typedef signed long int int_fast32_t;
|
||||
typedef signed long long int int_fast64_t;
|
||||
|
||||
typedef unsigned char uint_fast8_t;
|
||||
typedef unsigned long int uint_fast16_t;
|
||||
typedef unsigned long int uint_fast32_t;
|
||||
typedef unsigned long long int uint_fast64_t;*/
|
||||
|
||||
// The following was commented out by SWIFFTX implementers:
|
||||
// #include <endian.h>
|
||||
// __END_DECLS
|
||||
#endif
|
||||
|
||||
#endif
|
912
algo/swifftx/swifftx-4way.c
Normal file
912
algo/swifftx/swifftx-4way.c
Normal file
@@ -0,0 +1,912 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
|
||||
//
|
||||
// SWIFFTX.c
|
||||
//
|
||||
// October 2008
|
||||
//
|
||||
// This is the source file of the OPTIMIZED 32BIT implementation of SWIFFTX hash function.
|
||||
// SWIFFTX is a candidate function for SHA-3 NIST competition.
|
||||
// More details about SWIFFTX can be found in the accompanying submission documents.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#include "swifftx.h"
|
||||
// See the remarks concerning compatibility issues inside stdint.h.
|
||||
#include "stdint.h"
|
||||
// Remove this while using gcc:
|
||||
//#include "stdbool.h"
|
||||
#include <memory.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Constants and static tables portion.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// In SWIFFTX we work over Z_257, so this is the modulus and the arithmetic is performed modulo
|
||||
// this number.
|
||||
#define FIELD_SIZE 257
|
||||
|
||||
// The size of FFT we use:
|
||||
#define N 64
|
||||
|
||||
#define LOGN 6
|
||||
|
||||
#define EIGHTH_N (N / 8)
|
||||
|
||||
// The number of FFTS done on the input.
|
||||
#define M (SWIFFTX_INPUT_BLOCK_SIZE / 8) // 32
|
||||
|
||||
// Omega is the 128th root of unity in Z_257.
|
||||
// We choose w = 42.
|
||||
#define OMEGA 42
|
||||
|
||||
// The size of the inner FFT lookup table:
|
||||
#define W 8
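// Illustrative self-check (not part of the submission): OMEGA = 42 is a primitive
// 128th root of unity mod FIELD_SIZE = 257, i.e. 42^64 = 256 = -1 and 42^128 = 1.
// Also 42^8 = 2 (mod 257), which is why some twiddle-factor multiplications in the
// FFT below reduce to left shifts. Returns 1 if these properties hold.
static int CheckOmegaOrder()
{
   int w = 1, w8 = 0, w64 = 0, e;
   for ( e = 1; e <= 128; ++e )
   {
      w = ( w * OMEGA ) % FIELD_SIZE;
      if ( e == 8  ) w8  = w;
      if ( e == 64 ) w64 = w;
   }
   return ( w8 == 2 ) && ( w64 == FIELD_SIZE - 1 ) && ( w == 1 );
}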
|
||||
|
||||
// Calculates the sum and the difference of two numbers.
|
||||
//
|
||||
// Parameters:
|
||||
// - A: the first operand. After the operation stores the sum of the two operands.
|
||||
// - B: the second operand. After the operation stores the difference between the first and the
|
||||
// second operands.
|
||||
#define ADD_SUB_4WAY( A, B ) \
|
||||
{ \
|
||||
__m128i temp = B; \
|
||||
B = _mm_sub_epi32( A, B ); \
|
||||
A = _mm_add_epi32( A, temp ); \
|
||||
}
|
||||
|
||||
|
||||
//#define ADD_SUB(A, B) {register int temp = (B); B = ((A) - (B)); A = ((A) + (temp));}
|
||||
|
||||
// Quickly reduces an integer modulo 257.
|
||||
//
|
||||
// Parameters:
|
||||
// - A: the input.
|
||||
|
||||
// Note: bitwise AND on __m128i vectors is _mm_and_si128.
#define Q_REDUCE( A ) ( _mm_sub_epi32( \
                _mm_and_si128( A, m128_const1_32( 0xff ) ), \
                _mm_srli_epi32( A, 8 ) ) )
|
||||
|
||||
//#define Q_REDUCE(A) (((A) & 0xff) - ((A) >> 8))
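// Why the reduction works (illustration only): any 0 <= A < 65536 can be written as
// A = hi*256 + lo, and since 256 = -1 (mod 257), A = lo - hi (mod 257), which is
// exactly (A & 0xff) - (A >> 8). The result lies in (-256, 256) rather than in
// [0, 257), which is fine for the centered arithmetic used throughout this file.
// A brute-force check of the congruence:
static int CheckQReduceIdentity()
{
   int a;
   for ( a = 0; a < 65536; ++a )
   {
      int r = ( a & 0xff ) - ( a >> 8 );
      if ( ( ( r - a ) % FIELD_SIZE + FIELD_SIZE ) % FIELD_SIZE != 0 )
         return 0;
   }
   return 1;
}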
|
||||
|
||||
// Since we need to do the setup only once, this is the indicator variable:
|
||||
static bool wasSetupDone = false;
|
||||
|
||||
// This array stores the powers of omegas that correspond to the indices, which are the input
|
||||
// values. Known also as the "outer FFT twiddle factors".
|
||||
swift_int16_t multipliers[N];
|
||||
|
||||
// This array stores the powers of omegas, multiplied by the corresponding values.
|
||||
// We store this table to save computation time.
|
||||
//
|
||||
// To calculate the intermediate value of the compression function (the first out of two
|
||||
// stages), we multiply the k-th bit of x_i by w^[(2i + 1) * k]. {x_i} is the input to the
|
||||
// compression function, i is between 0 and 31, x_i is a 64-bit value.
|
||||
// One can see the formula for this (intermediate) stage in the SWIFFT FSE 2008 paper --
|
||||
// formula (2), section 3, page 6.
|
||||
swift_int16_t fftTable[256 * EIGHTH_N];
|
||||
|
||||
// The A's we use in SWIFFTX shall be random elements of Z_257.
|
||||
// We generated these A's from the decimal expansion of PI as follows: we converted each
|
||||
// triple of digits into a decimal number d. If d < (257 * 3) we used (d % 257) for the next A
|
||||
// element, otherwise move to the next triple of digits in the expansion. This guarantees that
// the A's are random, provided that PI digits are. (An illustrative sketch of this procedure
// follows the table below.)
|
||||
const swift_int16_t As[3 * M * N] =
|
||||
{141, 78, 139, 75, 238, 205, 129, 126, 22, 245, 197, 169, 142, 118, 105, 78,
|
||||
50, 149, 29, 208, 114, 34, 85, 117, 67, 148, 86, 256, 25, 49, 133, 93,
|
||||
95, 36, 68, 231, 211, 102, 151, 128, 224, 117, 193, 27, 102, 187, 7, 105,
|
||||
45, 130, 108, 124, 171, 151, 189, 128, 218, 134, 233, 165, 14, 201, 145, 134,
|
||||
52, 203, 91, 96, 197, 69, 134, 213, 136, 93, 3, 249, 141, 16, 210, 73,
|
||||
6, 92, 58, 74, 174, 6, 254, 91, 201, 107, 110, 76, 103, 11, 73, 16,
|
||||
34, 209, 7, 127, 146, 254, 95, 176, 57, 13, 108, 245, 77, 92, 186, 117,
|
||||
124, 97, 105, 118, 34, 74, 205, 122, 235, 53, 94, 238, 210, 227, 183, 11,
|
||||
129, 159, 105, 183, 142, 129, 86, 21, 137, 138, 224, 223, 190, 188, 179, 188,
|
||||
256, 25, 217, 176, 36, 176, 238, 127, 160, 210, 155, 148, 132, 0, 54, 127,
|
||||
145, 6, 46, 85, 243, 95, 173, 123, 178, 207, 211, 183, 224, 173, 146, 35,
|
||||
71, 114, 50, 22, 175, 1, 28, 19, 112, 129, 21, 34, 161, 159, 115, 52,
|
||||
4, 193, 211, 92, 115, 49, 59, 217, 218, 96, 61, 81, 24, 202, 198, 89,
|
||||
45, 128, 8, 51, 253, 87, 171, 35, 4, 188, 171, 10, 3, 137, 238, 73,
|
||||
19, 208, 124, 163, 103, 177, 155, 147, 46, 84, 253, 233, 171, 241, 211, 217,
|
||||
159, 48, 96, 79, 237, 18, 171, 226, 99, 1, 97, 195, 216, 163, 198, 95,
|
||||
0, 201, 65, 228, 21, 153, 124, 230, 44, 35, 44, 108, 85, 156, 249, 207,
|
||||
26, 222, 131, 1, 60, 242, 197, 150, 181, 19, 116, 213, 75, 98, 124, 240,
|
||||
123, 207, 62, 255, 60, 143, 187, 157, 139, 9, 12, 104, 89, 49, 193, 146,
|
||||
104, 196, 181, 82, 198, 253, 192, 191, 255, 122, 212, 104, 47, 20, 132, 208,
|
||||
46, 170, 2, 69, 234, 36, 56, 163, 28, 152, 104, 238, 162, 56, 24, 58,
|
||||
38, 150, 193, 254, 253, 125, 173, 35, 73, 126, 247, 239, 216, 6, 199, 15,
|
||||
90, 12, 97, 122, 9, 84, 207, 127, 219, 72, 58, 30, 29, 182, 41, 192,
|
||||
235, 248, 237, 74, 72, 176, 210, 252, 45, 64, 165, 87, 202, 241, 236, 223,
|
||||
151, 242, 119, 239, 52, 112, 169, 28, 13, 37, 160, 60, 158, 81, 133, 60,
|
||||
16, 145, 249, 192, 173, 217, 214, 93, 141, 184, 54, 34, 161, 104, 157, 95,
|
||||
38, 133, 218, 227, 211, 181, 9, 66, 137, 143, 77, 33, 248, 159, 4, 55,
|
||||
228, 48, 99, 219, 222, 184, 15, 36, 254, 256, 157, 237, 87, 139, 209, 113,
|
||||
232, 85, 126, 167, 197, 100, 103, 166, 64, 225, 125, 205, 117, 135, 84, 128,
|
||||
231, 112, 90, 241, 28, 22, 210, 147, 186, 49, 230, 21, 108, 39, 194, 47,
|
||||
123, 199, 107, 114, 30, 210, 250, 143, 59, 156, 131, 133, 221, 27, 76, 99,
|
||||
208, 250, 78, 12, 211, 141, 95, 81, 195, 106, 8, 232, 150, 212, 205, 221,
|
||||
11, 225, 87, 219, 126, 136, 137, 180, 198, 48, 68, 203, 239, 252, 194, 235,
|
||||
142, 137, 174, 172, 190, 145, 250, 221, 182, 204, 1, 195, 130, 153, 83, 241,
|
||||
161, 239, 211, 138, 11, 169, 155, 245, 174, 49, 10, 166, 16, 130, 181, 139,
|
||||
222, 222, 112, 99, 124, 94, 51, 243, 133, 194, 244, 136, 35, 248, 201, 177,
|
||||
178, 186, 129, 102, 89, 184, 180, 41, 149, 96, 165, 72, 225, 231, 134, 158,
|
||||
199, 28, 249, 16, 225, 195, 10, 210, 164, 252, 138, 8, 35, 152, 213, 199,
|
||||
82, 116, 97, 230, 63, 199, 241, 35, 79, 120, 54, 174, 67, 112, 1, 76,
|
||||
69, 222, 194, 96, 82, 94, 25, 228, 196, 145, 155, 136, 228, 234, 46, 101,
|
||||
246, 51, 103, 166, 246, 75, 9, 200, 161, 4, 108, 35, 129, 168, 208, 144,
|
||||
50, 14, 13, 220, 41, 132, 122, 127, 194, 9, 232, 234, 107, 28, 187, 8,
|
||||
51, 141, 97, 221, 225, 9, 113, 170, 166, 102, 135, 22, 231, 185, 227, 187,
|
||||
110, 145, 251, 146, 76, 22, 146, 228, 7, 53, 64, 25, 62, 198, 130, 190,
|
||||
221, 232, 169, 64, 188, 199, 237, 249, 173, 218, 196, 191, 48, 224, 5, 113,
|
||||
100, 166, 160, 21, 191, 197, 61, 162, 149, 171, 240, 183, 129, 231, 123, 204,
|
||||
192, 179, 134, 15, 47, 161, 142, 177, 239, 234, 186, 237, 231, 53, 208, 95,
|
||||
146, 36, 225, 231, 89, 142, 93, 248, 137, 124, 83, 39, 69, 77, 89, 208,
|
||||
182, 48, 85, 147, 244, 164, 246, 68, 38, 190, 220, 35, 202, 91, 157, 151,
|
||||
201, 240, 185, 218, 4, 152, 2, 132, 177, 88, 190, 196, 229, 74, 220, 135,
|
||||
137, 196, 11, 47, 5, 251, 106, 144, 163, 60, 222, 127, 52, 57, 202, 102,
|
||||
64, 140, 110, 206, 23, 182, 39, 245, 1, 163, 157, 186, 163, 80, 7, 230,
|
||||
44, 249, 176, 102, 164, 125, 147, 120, 18, 191, 186, 125, 64, 65, 198, 157,
|
||||
164, 213, 95, 61, 13, 181, 208, 91, 242, 197, 158, 34, 98, 169, 91, 14,
|
||||
17, 93, 157, 17, 65, 30, 183, 6, 139, 58, 255, 108, 100, 136, 209, 144,
|
||||
164, 6, 237, 33, 210, 110, 57, 126, 197, 136, 125, 244, 165, 151, 168, 3,
|
||||
143, 251, 247, 155, 136, 130, 88, 14, 74, 121, 250, 133, 21, 226, 185, 232,
|
||||
118, 132, 89, 64, 204, 161, 2, 70, 224, 159, 35, 204, 123, 180, 13, 52,
|
||||
231, 57, 25, 78, 66, 69, 97, 42, 198, 84, 176, 59, 8, 232, 125, 134,
|
||||
193, 2, 232, 109, 216, 69, 90, 142, 32, 38, 249, 37, 75, 180, 184, 188,
|
||||
19, 47, 120, 87, 146, 70, 232, 120, 191, 45, 33, 38, 19, 248, 110, 110,
|
||||
44, 64, 2, 84, 244, 228, 252, 228, 170, 123, 38, 144, 213, 144, 171, 212,
|
||||
243, 87, 189, 46, 128, 110, 84, 77, 65, 183, 61, 184, 101, 44, 168, 68,
|
||||
14, 106, 105, 8, 227, 211, 166, 39, 152, 43, 52, 254, 197, 55, 119, 89,
|
||||
168, 65, 53, 138, 177, 56, 219, 0, 58, 121, 148, 18, 44, 100, 215, 103,
|
||||
145, 229, 117, 196, 91, 89, 113, 143, 172, 239, 249, 184, 154, 39, 112, 65,
|
||||
204, 42, 84, 38, 155, 151, 151, 16, 100, 87, 174, 162, 145, 147, 149, 186,
|
||||
237, 145, 134, 144, 198, 235, 213, 163, 48, 230, 24, 47, 57, 71, 127, 0,
|
||||
150, 219, 12, 81, 197, 150, 131, 13, 169, 63, 175, 184, 48, 235, 65, 243,
|
||||
149, 200, 163, 254, 202, 114, 247, 67, 143, 250, 126, 228, 80, 130, 216, 214,
|
||||
36, 2, 230, 33, 119, 125, 3, 142, 237, 100, 3, 152, 197, 174, 244, 129,
|
||||
232, 30, 206, 199, 39, 210, 220, 43, 237, 221, 201, 54, 179, 42, 28, 133,
|
||||
246, 203, 198, 177, 0, 28, 194, 85, 223, 109, 155, 147, 221, 60, 133, 108,
|
||||
157, 254, 26, 75, 157, 185, 49, 142, 31, 137, 71, 43, 63, 64, 237, 148,
|
||||
237, 172, 159, 160, 155, 254, 234, 224, 140, 193, 114, 140, 62, 109, 136, 39,
|
||||
255, 8, 158, 146, 128, 49, 222, 96, 57, 209, 180, 249, 202, 127, 113, 231,
|
||||
78, 178, 46, 33, 228, 215, 104, 31, 207, 186, 82, 41, 42, 39, 103, 119,
|
||||
123, 133, 243, 254, 238, 156, 90, 186, 37, 212, 33, 107, 252, 51, 177, 36,
|
||||
237, 76, 159, 245, 93, 214, 97, 56, 190, 38, 160, 94, 105, 222, 220, 158,
|
||||
49, 16, 191, 52, 120, 87, 179, 2, 27, 144, 223, 230, 184, 6, 129, 227,
|
||||
69, 47, 215, 181, 162, 139, 72, 200, 45, 163, 159, 62, 2, 221, 124, 40,
|
||||
159, 242, 35, 208, 179, 166, 98, 67, 178, 68, 143, 225, 178, 146, 187, 159,
|
||||
57, 66, 176, 192, 236, 250, 168, 224, 122, 43, 159, 120, 133, 165, 122, 64,
|
||||
87, 74, 161, 241, 9, 87, 90, 24, 255, 113, 203, 220, 57, 139, 197, 159,
|
||||
31, 151, 27, 140, 77, 162, 7, 27, 84, 228, 187, 220, 53, 126, 162, 242,
|
||||
84, 181, 223, 103, 86, 177, 207, 31, 140, 18, 207, 256, 201, 166, 96, 23,
|
||||
233, 103, 197, 84, 161, 75, 59, 149, 138, 154, 119, 92, 16, 53, 116, 97,
|
||||
220, 114, 35, 45, 77, 209, 40, 196, 71, 22, 81, 178, 110, 14, 3, 180,
|
||||
110, 129, 112, 47, 18, 61, 134, 78, 73, 79, 254, 232, 125, 180, 205, 54,
|
||||
220, 119, 63, 89, 181, 52, 77, 109, 151, 77, 80, 207, 144, 25, 20, 6,
|
||||
208, 47, 201, 206, 192, 14, 73, 176, 256, 201, 207, 87, 216, 60, 56, 73,
|
||||
92, 243, 179, 113, 49, 59, 55, 168, 121, 137, 69, 154, 95, 57, 187, 47,
|
||||
129, 4, 15, 92, 6, 116, 69, 196, 48, 134, 84, 81, 111, 56, 38, 176,
|
||||
239, 6, 128, 72, 242, 134, 36, 221, 59, 48, 242, 68, 130, 110, 171, 89,
|
||||
13, 220, 48, 29, 5, 75, 104, 233, 91, 129, 105, 162, 44, 113, 163, 163,
|
||||
85, 147, 190, 111, 197, 80, 213, 153, 81, 68, 203, 33, 161, 165, 10, 61,
|
||||
120, 252, 0, 205, 28, 42, 193, 64, 39, 37, 83, 175, 5, 218, 215, 174,
|
||||
128, 121, 231, 11, 150, 145, 135, 197, 136, 91, 193, 5, 107, 88, 82, 6,
|
||||
4, 188, 256, 70, 40, 2, 167, 57, 169, 203, 115, 254, 215, 172, 84, 80,
|
||||
188, 167, 34, 137, 43, 243, 2, 79, 178, 38, 188, 135, 233, 194, 208, 13,
|
||||
11, 151, 231, 196, 12, 122, 162, 56, 17, 114, 191, 207, 90, 132, 64, 238,
|
||||
187, 6, 198, 176, 240, 88, 118, 236, 15, 226, 166, 22, 193, 229, 82, 246,
|
||||
213, 64, 37, 63, 31, 243, 252, 37, 156, 38, 175, 204, 138, 141, 211, 82,
|
||||
106, 217, 97, 139, 153, 56, 129, 218, 158, 9, 83, 26, 87, 112, 71, 21,
|
||||
250, 5, 65, 141, 68, 116, 231, 113, 10, 218, 99, 205, 201, 92, 157, 4,
|
||||
97, 46, 49, 220, 72, 139, 103, 171, 149, 129, 193, 19, 69, 245, 43, 31,
|
||||
58, 68, 36, 195, 159, 22, 54, 34, 233, 141, 205, 100, 226, 96, 22, 192,
|
||||
41, 231, 24, 79, 234, 138, 30, 120, 117, 216, 172, 197, 172, 107, 86, 29,
|
||||
181, 151, 0, 6, 146, 186, 68, 55, 54, 58, 213, 182, 60, 231, 33, 232,
|
||||
77, 210, 216, 154, 80, 51, 141, 122, 68, 148, 219, 122, 254, 48, 64, 175,
|
||||
41, 115, 62, 243, 141, 81, 119, 121, 5, 68, 121, 88, 239, 29, 230, 90,
|
||||
135, 159, 35, 223, 168, 112, 49, 37, 146, 60, 126, 134, 42, 145, 115, 90,
|
||||
73, 133, 211, 86, 120, 141, 122, 241, 127, 56, 130, 36, 174, 75, 83, 246,
|
||||
112, 45, 136, 194, 201, 115, 1, 156, 114, 167, 208, 12, 176, 147, 32, 170,
|
||||
251, 100, 102, 220, 122, 210, 6, 49, 75, 201, 38, 105, 132, 135, 126, 102,
|
||||
13, 121, 76, 228, 202, 20, 61, 213, 246, 13, 207, 42, 148, 168, 37, 253,
|
||||
34, 94, 141, 185, 18, 234, 157, 109, 104, 64, 250, 125, 49, 236, 86, 48,
|
||||
196, 77, 75, 237, 156, 103, 225, 19, 110, 229, 22, 68, 177, 93, 221, 181,
|
||||
152, 153, 61, 108, 101, 74, 247, 195, 127, 216, 30, 166, 168, 61, 83, 229,
|
||||
120, 156, 96, 120, 201, 124, 43, 27, 253, 250, 120, 143, 89, 235, 189, 243,
|
||||
150, 7, 127, 119, 149, 244, 84, 185, 134, 34, 128, 193, 236, 234, 132, 117,
|
||||
137, 32, 145, 184, 44, 121, 51, 76, 11, 228, 142, 251, 39, 77, 228, 251,
|
||||
41, 58, 246, 107, 125, 187, 9, 240, 35, 8, 11, 162, 242, 220, 158, 163,
|
||||
2, 184, 163, 227, 242, 2, 100, 101, 2, 78, 129, 34, 89, 28, 26, 157,
|
||||
79, 31, 107, 250, 194, 156, 186, 69, 212, 66, 41, 180, 139, 42, 211, 253,
|
||||
256, 239, 29, 129, 104, 248, 182, 68, 1, 189, 48, 226, 36, 229, 3, 158,
|
||||
41, 53, 241, 22, 115, 174, 16, 163, 224, 19, 112, 219, 177, 233, 42, 27,
|
||||
250, 134, 18, 28, 145, 122, 68, 34, 134, 31, 147, 17, 39, 188, 150, 76,
|
||||
45, 42, 167, 249, 12, 16, 23, 182, 13, 79, 121, 3, 70, 197, 239, 44,
|
||||
86, 177, 255, 81, 64, 171, 138, 131, 73, 110, 44, 201, 254, 198, 146, 91,
|
||||
48, 9, 104, 31, 29, 161, 101, 31, 138, 180, 231, 233, 79, 137, 61, 236,
|
||||
140, 15, 249, 218, 234, 119, 99, 195, 110, 137, 237, 207, 8, 31, 45, 24,
|
||||
90, 155, 203, 253, 192, 203, 65, 176, 210, 171, 142, 214, 220, 122, 136, 237,
|
||||
189, 186, 147, 40, 80, 254, 173, 33, 191, 46, 192, 26, 108, 255, 228, 205,
|
||||
61, 76, 39, 107, 225, 126, 228, 182, 140, 251, 143, 134, 252, 168, 221, 8,
|
||||
185, 85, 60, 233, 147, 244, 87, 137, 8, 140, 96, 80, 53, 45, 175, 160,
|
||||
124, 189, 112, 37, 144, 19, 70, 17, 170, 242, 2, 3, 28, 95, 120, 199,
|
||||
212, 43, 9, 117, 86, 151, 101, 241, 200, 145, 241, 19, 178, 69, 204, 197,
|
||||
227, 166, 94, 7, 193, 45, 247, 234, 19, 187, 212, 212, 236, 125, 33, 95,
|
||||
198, 121, 122, 103, 77, 155, 235, 49, 25, 237, 249, 11, 162, 7, 238, 24,
|
||||
16, 150, 129, 25, 152, 17, 42, 67, 247, 162, 77, 154, 31, 133, 55, 137,
|
||||
79, 119, 153, 10, 86, 28, 244, 186, 41, 169, 106, 44, 10, 49, 110, 179,
|
||||
32, 133, 155, 244, 61, 70, 131, 168, 170, 39, 231, 252, 32, 69, 92, 238,
|
||||
239, 35, 132, 136, 236, 167, 90, 32, 123, 88, 69, 22, 20, 89, 145, 166,
|
||||
30, 118, 75, 4, 49, 31, 225, 54, 11, 50, 56, 191, 246, 1, 187, 33,
|
||||
119, 107, 139, 68, 19, 240, 131, 55, 94, 113, 31, 252, 12, 179, 121, 2,
|
||||
120, 252, 0, 76, 41, 80, 185, 42, 62, 121, 105, 159, 121, 109, 111, 98,
|
||||
7, 118, 86, 29, 210, 70, 231, 179, 223, 229, 164, 70, 62, 47, 0, 206,
|
||||
204, 178, 168, 120, 224, 166, 99, 25, 103, 63, 246, 224, 117, 204, 75, 124,
|
||||
140, 133, 110, 110, 222, 88, 151, 118, 46, 37, 22, 143, 158, 40, 2, 50,
|
||||
153, 94, 190, 199, 13, 198, 127, 211, 180, 90, 183, 98, 0, 142, 210, 154,
|
||||
100, 187, 67, 231, 202, 100, 198, 235, 252, 160, 247, 124, 247, 14, 121, 221,
|
||||
57, 88, 253, 243, 185, 89, 45, 249, 221, 194, 108, 175, 193, 119, 50, 141,
|
||||
223, 133, 136, 64, 176, 250, 129, 100, 124, 94, 181, 159, 99, 185, 177, 240,
|
||||
135, 42, 103, 52, 202, 208, 143, 186, 193, 103, 154, 237, 102, 88, 225, 161,
|
||||
50, 188, 191, 109, 12, 87, 19, 227, 247, 183, 13, 52, 205, 170, 205, 146,
|
||||
89, 160, 18, 105, 192, 73, 231, 225, 184, 157, 252, 220, 61, 59, 169, 183,
|
||||
221, 20, 141, 20, 158, 101, 245, 7, 245, 225, 118, 137, 84, 55, 19, 27,
|
||||
164, 110, 35, 25, 202, 94, 150, 46, 91, 152, 130, 1, 7, 46, 16, 237,
|
||||
171, 109, 19, 200, 65, 38, 10, 213, 70, 96, 126, 226, 185, 225, 181, 46,
|
||||
10, 165, 11, 123, 53, 158, 22, 147, 64, 22, 227, 69, 182, 237, 197, 37,
|
||||
39, 49, 186, 223, 139, 128, 55, 36, 166, 178, 220, 20, 98, 172, 166, 253,
|
||||
45, 0, 120, 180, 189, 185, 158, 159, 196, 6, 214, 79, 141, 52, 156, 107,
|
||||
5, 109, 142, 159, 33, 64, 190, 133, 95, 132, 95, 202, 160, 63, 186, 23,
|
||||
231, 107, 163, 33, 234, 15, 244, 77, 108, 49, 51, 7, 164, 87, 142, 99,
|
||||
240, 202, 47, 256, 118, 190, 196, 178, 217, 42, 39, 153, 21, 192, 232, 202,
|
||||
14, 82, 179, 64, 233, 4, 219, 10, 133, 78, 43, 144, 146, 216, 202, 81,
|
||||
71, 252, 8, 201, 68, 256, 85, 233, 164, 88, 176, 30, 5, 152, 126, 179,
|
||||
249, 84, 140, 190, 159, 54, 118, 98, 2, 159, 27, 133, 74, 121, 239, 196,
|
||||
71, 149, 119, 135, 102, 20, 87, 112, 44, 75, 221, 3, 151, 158, 5, 98,
|
||||
152, 25, 97, 106, 63, 171, 240, 79, 234, 240, 230, 92, 76, 70, 173, 196,
|
||||
36, 225, 218, 133, 64, 240, 150, 41, 146, 66, 133, 51, 134, 73, 170, 238,
|
||||
140, 90, 45, 89, 46, 147, 96, 169, 174, 174, 244, 151, 90, 40, 32, 74,
|
||||
38, 154, 246, 57, 31, 14, 189, 151, 83, 243, 197, 183, 220, 185, 53, 225,
|
||||
51, 106, 188, 208, 222, 248, 93, 13, 93, 215, 131, 25, 142, 185, 113, 222,
|
||||
131, 215, 149, 50, 159, 85, 32, 5, 205, 192, 2, 227, 42, 214, 197, 42,
|
||||
126, 182, 68, 123, 109, 36, 237, 179, 170, 199, 77, 256, 5, 128, 214, 243,
|
||||
137, 177, 170, 253, 179, 180, 153, 236, 100, 196, 216, 231, 198, 37, 192, 80,
|
||||
121, 221, 246, 1, 16, 246, 29, 78, 64, 148, 124, 38, 96, 125, 28, 20,
|
||||
48, 51, 73, 187, 139, 208, 98, 253, 221, 188, 84, 129, 1, 205, 95, 205,
|
||||
117, 79, 71, 126, 134, 237, 19, 184, 137, 125, 129, 178, 223, 54, 188, 112,
|
||||
30, 7, 225, 228, 205, 184, 233, 87, 117, 22, 58, 10, 8, 42, 2, 114,
|
||||
254, 19, 17, 13, 150, 92, 233, 179, 63, 12, 60, 171, 127, 35, 50, 5,
|
||||
195, 113, 241, 25, 249, 184, 166, 44, 221, 35, 151, 116, 8, 54, 195, 89,
|
||||
218, 186, 132, 5, 41, 89, 226, 177, 11, 41, 87, 172, 5, 23, 20, 59,
|
||||
228, 94, 76, 33, 137, 43, 151, 221, 61, 232, 4, 120, 93, 217, 80, 228,
|
||||
228, 6, 58, 25, 62, 84, 91, 48, 209, 20, 247, 243, 55, 106, 80, 79,
|
||||
235, 34, 20, 180, 146, 2, 236, 13, 236, 206, 243, 222, 204, 83, 148, 213,
|
||||
214, 117, 237, 98, 0, 90, 204, 168, 32, 41, 126, 67, 191, 74, 27, 255,
|
||||
26, 75, 240, 113, 185, 105, 167, 154, 112, 67, 151, 63, 161, 134, 239, 176,
|
||||
42, 87, 249, 130, 45, 242, 17, 100, 107, 120, 212, 218, 237, 76, 231, 162,
|
||||
175, 172, 118, 155, 92, 36, 124, 17, 121, 71, 13, 9, 82, 126, 147, 142,
|
||||
218, 148, 138, 80, 163, 106, 164, 123, 140, 129, 35, 42, 186, 154, 228, 214,
|
||||
75, 73, 8, 253, 42, 153, 232, 164, 95, 24, 110, 90, 231, 197, 90, 196,
|
||||
57, 164, 252, 181, 31, 7, 97, 256, 35, 77, 200, 212, 99, 179, 92, 227,
|
||||
17, 180, 49, 176, 9, 188, 13, 182, 93, 44, 128, 219, 134, 92, 151, 6,
|
||||
23, 126, 200, 109, 66, 30, 140, 180, 146, 134, 67, 200, 7, 9, 223, 168,
|
||||
186, 221, 3, 154, 150, 165, 43, 53, 138, 27, 86, 213, 235, 160, 70, 2,
|
||||
240, 20, 89, 212, 84, 141, 168, 246, 183, 227, 30, 167, 138, 185, 253, 83,
|
||||
52, 143, 236, 94, 59, 65, 89, 218, 194, 157, 164, 156, 111, 95, 202, 168,
|
||||
245, 256, 151, 28, 222, 194, 72, 130, 217, 134, 253, 77, 246, 100, 76, 32,
|
||||
254, 174, 182, 193, 14, 237, 74, 1, 74, 26, 135, 216, 152, 208, 112, 38,
|
||||
181, 62, 25, 71, 61, 234, 254, 97, 191, 23, 92, 256, 190, 205, 6, 16,
|
||||
134, 147, 210, 219, 148, 59, 73, 185, 24, 247, 174, 143, 116, 220, 128, 144,
|
||||
111, 126, 101, 98, 130, 136, 101, 102, 69, 127, 24, 168, 146, 226, 226, 207,
|
||||
176, 122, 149, 254, 134, 196, 22, 151, 197, 21, 50, 205, 116, 154, 65, 116,
|
||||
177, 224, 127, 77, 177, 159, 225, 69, 176, 54, 100, 104, 140, 8, 11, 126,
|
||||
11, 188, 185, 159, 107, 16, 254, 142, 80, 28, 5, 157, 104, 57, 109, 82,
|
||||
102, 80, 173, 242, 238, 207, 57, 105, 237, 160, 59, 189, 189, 199, 26, 11,
|
||||
190, 156, 97, 118, 20, 12, 254, 189, 165, 147, 142, 199, 5, 213, 64, 133,
|
||||
108, 217, 133, 60, 94, 28, 116, 136, 47, 165, 125, 42, 183, 143, 14, 129,
|
||||
223, 70, 212, 205, 181, 180, 3, 201, 182, 46, 57, 104, 239, 60, 99, 181,
|
||||
220, 231, 45, 79, 156, 89, 149, 143, 190, 103, 153, 61, 235, 73, 136, 20,
|
||||
89, 243, 16, 130, 247, 141, 134, 93, 80, 68, 85, 84, 8, 72, 194, 4,
|
||||
242, 110, 19, 133, 199, 70, 172, 92, 132, 254, 67, 74, 36, 94, 13, 90,
|
||||
154, 184, 9, 109, 118, 243, 214, 71, 36, 95, 0, 90, 201, 105, 112, 215,
|
||||
69, 196, 224, 210, 236, 242, 155, 211, 37, 134, 69, 113, 157, 97, 68, 26,
|
||||
230, 149, 219, 180, 20, 76, 172, 145, 154, 40, 129, 8, 93, 56, 162, 124,
|
||||
207, 233, 105, 19, 3, 183, 155, 134, 8, 244, 213, 78, 139, 88, 156, 37,
|
||||
51, 152, 111, 102, 112, 250, 114, 252, 201, 241, 133, 24, 136, 153, 5, 90,
|
||||
210, 197, 216, 24, 131, 17, 147, 246, 13, 86, 3, 253, 179, 237, 101, 114,
|
||||
243, 191, 207, 2, 220, 133, 244, 53, 87, 125, 154, 158, 197, 20, 8, 83,
|
||||
32, 191, 38, 241, 204, 22, 168, 59, 217, 123, 162, 82, 21, 50, 130, 89,
|
||||
239, 253, 195, 56, 253, 74, 147, 125, 234, 199, 250, 28, 65, 193, 22, 237,
|
||||
193, 94, 58, 229, 139, 176, 69, 42, 179, 164, 150, 168, 246, 214, 86, 174,
|
||||
59, 117, 15, 19, 76, 37, 214, 238, 153, 226, 154, 45, 109, 114, 198, 107,
|
||||
45, 70, 238, 196, 142, 252, 244, 71, 123, 136, 134, 188, 99, 132, 25, 42,
|
||||
240, 0, 196, 33, 26, 124, 256, 145, 27, 102, 153, 35, 28, 132, 221, 167,
|
||||
138, 133, 41, 170, 95, 224, 40, 139, 239, 153, 1, 106, 255, 106, 170, 163,
|
||||
127, 44, 155, 232, 194, 119, 232, 117, 239, 143, 108, 41, 3, 9, 180, 256,
|
||||
144, 113, 133, 200, 79, 69, 128, 216, 31, 50, 102, 209, 249, 136, 150, 154,
|
||||
182, 51, 228, 39, 127, 142, 87, 15, 94, 92, 187, 245, 31, 236, 64, 58,
|
||||
114, 11, 17, 166, 189, 152, 218, 34, 123, 39, 58, 37, 153, 91, 63, 121,
|
||||
31, 34, 12, 254, 106, 96, 171, 14, 155, 247, 214, 69, 24, 98, 3, 204,
|
||||
202, 194, 207, 30, 253, 44, 119, 70, 14, 96, 82, 250, 63, 6, 232, 38,
|
||||
89, 144, 102, 191, 82, 254, 20, 222, 96, 162, 110, 6, 159, 58, 200, 226,
|
||||
98, 128, 42, 70, 84, 247, 128, 211, 136, 54, 143, 166, 60, 118, 99, 218,
|
||||
27, 193, 85, 81, 219, 223, 46, 41, 23, 233, 152, 222, 36, 236, 54, 181,
|
||||
56, 50, 4, 207, 129, 92, 78, 88, 197, 251, 131, 105, 31, 172, 38, 131,
|
||||
19, 204, 129, 47, 227, 106, 202, 183, 23, 6, 77, 224, 102, 147, 11, 218,
|
||||
131, 132, 60, 192, 208, 223, 236, 23, 103, 115, 89, 18, 185, 171, 70, 174,
|
||||
139, 0, 100, 160, 221, 11, 228, 60, 12, 122, 114, 12, 157, 235, 148, 57,
|
||||
83, 62, 173, 131, 169, 126, 85, 99, 93, 243, 81, 80, 29, 245, 206, 82,
|
||||
236, 227, 166, 14, 230, 213, 144, 97, 27, 111, 99, 164, 105, 150, 89, 111,
|
||||
252, 118, 140, 232, 120, 183, 137, 213, 232, 157, 224, 33, 134, 118, 186, 80,
|
||||
159, 2, 186, 193, 54, 242, 25, 237, 232, 249, 226, 213, 90, 149, 90, 160,
|
||||
118, 69, 64, 37, 10, 183, 109, 246, 30, 52, 219, 69, 189, 26, 116, 220,
|
||||
50, 244, 243, 243, 139, 137, 232, 98, 38, 45, 256, 143, 171, 101, 73, 238,
|
||||
123, 45, 194, 167, 250, 123, 12, 29, 136, 237, 141, 21, 89, 96, 199, 44,
|
||||
8, 214, 208, 17, 113, 41, 137, 26, 166, 155, 89, 85, 54, 58, 97, 160,
|
||||
50, 239, 58, 71, 21, 157, 139, 12, 37, 198, 182, 131, 149, 134, 16, 204,
|
||||
164, 181, 248, 166, 52, 216, 136, 201, 37, 255, 187, 240, 5, 101, 147, 231,
|
||||
14, 163, 253, 134, 146, 216, 8, 54, 224, 90, 220, 195, 75, 215, 186, 58,
|
||||
71, 204, 124, 105, 239, 53, 16, 85, 69, 163, 195, 223, 33, 38, 69, 88,
|
||||
88, 203, 99, 55, 176, 13, 156, 204, 236, 99, 194, 134, 75, 247, 126, 129,
|
||||
160, 124, 233, 206, 139, 144, 154, 45, 233, 51, 206, 61, 60, 55, 205, 107,
|
||||
84, 108, 96, 188, 203, 31, 89, 20, 115, 144, 137, 90, 237, 78, 231, 185,
|
||||
120, 217, 1, 176, 169, 30, 155, 176, 100, 113, 53, 42, 193, 108, 14, 121,
|
||||
176, 158, 137, 92, 178, 44, 110, 249, 108, 234, 94, 101, 128, 12, 250, 173,
|
||||
72, 202, 232, 66, 139, 152, 189, 18, 32, 197, 9, 238, 246, 55, 119, 183,
|
||||
196, 119, 113, 247, 191, 100, 200, 245, 46, 16, 234, 112, 136, 116, 232, 48,
|
||||
176, 108, 11, 237, 14, 153, 93, 177, 124, 72, 67, 121, 135, 143, 45, 18,
|
||||
97, 251, 184, 172, 136, 55, 213, 8, 103, 12, 221, 212, 13, 160, 116, 91,
|
||||
237, 127, 218, 190, 103, 131, 77, 82, 36, 100, 22, 252, 79, 69, 54, 26,
|
||||
65, 182, 115, 142, 247, 20, 89, 81, 188, 244, 27, 120, 240, 248, 13, 230,
|
||||
67, 133, 32, 201, 129, 87, 9, 245, 66, 88, 166, 34, 46, 184, 119, 218,
|
||||
144, 235, 163, 40, 138, 134, 127, 217, 64, 227, 116, 67, 55, 202, 130, 48,
|
||||
199, 42, 251, 112, 124, 153, 123, 194, 243, 49, 250, 12, 78, 157, 167, 134,
|
||||
210, 73, 156, 102, 21, 88, 216, 123, 45, 11, 208, 18, 47, 187, 20, 43,
|
||||
3, 180, 124, 2, 136, 176, 77, 111, 138, 139, 91, 225, 126, 8, 74, 255,
|
||||
88, 192, 193, 239, 138, 204, 139, 194, 166, 130, 252, 184, 140, 168, 30, 177,
|
||||
121, 98, 131, 124, 69, 171, 75, 49, 184, 34, 76, 122, 202, 115, 184, 253,
|
||||
120, 182, 33, 251, 1, 74, 216, 217, 243, 168, 70, 162, 119, 158, 197, 198,
|
||||
61, 89, 7, 5, 54, 199, 211, 170, 23, 226, 44, 247, 165, 195, 7, 225,
|
||||
91, 23, 50, 15, 51, 208, 106, 94, 12, 31, 43, 112, 146, 139, 246, 182,
|
||||
113, 1, 97, 15, 66, 2, 51, 76, 164, 184, 237, 200, 218, 176, 72, 98,
|
||||
33, 135, 38, 147, 140, 229, 50, 94, 81, 187, 129, 17, 238, 168, 146, 203,
|
||||
181, 99, 164, 3, 104, 98, 255, 189, 114, 142, 86, 102, 229, 102, 80, 129,
|
||||
64, 84, 79, 161, 81, 156, 128, 111, 164, 197, 18, 15, 55, 196, 198, 191,
|
||||
28, 113, 117, 96, 207, 253, 19, 158, 231, 13, 53, 130, 252, 211, 58, 180,
|
||||
212, 142, 7, 219, 38, 81, 62, 109, 167, 113, 33, 56, 97, 185, 157, 130,
|
||||
186, 129, 119, 182, 196, 26, 54, 110, 65, 170, 166, 236, 30, 22, 162, 0,
|
||||
106, 12, 248, 33, 48, 72, 159, 17, 76, 244, 172, 132, 89, 171, 196, 76,
|
||||
254, 166, 76, 218, 226, 3, 52, 220, 238, 181, 179, 144, 225, 23, 3, 166,
|
||||
158, 35, 228, 154, 204, 23, 203, 71, 134, 189, 18, 168, 236, 141, 117, 138,
|
||||
2, 132, 78, 57, 154, 21, 250, 196, 184, 40, 161, 40, 10, 178, 134, 120,
|
||||
132, 123, 101, 82, 205, 121, 55, 140, 231, 56, 231, 71, 206, 246, 198, 150,
|
||||
146, 192, 45, 105, 242, 1, 125, 18, 176, 46, 222, 122, 19, 80, 113, 133,
|
||||
131, 162, 81, 51, 98, 168, 247, 161, 139, 39, 63, 162, 22, 153, 170, 92,
|
||||
91, 130, 174, 200, 45, 112, 99, 164, 132, 184, 191, 186, 200, 167, 86, 145,
|
||||
167, 227, 130, 44, 12, 158, 172, 249, 204, 17, 54, 249, 16, 200, 21, 174,
|
||||
67, 223, 105, 201, 50, 36, 133, 203, 244, 131, 228, 67, 29, 195, 91, 91,
|
||||
55, 107, 167, 154, 170, 137, 218, 183, 169, 61, 99, 175, 128, 23, 142, 183,
|
||||
66, 255, 59, 187, 66, 85, 212, 109, 168, 82, 16, 43, 67, 139, 114, 176,
|
||||
216, 255, 130, 94, 152, 79, 183, 64, 100, 23, 214, 82, 34, 230, 48, 15,
|
||||
242, 130, 50, 241, 81, 32, 5, 125, 183, 182, 184, 99, 248, 109, 159, 210,
|
||||
226, 61, 119, 129, 39, 149, 78, 214, 107, 78, 147, 124, 228, 18, 143, 188,
|
||||
84, 180, 233, 119, 64, 39, 158, 133, 177, 168, 6, 150, 80, 117, 150, 56,
|
||||
49, 72, 49, 37, 30, 242, 49, 142, 33, 156, 34, 44, 44, 72, 58, 22,
|
||||
249, 46, 168, 80, 25, 196, 64, 174, 97, 179, 244, 134, 213, 105, 63, 151,
|
||||
21, 90, 168, 90, 245, 28, 157, 65, 250, 232, 188, 27, 99, 160, 156, 127,
|
||||
68, 193, 10, 80, 205, 36, 138, 229, 12, 223, 70, 169, 251, 41, 48, 94,
|
||||
41, 177, 99, 256, 158, 0, 6, 83, 231, 191, 120, 135, 157, 146, 218, 213,
|
||||
160, 7, 47, 234, 98, 211, 79, 225, 179, 95, 175, 105, 185, 79, 115, 0,
|
||||
104, 14, 65, 124, 15, 188, 52, 9, 253, 27, 132, 137, 13, 127, 75, 238,
|
||||
185, 253, 33, 8, 52, 157, 164, 68, 232, 188, 69, 28, 209, 233, 5, 129,
|
||||
216, 90, 252, 212, 33, 200, 222, 9, 112, 15, 43, 36, 226, 114, 15, 249,
|
||||
217, 8, 148, 22, 147, 23, 143, 67, 222, 116, 235, 250, 212, 210, 39, 142,
|
||||
108, 64, 209, 83, 73, 66, 99, 34, 17, 29, 45, 151, 244, 114, 28, 241,
|
||||
144, 208, 146, 179, 132, 89, 217, 198, 252, 219, 205, 165, 75, 107, 11, 173,
|
||||
76, 6, 196, 247, 152, 216, 248, 91, 209, 178, 57, 250, 174, 60, 79, 123,
|
||||
18, 135, 9, 241, 230, 159, 184, 68, 156, 251, 215, 9, 113, 234, 75, 235,
|
||||
103, 194, 205, 129, 230, 45, 96, 73, 157, 20, 200, 212, 212, 228, 161, 7,
|
||||
231, 228, 108, 43, 198, 87, 140, 140, 4, 182, 164, 3, 53, 104, 250, 213,
|
||||
85, 38, 89, 61, 52, 187, 35, 204, 86, 249, 100, 71, 248, 213, 163, 215,
|
||||
66, 106, 252, 129, 40, 111, 47, 24, 186, 221, 85, 205, 199, 237, 122, 181,
|
||||
32, 46, 182, 135, 33, 251, 142, 34, 208, 242, 128, 255, 4, 234, 15, 33,
|
||||
167, 222, 32, 186, 191, 34, 255, 244, 98, 240, 228, 204, 30, 142, 32, 70,
|
||||
69, 83, 110, 151, 10, 243, 141, 21, 223, 69, 61, 37, 59, 209, 102, 114,
|
||||
223, 33, 129, 254, 255, 103, 86, 247, 235, 72, 126, 177, 102, 226, 102, 30,
|
||||
149, 221, 62, 247, 251, 120, 163, 173, 57, 202, 204, 24, 39, 106, 120, 143,
|
||||
202, 176, 191, 147, 37, 38, 51, 133, 47, 245, 157, 132, 154, 71, 183, 111,
|
||||
30, 180, 18, 202, 82, 96, 170, 91, 157, 181, 212, 140, 256, 8, 196, 121,
|
||||
149, 79, 66, 127, 113, 78, 4, 197, 84, 256, 111, 222, 102, 63, 228, 104,
|
||||
136, 223, 67, 193, 93, 154, 249, 83, 204, 101, 200, 234, 84, 252, 230, 195,
|
||||
43, 140, 120, 242, 89, 63, 166, 233, 209, 94, 43, 170, 126, 5, 205, 78,
|
||||
112, 80, 143, 151, 146, 248, 137, 203, 45, 183, 61, 1, 155, 8, 102, 59,
|
||||
68, 212, 230, 61, 254, 191, 128, 223, 176, 123, 229, 27, 146, 120, 96, 165,
|
||||
213, 12, 232, 40, 186, 225, 66, 105, 200, 195, 212, 110, 237, 238, 151, 19,
|
||||
12, 171, 150, 82, 7, 228, 79, 52, 15, 78, 62, 43, 21, 154, 114, 21,
|
||||
12, 212, 256, 232, 125, 127, 5, 51, 37, 252, 136, 13, 47, 195, 168, 191,
|
||||
231, 55, 57, 251, 214, 116, 15, 86, 210, 41, 249, 242, 119, 27, 250, 203,
|
||||
107, 69, 90, 43, 206, 154, 127, 54, 100, 78, 187, 54, 244, 177, 234, 167,
|
||||
202, 136, 209, 171, 69, 114, 133, 173, 26, 139, 78, 141, 128, 32, 124, 39,
|
||||
45, 218, 96, 68, 90, 44, 67, 62, 83, 190, 188, 256, 103, 42, 102, 64,
|
||||
249, 0, 141, 11, 61, 69, 70, 66, 233, 237, 29, 200, 251, 157, 71, 51,
|
||||
64, 133, 113, 76, 35, 125, 76, 137, 217, 145, 35, 69, 226, 180, 56, 249,
|
||||
156, 163, 176, 237, 81, 54, 85, 169, 115, 211, 129, 70, 248, 40, 252, 192,
|
||||
194, 101, 247, 8, 181, 124, 217, 191, 194, 93, 99, 127, 117, 177, 144, 151,
|
||||
228, 121, 32, 11, 89, 81, 26, 29, 183, 76, 249, 132, 179, 70, 34, 102,
|
||||
20, 66, 87, 63, 124, 205, 174, 177, 87, 219, 73, 218, 91, 87, 176, 72,
|
||||
15, 211, 47, 61, 251, 165, 39, 247, 146, 70, 150, 57, 1, 212, 36, 162,
|
||||
39, 38, 16, 216, 3, 50, 116, 200, 32, 234, 77, 181, 155, 19, 90, 188,
|
||||
36, 6, 254, 46, 46, 203, 25, 230, 181, 196, 4, 151, 225, 65, 122, 216,
|
||||
168, 86, 158, 131, 136, 16, 49, 102, 233, 64, 154, 88, 228, 52, 146, 69,
|
||||
93, 157, 243, 121, 70, 209, 126, 213, 88, 145, 236, 65, 70, 96, 204, 47,
|
||||
10, 200, 77, 8, 103, 150, 48, 153, 5, 37, 52, 235, 209, 31, 181, 126,
|
||||
83, 142, 224, 140, 6, 32, 200, 171, 160, 179, 115, 229, 75, 194, 208, 39,
|
||||
59, 223, 52, 247, 38, 197, 135, 1, 6, 189, 106, 114, 168, 5, 211, 222,
|
||||
44, 63, 90, 160, 116, 172, 170, 133, 125, 138, 39, 131, 23, 178, 10, 214,
|
||||
36, 93, 28, 59, 68, 17, 123, 25, 255, 184, 204, 102, 194, 214, 129, 94,
|
||||
159, 245, 112, 141, 62, 11, 61, 197, 124, 221, 205, 11, 79, 71, 201, 54,
|
||||
58, 150, 29, 121, 87, 46, 240, 201, 68, 20, 194, 209, 47, 152, 158, 174,
|
||||
193, 164, 120, 255, 216, 165, 247, 58, 85, 130, 220, 23, 122, 223, 188, 98,
|
||||
21, 70, 72, 170, 150, 237, 76, 143, 112, 238, 206, 146, 215, 110, 4, 250,
|
||||
68, 44, 174, 177, 30, 98, 143, 241, 180, 127, 113, 48, 0, 1, 179, 199,
|
||||
59, 106, 201, 114, 29, 86, 173, 133, 217, 44, 200, 141, 107, 172, 16, 60,
|
||||
82, 58, 239, 94, 141, 234, 186, 235, 109, 173, 249, 139, 141, 59, 100, 248,
|
||||
84, 144, 49, 160, 51, 207, 164, 103, 74, 97, 146, 202, 193, 125, 168, 134,
|
||||
236, 111, 135, 121, 59, 145, 168, 200, 181, 173, 109, 2, 255, 6, 9, 245,
|
||||
90, 202, 214, 143, 121, 65, 85, 232, 132, 77, 228, 84, 26, 54, 184, 15,
|
||||
161, 29, 177, 79, 43, 0, 156, 184, 163, 165, 62, 90, 179, 93, 45, 239,
|
||||
1, 16, 120, 189, 127, 47, 74, 166, 20, 214, 233, 226, 89, 217, 229, 26,
|
||||
156, 53, 162, 60, 21, 3, 192, 72, 111, 51, 53, 101, 181, 208, 88, 82,
|
||||
179, 160, 219, 113, 240, 108, 43, 224, 162, 147, 62, 14, 95, 81, 205, 4,
|
||||
160, 177, 225, 115, 29, 69, 235, 168, 148, 29, 128, 114, 124, 129, 172, 165,
|
||||
215, 231, 214, 86, 160, 44, 157, 91, 248, 183, 73, 164, 56, 181, 162, 92,
|
||||
141, 118, 127, 240, 196, 77, 0, 9, 244, 79, 250, 100, 195, 25, 255, 85,
|
||||
94, 35, 212, 137, 107, 34, 110, 20, 200, 104, 17, 32, 231, 43, 150, 159,
|
||||
231, 216, 223, 190, 226, 109, 162, 197, 87, 92, 224, 11, 111, 73, 60, 225,
|
||||
238, 73, 246, 169, 19, 217, 119, 38, 121, 118, 70, 82, 99, 241, 110, 67,
|
||||
31, 76, 146, 215, 124, 240, 31, 103, 139, 224, 75, 160, 31, 78, 93, 4,
|
||||
64, 9, 103, 223, 6, 227, 119, 85, 116, 81, 21, 43, 46, 206, 234, 132,
|
||||
85, 99, 22, 131, 135, 97, 86, 13, 234, 188, 21, 14, 89, 169, 207, 238,
|
||||
219, 177, 190, 72, 157, 41, 114, 140, 92, 141, 186, 1, 63, 107, 225, 184,
|
||||
118, 150, 153, 254, 241, 106, 120, 210, 104, 144, 151, 161, 88, 206, 125, 164,
|
||||
15, 211, 173, 49, 146, 241, 71, 36, 58, 201, 46, 27, 33, 187, 91, 162,
|
||||
117, 19, 210, 213, 187, 97, 193, 50, 190, 114, 217, 60, 61, 167, 207, 213,
|
||||
213, 53, 135, 34, 156, 91, 115, 119, 46, 99, 242, 1, 90, 52, 198, 227,
|
||||
201, 91, 216, 146, 210, 82, 121, 38, 73, 133, 182, 193, 132, 148, 246, 75,
|
||||
109, 157, 179, 113, 176, 134, 205, 159, 148, 58, 103, 171, 132, 156, 133, 147,
|
||||
161, 231, 39, 100, 175, 97, 125, 28, 183, 129, 135, 191, 202, 181, 29, 218,
|
||||
43, 104, 148, 203, 189, 204, 4, 182, 169, 1, 134, 122, 141, 202, 13, 187,
|
||||
177, 112, 162, 35, 231, 6, 8, 241, 99, 6, 191, 45, 113, 113, 101, 104};
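// Illustrative sketch of the generation procedure described above the table
// (assumptions: 'piDigits' is a string of decimal digits of PI with the leading
// "3." removed, and 'count' is the number of A elements wanted; the table itself
// is fixed and is not regenerated at runtime).
static void GenerateAsFromPi( const char *piDigits, int count, swift_int16_t *out )
{
   int produced = 0;
   while ( produced < count && piDigits[0] && piDigits[1] && piDigits[2] )
   {
      int d = ( piDigits[0] - '0' ) * 100 + ( piDigits[1] - '0' ) * 10
            + ( piDigits[2] - '0' );
      if ( d < FIELD_SIZE * 3 )                  // keep this triple, reduced mod 257
         out[ produced++ ] = (swift_int16_t)( d % FIELD_SIZE );
      piDigits += 3;                             // otherwise skip to the next triple
   }
}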
|
||||
|
||||
// The S-Box we use for further linearity breaking.
|
||||
// We created it by taking the digits of decimal expansion of e.
|
||||
// The code that created it can be found in 'ProduceRandomSBox.c'.
|
||||
unsigned char SBox[256] = {
|
||||
//0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
0x7d, 0xd1, 0x70, 0x0b, 0xfa, 0x39, 0x18, 0xc3, 0xf3, 0xbb, 0xa7, 0xd4, 0x84, 0x25, 0x3b, 0x3c, // 0
|
||||
0x2c, 0x15, 0x69, 0x9a, 0xf9, 0x27, 0xfb, 0x02, 0x52, 0xba, 0xa8, 0x4b, 0x20, 0xb5, 0x8b, 0x3a, // 1
|
||||
0x88, 0x8e, 0x26, 0xcb, 0x71, 0x5e, 0xaf, 0xad, 0x0c, 0xac, 0xa1, 0x93, 0xc6, 0x78, 0xce, 0xfc, // 2
|
||||
0x2a, 0x76, 0x17, 0x1f, 0x62, 0xc2, 0x2e, 0x99, 0x11, 0x37, 0x65, 0x40, 0xfd, 0xa0, 0x03, 0xc1, // 3
|
||||
0xca, 0x48, 0xe2, 0x9b, 0x81, 0xe4, 0x1c, 0x01, 0xec, 0x68, 0x7a, 0x5a, 0x50, 0xf8, 0x0e, 0xa3, // 4
|
||||
0xe8, 0x61, 0x2b, 0xa2, 0xeb, 0xcf, 0x8c, 0x3d, 0xb4, 0x95, 0x13, 0x08, 0x46, 0xab, 0x91, 0x7b, // 5
|
||||
0xea, 0x55, 0x67, 0x9d, 0xdd, 0x29, 0x6a, 0x8f, 0x9f, 0x22, 0x4e, 0xf2, 0x57, 0xd2, 0xa9, 0xbd, // 6
|
||||
0x38, 0x16, 0x5f, 0x4c, 0xf7, 0x9e, 0x1b, 0x2f, 0x30, 0xc7, 0x41, 0x24, 0x5c, 0xbf, 0x05, 0xf6, // 7
|
||||
0x0a, 0x31, 0xa5, 0x45, 0x21, 0x33, 0x6b, 0x6d, 0x6c, 0x86, 0xe1, 0xa4, 0xe6, 0x92, 0x9c, 0xdf, // 8
|
||||
0xe7, 0xbe, 0x28, 0xe3, 0xfe, 0x06, 0x4d, 0x98, 0x80, 0x04, 0x96, 0x36, 0x3e, 0x14, 0x4a, 0x34, // 9
|
||||
0xd3, 0xd5, 0xdb, 0x44, 0xcd, 0xf5, 0x54, 0xdc, 0x89, 0x09, 0x90, 0x42, 0x87, 0xff, 0x7e, 0x56, // A
|
||||
0x5d, 0x59, 0xd7, 0x23, 0x75, 0x19, 0x97, 0x73, 0x83, 0x64, 0x53, 0xa6, 0x1e, 0xd8, 0xb0, 0x49, // B
|
||||
0x3f, 0xef, 0xbc, 0x7f, 0x43, 0xf0, 0xc9, 0x72, 0x0f, 0x63, 0x79, 0x2d, 0xc0, 0xda, 0x66, 0xc8, // C
|
||||
0x32, 0xde, 0x47, 0x07, 0xb8, 0xe9, 0x1d, 0xc4, 0x85, 0x74, 0x82, 0xcc, 0x60, 0x51, 0x77, 0x0d, // D
|
||||
0xaa, 0x35, 0xed, 0x58, 0x7c, 0x5b, 0xb9, 0x94, 0x6e, 0x8d, 0xb1, 0xc5, 0xb7, 0xee, 0xb6, 0xae, // E
|
||||
0x10, 0xe0, 0xd6, 0xd9, 0xe5, 0x4f, 0xf1, 0x12, 0x00, 0xd0, 0xf4, 0x1a, 0x6f, 0x8a, 0xb3, 0xb2 }; // F
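// Sanity check (illustration only): the S-Box above is expected to be a permutation
// of 0..255, i.e. every byte value appears exactly once. Returns 1 if it is.
static int CheckSBoxIsPermutation()
{
   int seen[256] = {0};
   int i;
   for ( i = 0; i < 256; ++i ) seen[ SBox[i] ]++;
   for ( i = 0; i < 256; ++i ) if ( seen[i] != 1 ) return 0;
   return 1;
}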
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Helper functions definition portion.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Don't vectorize, move decl to header file
|
||||
|
||||
// Translates an input array with values in base 257 to output array with values in base 256.
|
||||
// Returns the carry bit.
|
||||
//
|
||||
// Parameters:
|
||||
// - input: the input array of size EIGHTH_N. Each value in the array is a number in Z_257.
|
||||
// The MSB is assumed to be the last one in the array.
|
||||
// - output: the input array encoded in base 256.
|
||||
//
|
||||
// Returns:
|
||||
// - The carry bit (MSB).
|
||||
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N]);
|
||||
|
||||
// Translates an input integer into the range (-FIELD_SIZE / 2) <= result <= (FIELD_SIZE / 2).
|
||||
//
|
||||
// Parameters:
|
||||
// - x: the input integer.
|
||||
//
|
||||
// Returns:
|
||||
// - The result, which equals (x MOD FIELD_SIZE), such that |result| <= (FIELD_SIZE / 2).
|
||||
int Center(int x);
|
||||
|
||||
// Calculates bit reversal permutation.
|
||||
//
|
||||
// Parameters:
|
||||
// - input: the input to reverse.
|
||||
// - numOfBits: despite the name, this is passed as a power of two (2^k) that acts as a
//   sentinel bit; the low k bits of 'input' are reversed (e.g. ReverseBits(3, 8) == 6).
|
||||
//
|
||||
// Returns:
|
||||
// - The resulting number, which is obtained from the input by reversing its bits.
|
||||
int ReverseBits(int input, int numOfBits);
|
||||
|
||||
// Initializes the FFT fast lookup table.
|
||||
// Shall be called only once.
|
||||
void InitializeSWIFFTX();
|
||||
|
||||
// Calculates the FFT.
|
||||
//
|
||||
// Parameters:
|
||||
// - input: the input to the FFT.
|
||||
// - output: the resulting output.
|
||||
void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper functions implementation portion.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Don't vectorize, delete this copy.
|
||||
|
||||
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N])
|
||||
{
|
||||
swift_int32_t pairs[EIGHTH_N / 2];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < EIGHTH_N; i += 2)
|
||||
{
|
||||
// input[i] + 257 * input[i + 1]
|
||||
pairs[i >> 1] = input[i] + input[i + 1] + (input[i + 1] << 8);
|
||||
}
|
||||
|
||||
for (i = (EIGHTH_N / 2) - 1; i > 0; --i)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j = i - 1; j < (EIGHTH_N / 2) - 1; ++j)
|
||||
{
|
||||
// pairs[j + 1] * 513, because 257^2 mod 256^2 = 513.
|
||||
register swift_int32_t temp = pairs[j] + pairs[j + 1] + (pairs[j + 1] << 9);
|
||||
pairs[j] = temp & 0xffff;
|
||||
pairs[j + 1] += (temp >> 16);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < EIGHTH_N; i += 2)
|
||||
{
|
||||
output[i] = (unsigned char) (pairs[i >> 1] & 0xff);
|
||||
output[i + 1] = (unsigned char) ((pairs[i >> 1] >> 8) & 0xff);
|
||||
}
|
||||
|
||||
return (pairs[EIGHTH_N/2 - 1] >> 16);
|
||||
}
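// Naive reference version of the routine above (illustration only, not used by the
// miner). Assumption: the input holds EIGHTH_N base-257 digits in [0, 256], least
// significant digit first, as stated in the comment on the declaration. It simply
// evaluates the number with Horner's rule over a little-endian base-256 accumulator;
// since the value is below 257^EIGHTH_N, one extra accumulator digit captures the carry.
static swift_int16_t RefTranslateToBase256( const swift_int32_t input[EIGHTH_N],
                                            unsigned char output[EIGHTH_N] )
{
   unsigned int acc[EIGHTH_N + 1] = {0};        // base-256 digits, LSB first
   int i, j;

   for ( i = EIGHTH_N - 1; i >= 0; --i )        // acc = acc * 257 + input[i]
   {
      unsigned int carry = (unsigned int)input[i];
      for ( j = 0; j <= EIGHTH_N; ++j )
      {
         carry += acc[j] * 257;
         acc[j] = carry & 0xff;
         carry >>= 8;
      }
   }

   for ( j = 0; j < EIGHTH_N; ++j )
      output[j] = (unsigned char)acc[j];

   return (swift_int16_t)acc[EIGHTH_N];         // the carry (MSB overflow)
}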
|
||||
|
||||
int Center(int x)
|
||||
{
|
||||
int result = x % FIELD_SIZE;
|
||||
|
||||
if (result > (FIELD_SIZE / 2))
|
||||
result -= FIELD_SIZE;
|
||||
|
||||
if (result < (FIELD_SIZE / -2))
|
||||
result += FIELD_SIZE;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int ReverseBits(int input, int numOfBits)
|
||||
{
|
||||
register int reversed = 0;
|
||||
|
||||
for (input |= numOfBits; input > 1; input >>= 1)
|
||||
reversed = (reversed << 1) | (input & 1);
|
||||
|
||||
return reversed;
|
||||
}
|
||||
|
||||
void InitializeSWIFFTX()
|
||||
{
|
||||
int i, j, k, x;
|
||||
// The powers of OMEGA
|
||||
int omegaPowers[2 * N];
|
||||
omegaPowers[0] = 1;
|
||||
|
||||
if (wasSetupDone)
|
||||
return;
|
||||
|
||||
for (i = 1; i < (2 * N); ++i)
|
||||
{
|
||||
omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA);
|
||||
}
|
||||
|
||||
for (i = 0; i < (N / W); ++i)
|
||||
{
|
||||
for (j = 0; j < W; ++j)
|
||||
{
|
||||
multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)];
|
||||
}
|
||||
}
|
||||
|
||||
for (x = 0; x < 256; ++x)
|
||||
{
|
||||
for (j = 0; j < 8; ++j)
|
||||
{
|
||||
register int temp = 0;
|
||||
for (k = 0; k < 8; ++k)
|
||||
{
|
||||
temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)]
|
||||
* ((x >> k) & 1);
|
||||
}
|
||||
|
||||
fftTable[(x << 3) + j] = Center(temp);
|
||||
}
|
||||
}
|
||||
|
||||
wasSetupDone = true;
|
||||
}
|
||||
|
||||
// input should be deinterleaved in contiguous memory
|
||||
// output and F are 4x32
|
||||
// multipliers & fftTable are scalar 16
|
||||
|
||||
|
||||
void FFT_4way(const unsigned char input[EIGHTH_N], swift_int32_t *output)
|
||||
{
|
||||
swift_int16_t *mult = multipliers;
|
||||
m128_swift_int32_t F[64];
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
int j = i<<3;
|
||||
|
||||
// Need to isolate bytes in input, 8 bytes per lane.
|
||||
// Each iteration of the loop processes one input vector.
// Each lane reads a different index into fftTable.
|
||||
|
||||
// deinterleave the input!
|
||||
|
||||
// load table with 4 lanes from different indexes into fftTable
|
||||
// extract bytes into m128 4x16
|
||||
// multiply by vectorized mult
|
||||
|
||||
// input[lane][byte]
|
||||
|
||||
__m128i table;
|
||||
table = _mm_set_epi32( fftTable[ input[3][i] ],
|
||||
fftTable[ input[2][i] ],
|
||||
fftTable[ input[1][i] ],
|
||||
fftTable[ input[0][i] ] );
|
||||
|
||||
F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table );
|
||||
|
||||
table = _mm_set_epi32( fftTable[ input[3][i+1] ],
                       fftTable[ input[2][i+1] ],
                       fftTable[ input[1][i+1] ],
                       fftTable[ input[0][i+1] ] );
|
||||
|
||||
F[i+8] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table );
|
||||
|
||||
|
||||
// Alternate work-in-progress path carried over from the scalar FFT; note it redeclares
// 'table' and still uses the scalar fftTable indexing.
m128_swift_int16_t *table = &( fftTable[input[i] << 3] );
|
||||
|
||||
F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ),
|
||||
mm128_const1_32( table[0] ) );
|
||||
F[i+ 8] = _mm_mullo_epi32( mm128_const1_32( mult[j+1] ),
|
||||
mm128_const1_32( table[1] ) );
|
||||
F[i+16] = _mm_mullo_epi32( mm128_const1_32( mult[j+2] ),
|
||||
mm128_const1_32( table[2] ) );
|
||||
F[i+24] = _mm_mullo_epi32( mm128_const1_32( mult[j+3] ),
|
||||
mm128_const1_32( table[3] ) );
|
||||
F[i+32] = _mm_mullo_epi32( mm128_const1_32( mult[j+4] ),
|
||||
mm128_const1_32( table[4] ) );
|
||||
F[i+40] = _mm_mullo_epi32( mm128_const1_32( mult[j+5] ),
|
||||
mm128_const1_32( table[5] ) );
|
||||
F[i+48] = _mm_mullo_epi32( mm128_const1_32( mult[j+6] ),
|
||||
mm128_const1_32( table[6] ) );
|
||||
F[i+56] = _mm_mullo_epi32( mm128_const1_32( mult[j+7] ),
|
||||
mm128_const1_32( table[7] ) );
|
||||
}
|
||||
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
int j = i<<3;
|
||||
ADD_SUB_4WAY( F[j ], F[j+1] );
|
||||
ADD_SUB_4WAY( F[j+2], F[j+3] );
|
||||
ADD_SUB_4WAY( F[j+4], F[j+5] );
|
||||
ADD_SUB_4WAY( F[j+6], F[j+7] );
|
||||
|
||||
F[j+3] = _mm_slli_epi32( F[j+3], 4 );
|
||||
F[j+7] = _mm_slli_epi32( F[j+7], 4 );
|
||||
|
||||
ADD_SUB_4WAY( F[j ], F[j+2] );
|
||||
ADD_SUB_4WAY( F[j+1], F[j+3] );
|
||||
ADD_SUB_4WAY( F[j+4], F[j+6] );
|
||||
ADD_SUB_4WAY( F[j+5], F[j+7] );
|
||||
|
||||
F[j+5] = _mm_slli_epi32( F[j+5], 2 );
|
||||
F[j+6] = _mm_slli_epi32( F[j+6], 4 );
|
||||
F[j+7] = _mm_slli_epi32( F[j+7], 6 );
|
||||
|
||||
ADD_SUB_4WAY( F[j ], F[j+4] );
|
||||
ADD_SUB_4WAY( F[j+1], F[j+5] );
|
||||
ADD_SUB_4WAY( F[j+2], F[j+6] );
|
||||
ADD_SUB_4WAY( F[j+3], F[j+7] );
|
||||
|
||||
output[i ] = Q_REDUCE_4WAY( F[j ] );
|
||||
output[i+ 8] = Q_REDUCE_4WAY( F[j+1] );
|
||||
output[i+16] = Q_REDUCE_4WAY( F[j+2] );
|
||||
output[i+24] = Q_REDUCE_4WAY( F[j+3] );
|
||||
output[i+32] = Q_REDUCE_4WAY( F[j+4] );
|
||||
output[i+40] = Q_REDUCE_4WAY( F[j+5] );
|
||||
output[i+48] = Q_REDUCE_4WAY( F[j+6] );
|
||||
output[i+56] = Q_REDUCE_4WAY( F[j+7] );
|
||||
}
|
||||
}
|
||||
|
||||
// Calculates the FFT part of SWIFFT.
// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
// is only the A's part.
//
// Parameters:
// - input: the input to FFT.
// - m: the input size divided by 8. The function performs m FFTs.
// - output: will store the result.
void SWIFFTFFT(const unsigned char *input, int m, swift_int32_t *output)
{
   int i;

   for (i = 0; i < m; i++, input += EIGHTH_N, output += N)
   {
      FFT(input, output);
   }
}

// Calculates the 'sum' part of SWIFFT, including the base change at the end.
// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
// is only the A's part.
//
// Parameters:
// - input: the input. Of size 64 * m.
// - m: the input size divided by 64.
// - output: will store the result.
// - a: the coefficients in the sum. Of size 64 * m.
void SWIFFTSum(const swift_int32_t *input, int m, unsigned char *output, const swift_int16_t *a)
{
   int i, j;
   swift_int32_t result[N];
   register swift_int16_t carry = 0;

   for (j = 0; j < N; ++j)
   {
      register swift_int32_t sum = 0;
      const register swift_int32_t *f = input + j;
      const register swift_int16_t *k = a + j;

      for (i = 0; i < m; i++, f += N, k += N)
      {
         sum += (*f) * (*k);
      }

      result[j] = sum;
   }

   for (j = 0; j < N; ++j)
   {
      result[j] = ((FIELD_SIZE << 22) + result[j]) % FIELD_SIZE;
   }

   for (j = 0; j < 8; ++j)
   {
      register int carryBit = TranslateToBase256(result + (j << 3), output + (j << 3));
      carry |= carryBit << j;
   }

   output[N] = carry;
}

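The second loop above leans on a small trick: adding (FIELD_SIZE << 22), a multiple of FIELD_SIZE large enough to outweigh any negative partial sum, before taking '%' keeps the left operand non-negative, so the result is always a proper residue in [0, FIELD_SIZE-1] regardless of how C rounds '%' for negative operands. A minimal stand-alone sketch of the same idiom, assuming FIELD_SIZE is 257 (the SWIFFT field; the real value comes from swifftx.c):

#include <stdio.h>

#define FIELD_SIZE 257   // assumed here for illustration only

int main(void)
{
   int samples[4] = { -5, -257, 1000000, -1000000 };
   for (int i = 0; i < 4; i++)
   {
      int x = samples[i];
      // plain x % FIELD_SIZE can be negative when x is negative
      int biased = ((FIELD_SIZE << 22) + x) % FIELD_SIZE;   // always 0..FIELD_SIZE-1
      printf("%8d  %%: %4d  biased: %3d\n", x, x % FIELD_SIZE, biased);
   }
   return 0;
}
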
// On entry input is interleaved 4x64. SIZE is *4 lanes / 8 bytes,
// multiply by 2.

void ComputeSingleSWIFFTX_4way( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
                                unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
                                bool doSmooth)
{
   int i;
   // Will store the result of the FFT parts:
   m128_swift_int32_t fftOut[N * M];
// swift_int32_t fftOut[N * M];
   unsigned char intermediate[N * 3 + 8];
   unsigned char carry0, carry1, carry2;

   // Do the three SWIFFTs while remembering the three carry bytes (each carry byte gets
   // overridden by the following SWIFFT):

   // 1. Compute the FFT of the input - the common part for the first 3 SWIFFTs:
   SWIFFTFFT(input, M, fftOut);

   // 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:

   // 2a. The first SWIFFT:
   SWIFFTSum(fftOut, M, intermediate, As);
   // Remember the carry byte:
   carry0 = intermediate[N];

   // 2b. The second one:
   SWIFFTSum(fftOut, M, intermediate + N, As + (M * N));
   carry1 = intermediate[2 * N];

   // 2c. The third one:
   SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N));
   carry2 = intermediate[3 * N];

   // 2d. Put the three carry bytes in their place:
   intermediate[3 * N] = carry0;
   intermediate[(3 * N) + 1] = carry1;
   intermediate[(3 * N) + 2] = carry2;

   // Pad the intermediate output with 5 zeroes.
   memset(intermediate + (3 * N) + 3, 0, 5);

   // Apply the S-Box:
   for (i = 0; i < (3 * N) + 8; ++i)
   {
      intermediate[i] = SBox[intermediate[i]];
   }

   // 3. The final and last SWIFFT:
   SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut);
   SWIFFTSum(fftOut, 3 * (N/8) + 1, output, As);

   if (doSmooth)
   {
      unsigned char sum[N];
      register int i, j;
      memset(sum, 0, N);

      for (i = 0; i < (N + 1) * 8; ++i)
      {
         register const swift_int16_t *AsRow;
         register int AShift;

         if (!(output[i >> 3] & (1 << (i & 7))))
         {
            continue;
         }

         AsRow = As + N * M + (i & ~(N - 1));
         AShift = i & 63;

         for (j = AShift; j < N; ++j)
         {
            sum[j] += AsRow[j - AShift];
         }

         for (j = 0; j < AShift; ++j)
         {
            sum[j] -= AsRow[N - AShift + j];
         }
      }

      for (i = 0; i < N; ++i)
      {
         output[i] = sum[i];
      }

      output[N] = 0;
   }
}
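For orientation, the indices used above imply the following layout of the 3*N + 8 byte intermediate buffer just before the final SWIFFT pass (illustration only, not code from the diff):

// intermediate[0    .. N-1  ]  output of SWIFFT #1 (its carry byte at [N] is overwritten)
// intermediate[N    .. 2N-1 ]  output of SWIFFT #2 (its carry byte at [2N] is overwritten)
// intermediate[2N   .. 3N-1 ]  output of SWIFFT #3
// intermediate[3N   .. 3N+2 ]  carry0, carry1, carry2 saved from the three sums
// intermediate[3N+3 .. 3N+7 ]  five zero padding bytes
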
1243	algo/swifftx/swifftx.c	Normal file	(diff suppressed because it is too large)
1155	algo/swifftx/swifftx.c.bak	Normal file	(diff suppressed because it is too large)
78	algo/swifftx/swifftx.h	Normal file
@@ -0,0 +1,78 @@
///////////////////////////////////////////////////////////////////////////////////////////////
//
// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
//
// SWIFFTX.h
//
// October 2008
//
// This file is the exact copy from the reference implementation.
//
///////////////////////////////////////////////////////////////////////////////////////////////
#ifndef __SWIFFTX__
#define __SWIFFTX__

#ifdef __cplusplus
extern "C"{
#endif

// See the remarks concerning compatibility issues inside stdint.h.
//#include <stdint.h>
#include <stdbool.h>
#include "stdint.h"
//#include "stdbool.h"
//#include "SHA3swift.h"

// The size of SWIFFTX input in bytes.
#define SWIFFTX_INPUT_BLOCK_SIZE 256

// The size of output block in bytes. The compression function of SWIFFT outputs a block of
// this size (i.e., this is the size of the resulting hash value).
#define SWIFFTX_OUTPUT_BLOCK_SIZE 65

// Computes the result of a single SWIFFT operation.
// This is the simple implementation, where our main concern is to show our design principles.
// It is made more efficient in the optimized version, by using FFT instead of DFT, and
// through other speed-up techniques.
//
// Parameters:
// - input: the input string. Consists of 8*m input bytes, where each octet passes the DFT
//   processing.
// - m: the length of the input in bytes.
// - output: the resulting hash value of SWIFFT, of size 65 bytes (520 bit). This is the
//   result of summing the dot products of the DFTs with the A's after applying the base
//   change transformation.
// - A: the A's coefficients to work with (since every SWIFFT in SWIFFTX uses different As).
//   A single application of SWIFFT uses 64*m A's.
void ComputeSingleSWIFFT(unsigned char *input, unsigned short m,
                         unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
                         const swift_int16_t *a);

// Computes the result of a single SWIFFTX operation.
// NOTE: for simplicity we use 'ComputeSingleSWIFFT()' as a subroutine. This is only to show
// the design idea. In the optimized versions we don't do this for efficiency concerns, since
// there we compute the first part (which doesn't involve the A coefficients) only once for all
// of the 3 invocations of SWIFFT. This enables us to introduce a significant speedup.
//
// Parameters:
// - input: the input of 256 bytes (2048 bit).
// - output: the resulting hash value of SWIFFT, of size 64 bytes (512 bit).
// - doSmooth: if true, a final smoothing stage is performed and the output is of size 512 bits.
//
// Returns:
// - Success value.
void ComputeSingleSWIFFTX( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
                           unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE] );

void ComputeSingleSWIFFTX_smooth( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
                      unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], bool doSmooth);

// Calculates the powers of OMEGA and generates the bit reversal permutation.
// You must call this function before doing SWIFFT/X, otherwise you will get zeroes everywhere.
void InitializeSWIFFTX();

#ifdef __cplusplus
}
#endif

#endif // __SWIFFTX__
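A minimal usage sketch of the interface declared above; the buffer sizes come from the macros in this header, the helper name is hypothetical and error handling is omitted:

#include <string.h>
#include "algo/swifftx/swifftx.h"

void swifftx_demo( const unsigned char *msg )     // hypothetical helper
{
   unsigned char in[SWIFFTX_INPUT_BLOCK_SIZE];    // 256 byte message block
   unsigned char out[SWIFFTX_OUTPUT_BLOCK_SIZE];  // 65 byte digest

   memcpy( in, msg, SWIFFTX_INPUT_BLOCK_SIZE );
   InitializeSWIFFTX();               // must run once before any SWIFFT/X call
   ComputeSingleSWIFFTX( in, out );   // compress one block (no smoothing)
}
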
@@ -12,7 +12,6 @@ bool register_c11_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&c11_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -125,7 +125,6 @@ bool register_fresh_algo( algo_gate_t* gate )
|
||||
algo_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_fresh;
|
||||
gate->hash = (void*)&freshhash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -12,7 +12,6 @@ bool register_timetravel_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&timetravel_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -12,7 +12,6 @@ bool register_timetravel10_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&timetravel10_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -3,7 +3,6 @@
|
||||
bool register_tribus_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ffff;
|
||||
#if defined (TRIBUS_4WAY)
|
||||
// init_tribus_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_tribus_4way;
|
||||
|
@@ -12,7 +12,6 @@ bool register_x11_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x11_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,7 +12,6 @@ bool register_x11gost_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x11gost_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,7 +12,6 @@ bool register_x12_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x12hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,7 +12,6 @@ bool register_phi1612_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&phi1612_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,7 +12,6 @@ bool register_x13_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x13hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,7 +12,6 @@
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
//#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
@@ -28,7 +27,6 @@ typedef struct {
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
// luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
@@ -49,7 +47,6 @@ void init_x13bcd_4way_ctx()
|
||||
skein512_4way_init( &x13bcd_4way_ctx.skein );
|
||||
jh512_4way_init( &x13bcd_4way_ctx.jh );
|
||||
keccak512_4way_init( &x13bcd_4way_ctx.keccak );
|
||||
// luffa_2way_init( &x13bcd_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &x13bcd_4way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &x13bcd_4way_ctx.shavite );
|
||||
simd_2way_init( &x13bcd_4way_ctx.simd, 512 );
|
||||
@@ -72,8 +69,6 @@ void x13bcd_4way_hash( void *state, const void *input )
|
||||
// Blake
|
||||
memcpy( &ctx.blake, &x13bcd_ctx_mid, sizeof(x13bcd_ctx_mid) );
|
||||
blake512_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
|
||||
// blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// Bmw
|
||||
@@ -127,17 +122,6 @@ void x13bcd_4way_hash( void *state, const void *input )
|
||||
sm3_4way_close( &ctx.sm3, sm3_vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
|
||||
|
||||
/*
|
||||
// Luffa
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
*/
|
||||
|
||||
// Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
@@ -185,26 +169,6 @@ void x13bcd_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
/*
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// SM3 parallel 32 bit
|
||||
uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64)));
|
||||
memset( sm3_vhash, 0, sizeof sm3_vhash );
|
||||
uint32_t sm3_hash0[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash0, 0, sizeof sm3_hash0 );
|
||||
uint32_t sm3_hash1[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash1, 0, sizeof sm3_hash1 );
|
||||
uint32_t sm3_hash2[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash2, 0, sizeof sm3_hash2 );
|
||||
uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash3, 0, sizeof sm3_hash3 );
|
||||
|
||||
sm3_4way( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_close( &ctx.sm3, sm3_vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
|
||||
*/
|
||||
|
||||
// Hamsi parallel 4x32x2
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
|
@@ -12,7 +12,6 @@ bool register_x13sm3_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x13sm3_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -28,7 +27,6 @@ bool register_x13bcd_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x13bcd_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -83,6 +83,5 @@ bool register_axiom_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_axiom;
|
||||
gate->hash = (void*)&axiomhash;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
}
|
||||
|
@@ -11,7 +11,6 @@ bool register_polytimos_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_polytimos;
|
||||
gate->hash = (void*)&polytimos_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,7 +12,6 @@ bool register_veltor_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&veltor_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,7 +12,6 @@ bool register_x14_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x14hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -275,34 +275,31 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
if ( s_ntime != endiandata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
const uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
@@ -312,14 +309,15 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
x16r_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
|
||||
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
} while ( likely( ( n < max_nonce ) && !(*restart) ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -24,7 +24,6 @@
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread bool s_implemented = false;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rt_4way_context_overlay
|
||||
@@ -64,26 +63,8 @@ void x16rt_4way_hash( void* output, const void* input )
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
/*
|
||||
void *in = (void*) input;
|
||||
uint32_t *in32 = (uint32_t*) hash0;
|
||||
uint32_t ntime = in32[17];
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
uint32_t _ALIGN(64) timeHash[8];
|
||||
x16rt_getTimeHash(ntime, &timeHash);
|
||||
x16rt_getAlgoString(&timeHash[0], hashOrder);
|
||||
}
|
||||
*/
|
||||
|
||||
// Input data is both 64 bit interleaved (input)
|
||||
// and deinterleaved in inp0-3.
|
||||
// If the first function uses 64 bit data it is not required to interleave inp
|
||||
// first. It may use the interleaved data most convenient, ie 4way 64 bit.
|
||||
// All other functions assume data is deinterleaved in hash0-3
|
||||
// All functions must exit with data deinterleaved in hash0-3.
|
||||
// Alias in0-3 points to either inp0-3 or hash0-3 according to
|
||||
// its hashOrder position. Size is also set accordingly.
|
||||
// and deinterleaved in inp0-3. First function has no need to re-interleave.
|
||||
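// Illustrative only (not from this diff): for the first position in hashOrder
// a 64 bit algorithm could consume the interleaved buffer directly
// (in0..in3 aliased to input, size 80), while every later position uses the
// deinterleaved lanes (in0 = hash0 ... in3 = hash3, size 64).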
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
@@ -290,44 +271,31 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t _ALIGN(64) timeHash[4*8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint32_t ntime = swab32( pdata[17] );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16rt_getTimeHash( ntime, &timeHash );
|
||||
x16rt_getAlgoString( &timeHash[0], hashOrder );
|
||||
s_ntime = ntime;
|
||||
s_implemented = true;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||
hashOrder, ntime, timeHash );
|
||||
}
|
||||
if ( !s_implemented )
|
||||
{
|
||||
applog( LOG_WARNING, "s not implemented");
|
||||
sleep(1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
|
@@ -331,35 +331,32 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0fff;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
if ( s_ntime != endiandata[17] )
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
const uint32_t ntime = bswap_32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
@@ -368,14 +365,15 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
x16rv2_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
|
||||
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
} while ( likely( ( n < max_nonce ) && !(*restart) ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -368,7 +368,7 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
@@ -378,25 +378,22 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
if ( s_ntime != endiandata[17] )
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
|
@@ -803,52 +803,40 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
haval256_5_4way_close( &ctx.haval, state );
|
||||
}
|
||||
|
||||
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hash7 = &( hash[7<<2] );
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id;
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
do
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
|
||||
sonoa_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash7[ lane ] <= Htarg ) )
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ),
|
||||
*noncev );
|
||||
sonoa_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( ( hash7[ lane ] & mask ) == 0 ) )
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -11,7 +11,6 @@ bool register_sonoa_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_sonoa;
|
||||
gate->hash = (void*)&sonoa_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&get_max64_0x1ffff;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
@@ -205,50 +205,40 @@ void x17_4way_hash( void *state, const void *input )
|
||||
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
const uint32_t mask = masks[ m ];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x17_4way_hash( hash, vdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x17_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( hash7[ lane ] & mask ) == 0 )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -12,7 +12,6 @@ bool register_xevan_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&xevan_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
311
algo/x22/x22i-4way.c
Normal file
311
algo/x22/x22i-4way.c
Normal file
@@ -0,0 +1,311 @@
|
||||
#include "x22i-gate.h"
|
||||
|
||||
#if defined(X22I_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
#include "algo/haval/haval-hash-4way.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/swifftx/swifftx.h"
|
||||
|
||||
union _x22i_4way_ctx_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_4way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
sha256_4way_context sha256;
|
||||
};
|
||||
typedef union _x22i_4way_ctx_overlay x22i_ctx_overlay;
|
||||
|
||||
void x22i_4way_hash( void *output, const void *input )
|
||||
{
|
||||
uint64_t hash0[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
|
||||
// unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hashA0[64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hashA1[64] __attribute__((aligned(32))) = {0};
|
||||
unsigned char hashA2[64] __attribute__((aligned(32))) = {0};
|
||||
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_ctx_overlay ctx;
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
|
||||
(const BitSequence*)hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, 512 );
|
||||
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8], vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash0[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash0[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash1[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash1[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash2[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash2[16] );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, &hash3[8], 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, &hash3[16] );
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
dintrlv_4x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash );
|
||||
|
||||
// InitializeSWIFFTX();
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash3, (unsigned char*)hashA3);
|
||||
|
||||
intrlv_4x32_512( vhashA, hashA0, hashA1, hashA2, hashA3 );
|
||||
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
memset( hashA0, 0, 64 );
|
||||
memset( hashA1, 0, 64 );
|
||||
memset( hashA2, 0, 64 );
|
||||
memset( hashA3, 0, 64 );
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash0, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA0);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash1, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA1);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash2, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA2);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash3, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA3);
|
||||
|
||||
memset( hash0, 0, 64 );
|
||||
memset( hash1, 0, 64 );
|
||||
memset( hash2, 0, 64 );
|
||||
memset( hash3, 0, 64 );
|
||||
|
||||
LYRA2RE( (void*) hash0, 32, (const void*) hashA0, 32, (const void*) hashA0,
|
||||
32, 1, 4, 4 );
|
||||
LYRA2RE( (void*) hash1, 32, (const void*) hashA1, 32, (const void*) hashA1,
|
||||
32, 1, 4, 4 );
|
||||
LYRA2RE( (void*) hash2, 32, (const void*) hashA2, 32, (const void*) hashA2,
|
||||
32, 1, 4, 4 );
|
||||
LYRA2RE( (void*) hash3, 32, (const void*) hashA3, 32, (const void*) hashA3,
|
||||
32, 1, 4, 4 );
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash0, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash0);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash1, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash1);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash2, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash2);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash3, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash3);
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way( &ctx.sha256, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha256, output );
|
||||
|
||||
// memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
|
||||
int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x22i_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // X22I_4WAY
|
28
algo/x22/x22i-gate.c
Normal file
28
algo/x22/x22i-gate.c
Normal file
@@ -0,0 +1,28 @@
|
||||
#include "x22i-gate.h"
|
||||
|
||||
bool register_x22i_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X22I_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x22i_4way;
|
||||
gate->hash = (void*)&x22i_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x22i;
|
||||
gate->hash = (void*)&x22i_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_x25x_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X22I_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x25x_4way;
|
||||
gate->hash = (void*)&x25x_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x25x;
|
||||
gate->hash = (void*)&x25x_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
|
||||
return true;
|
||||
};
|
||||
|
35
algo/x22/x22i-gate.h
Normal file
35
algo/x22/x22i-gate.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef X22I_GATE_H__
|
||||
#define X22I_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include "simd-utils.h"
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define X22I_4WAY
|
||||
#endif
|
||||
|
||||
bool register_x22i_algo( algo_gate_t* gate );
bool register_x25x_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X22I_4WAY)
|
||||
|
||||
void x22i_4way_hash( void *state, const void *input );
|
||||
int scanhash_x22i_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x25x_4way_hash( void *state, const void *input );
|
||||
int scanhash_x25x_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
void x22i_hash( void *state, const void *input );
|
||||
int scanhash_x22i( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x25x_hash( void *state, const void *input );
|
||||
int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif // X22I_GATE_H__
|
202
algo/x22/x22i.c
Normal file
202
algo/x22/x22i.c
Normal file
@@ -0,0 +1,202 @@
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#endif
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/swifftx/swifftx.h"
|
||||
#include "x22i-gate.h"
|
||||
|
||||
union _x22i_context_overlay
|
||||
{
|
||||
sph_blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
#if defined(__AES__)
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_jh512_context jh;
|
||||
sph_keccak512_context keccak;
|
||||
sph_skein512_context skein;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_haval256_5_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
SHA256_CTX sha256;
|
||||
};
|
||||
typedef union _x22i_context_overlay x22i_context_overlay;
|
||||
|
||||
void x22i_hash( void *output, const void *input )
|
||||
{
|
||||
unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hash2[65] __attribute__((aligned(64))) = {0};
|
||||
x22i_context_overlay ctx;
|
||||
|
||||
sph_blake512_init(&ctx.blake);
|
||||
sph_blake512(&ctx.blake, input, 80);
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
|
||||
sph_bmw512_init(&ctx.bmw);
|
||||
sph_bmw512(&ctx.bmw, (const void*) hash, 64);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, 512 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
sph_groestl512( &ctx.groestl, hash, 64 );
|
||||
sph_groestl512_close( &ctx.groestl, hash );
|
||||
#endif
|
||||
|
||||
sph_skein512_init(&ctx.skein);
|
||||
sph_skein512(&ctx.skein, (const void*) hash, 64);
|
||||
sph_skein512_close(&ctx.skein, hash);
|
||||
|
||||
sph_jh512_init(&ctx.jh);
|
||||
sph_jh512(&ctx.jh, (const void*) hash, 64);
|
||||
sph_jh512_close(&ctx.jh, hash);
|
||||
|
||||
sph_keccak512_init(&ctx.keccak);
|
||||
sph_keccak512(&ctx.keccak, (const void*) hash, 64);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 64 );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)hash, 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash, 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 512 );
|
||||
|
||||
#if defined(__AES__)
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 512 );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, hash, 64 );
|
||||
sph_echo512_close( &ctx.echo, hash );
|
||||
#endif
|
||||
|
||||
sph_hamsi512_init(&ctx.hamsi);
|
||||
sph_hamsi512(&ctx.hamsi, (const void*) hash, 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) hash, 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
|
||||
sph_shabal512_init(&ctx.shabal);
|
||||
sph_shabal512(&ctx.shabal, (const void*) hash, 64);
|
||||
sph_shabal512_close(&ctx.shabal, &hash[64]);
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[64], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash[128]);
|
||||
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, (const void*) &hash[128], 64);
|
||||
SHA512_Final( (void*) &hash[192], &ctx.sha512 );
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash, (unsigned char*)hash2);
|
||||
|
||||
memset(hash, 0, 64);
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash2, 64);
|
||||
sph_haval256_5_close(&ctx.haval,hash);
|
||||
|
||||
memset(hash2, 0, 64);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash2);
|
||||
|
||||
memset(hash, 0, 64);
|
||||
LYRA2RE((void*) hash, 32, (const void*) hash2, 32, (const void*) hash2, 32, 1, 4, 4);
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash);
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, (const void*) hash, 64 );
|
||||
SHA256_Final( (unsigned char*) hash, &ctx.sha256 );
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_x22i( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
for (int k=0; k < 20; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &endiandata[19], n );
|
||||
|
||||
x22i_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
402
algo/x22/x25x-4way.c
Normal file
402
algo/x22/x25x-4way.c
Normal file
@@ -0,0 +1,402 @@
|
||||
#include "x22i-gate.h"
|
||||
|
||||
#if defined(X22I_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
#include "algo/haval/haval-hash-4way.h"
|
||||
#include "algo/blake/blake2s-hash-4way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/swifftx/swifftx.h"
|
||||
#include "algo/panama/sph_panama.h"
|
||||
#include "algo/lanehash/lane.h"
|
||||
|
||||
union _x25x_4way_ctx_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_4way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
sha256_4way_context sha256;
|
||||
sph_panama_context panama;
|
||||
blake2s_4way_state blake2s;
|
||||
};
|
||||
typedef union _x25x_4way_ctx_overlay x25x_4way_ctx_overlay;
|
||||
|
||||
void x25x_shuffle( void *hash )
|
||||
{
|
||||
// Simple shuffle algorithm, instead of just reversing
|
||||
#define X25X_SHUFFLE_BLOCKS (24 * 64 / 2)
|
||||
#define X25X_SHUFFLE_ROUNDS 12
|
||||
|
||||
static const uint16_t x25x_round_const[X25X_SHUFFLE_ROUNDS] =
|
||||
{
|
||||
0x142c, 0x5830, 0x678c, 0xe08c, 0x3c67, 0xd50d, 0xb1d8, 0xecb2,
|
||||
0xd7ee, 0x6783, 0xfa6c, 0x4b9c
|
||||
};
|
||||
|
||||
uint16_t* block_pointer = (uint16_t*)hash;
|
||||
for ( int r = 0; r < X25X_SHUFFLE_ROUNDS; r++ )
|
||||
{
|
||||
for ( int i = 0; i < X25X_SHUFFLE_BLOCKS; i++ )
|
||||
{
|
||||
uint16_t block_value = block_pointer[ X25X_SHUFFLE_BLOCKS - i - 1 ];
|
||||
block_pointer[i] ^= block_pointer[ block_value % X25X_SHUFFLE_BLOCKS ]
|
||||
+ ( x25x_round_const[r] << (i % 16) );
|
||||
}
|
||||
}
|
||||
|
||||
#undef X25X_SHUFFLE_BLOCKS
|
||||
#undef X25X_SHUFFLE_ROUNDS
|
||||
}
|
||||
|
||||
void x25x_4way_hash( void *output, const void *input )
|
||||
{
|
||||
unsigned char hash0[25][64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hash1[25][64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hash2[25][64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hash3[25][64] __attribute__((aligned(64))) = {0};
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
unsigned char vhashA[24][64*4] __attribute__ ((aligned (64)));
|
||||
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64_512( &hash0[0], &hash1[0], &hash2[0], &hash3[0], vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( &hash0[1], &hash1[1], &hash2[1], &hash3[1], vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash0[2],
|
||||
(const char*)&hash0[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash1[2],
|
||||
(const char*)&hash1[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash2[2],
|
||||
(const char*)&hash2[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash3[2],
|
||||
(const char*)&hash3[1], 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[2], &hash1[2], &hash2[2], &hash3[2] );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64_512( &hash0[3], &hash1[3], &hash2[3], &hash3[3], vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64_512( &hash0[4], &hash1[4], &hash2[4], &hash3[4], vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( &hash0[5], &hash1[5], &hash2[5], &hash3[5], vhash );
|
||||
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash0[6],
|
||||
(const BitSequence*)&hash0[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash1[6],
|
||||
(const BitSequence*)&hash1[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash2[6],
|
||||
(const BitSequence*)&hash2[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash3[6],
|
||||
(const BitSequence*)&hash3[5], 64 );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash0[7],
|
||||
(const byte*)&hash0[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash1[7],
|
||||
(const byte*)&hash1[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash2[7],
|
||||
(const byte*)&hash2[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash3[7],
|
||||
(const byte*)&hash3[6], 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash0[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash1[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash1[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash2[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash2[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash3[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash3[8]);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash0[9],
|
||||
(const BitSequence*)&hash0[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash1[9],
|
||||
(const BitSequence*)&hash1[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash2[9],
|
||||
(const BitSequence*)&hash2[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash3[9],
|
||||
(const BitSequence*)&hash3[8], 512 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash0[10],
|
||||
(const BitSequence*)&hash0[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash1[10],
|
||||
(const BitSequence*)&hash1[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash2[10],
|
||||
(const BitSequence*)&hash2[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash3[10],
|
||||
(const BitSequence*)&hash3[9], 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[10], &hash1[10], &hash2[10], &hash3[10] );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64_512( &hash0[11], &hash1[11], &hash2[11], &hash3[11], vhash );
|
||||
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash0[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash0[12]);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash1[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash1[12]);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash2[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash2[12]);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash3[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash3[12]);
|
||||
|
||||
intrlv_4x32_512( vhash, &hash0[12], &hash1[12], &hash2[12], &hash3[12] );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32_512( &hash0[13], &hash1[13], &hash2[13], &hash3[13], vhash );
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash0[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash0[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash1[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash1[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash2[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash2[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash3[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash3[14]);
|
||||
|
||||
intrlv_4x64_512( vhash, &hash0[14], &hash1[14], &hash2[14], &hash3[14] );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64_512( &hash0[15], &hash1[15], &hash2[15], &hash3[15], vhash );
|
||||
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash0[12], (unsigned char*)&hash0[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash1[12], (unsigned char*)&hash1[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash2[12], (unsigned char*)&hash2[16]);
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash3[12], (unsigned char*)&hash3[16]);
|
||||
|
||||
intrlv_4x32_512( &vhashA, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
|
||||
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashA, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
dintrlv_4x32_512( &hash0[17], &hash1[17], &hash2[17], &hash3[17], vhash );
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash0[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash0[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash1[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash1[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash2[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash2[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash3[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash3[18]);
|
||||
|
||||
LYRA2RE( (void*)&hash0[19], 32, (const void*)&hash0[18], 32,
|
||||
(const void*)&hash0[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)&hash1[19], 32, (const void*)&hash1[18], 32,
|
||||
(const void*)&hash1[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)&hash2[19], 32, (const void*)&hash2[18], 32,
|
||||
(const void*)&hash2[18], 32, 1, 4, 4 );
|
||||
LYRA2RE( (void*)&hash3[19], 32, (const void*)&hash3[18], 32,
|
||||
(const void*)&hash3[18], 32, 1, 4, 4 );
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash0[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash0[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash1[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash1[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash2[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash2[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash3[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash3[20]);
|
||||
|
||||
intrlv_4x32_512( vhashA, &hash0[20], &hash1[20], &hash2[20], &hash3[20] );
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way( &ctx.sha256, vhashA, 64 );
|
||||
sha256_4way_close( &ctx.sha256, vhash );
|
||||
dintrlv_4x32_512( &hash0[21], &hash1[21], &hash2[21], &hash3[21], vhash );
|
||||
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash0[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash0[22]);
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash1[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash1[22]);
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash2[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash2[22]);
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash3[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash3[22]);
|
||||
|
||||
laneHash(512, (const BitSequence*)&hash0[22], 512, (BitSequence*)&hash0[23]);
|
||||
laneHash(512, (const BitSequence*)&hash1[22], 512, (BitSequence*)&hash1[23]);
|
||||
laneHash(512, (const BitSequence*)&hash2[22], 512, (BitSequence*)&hash2[23]);
|
||||
laneHash(512, (const BitSequence*)&hash3[22], 512, (BitSequence*)&hash3[23]);
|
||||
|
||||
x25x_shuffle( hash0 );
|
||||
x25x_shuffle( hash1 );
|
||||
x25x_shuffle( hash2 );
|
||||
x25x_shuffle( hash3 );
|
||||
|
||||
intrlv_4x32_512( &vhashA[ 0], &hash0[ 0], &hash1[ 0], &hash2[ 0], &hash3[ 0] );
|
||||
intrlv_4x32_512( &vhashA[ 1], &hash0[ 1], &hash1[ 1], &hash2[ 1], &hash3[ 1] );
|
||||
intrlv_4x32_512( &vhashA[ 2], &hash0[ 2], &hash1[ 2], &hash2[ 2], &hash3[ 2] );
|
||||
intrlv_4x32_512( &vhashA[ 3], &hash0[ 3], &hash1[ 3], &hash2[ 3], &hash3[ 3] );
|
||||
intrlv_4x32_512( &vhashA[ 4], &hash0[ 4], &hash1[ 4], &hash2[ 4], &hash3[ 4] );
|
||||
intrlv_4x32_512( &vhashA[ 5], &hash0[ 5], &hash1[ 5], &hash2[ 5], &hash3[ 5] );
|
||||
intrlv_4x32_512( &vhashA[ 6], &hash0[ 6], &hash1[ 6], &hash2[ 6], &hash3[ 6] );
|
||||
intrlv_4x32_512( &vhashA[ 7], &hash0[ 7], &hash1[ 7], &hash2[ 7], &hash3[ 7] );
|
||||
intrlv_4x32_512( &vhashA[ 8], &hash0[ 8], &hash1[ 8], &hash2[ 8], &hash3[ 8] );
|
||||
intrlv_4x32_512( &vhashA[ 9], &hash0[ 9], &hash1[ 9], &hash2[ 9], &hash3[ 9] );
|
||||
intrlv_4x32_512( &vhashA[10], &hash0[10], &hash1[10], &hash2[10], &hash3[10] );
|
||||
intrlv_4x32_512( &vhashA[11], &hash0[11], &hash1[11], &hash2[11], &hash3[11] );
|
||||
intrlv_4x32_512( &vhashA[12], &hash0[12], &hash1[12], &hash2[12], &hash3[12] );
|
||||
intrlv_4x32_512( &vhashA[13], &hash0[13], &hash1[13], &hash2[13], &hash3[13] );
|
||||
intrlv_4x32_512( &vhashA[14], &hash0[14], &hash1[14], &hash2[14], &hash3[14] );
|
||||
intrlv_4x32_512( &vhashA[15], &hash0[15], &hash1[15], &hash2[15], &hash3[15] );
|
||||
intrlv_4x32_512( &vhashA[16], &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
|
||||
intrlv_4x32_512( &vhashA[17], &hash0[17], &hash1[17], &hash2[17], &hash3[17] );
|
||||
intrlv_4x32_512( &vhashA[18], &hash0[18], &hash1[18], &hash2[18], &hash3[18] );
|
||||
intrlv_4x32_512( &vhashA[19], &hash0[19], &hash1[19], &hash2[19], &hash3[19] );
|
||||
intrlv_4x32_512( &vhashA[20], &hash0[20], &hash1[20], &hash2[20], &hash3[20] );
|
||||
intrlv_4x32_512( &vhashA[21], &hash0[21], &hash1[21], &hash2[21], &hash3[21] );
|
||||
intrlv_4x32_512( &vhashA[22], &hash0[22], &hash1[22], &hash2[22], &hash3[22] );
|
||||
intrlv_4x32_512( &vhashA[23], &hash0[23], &hash1[23], &hash2[23], &hash3[23] );
|
||||
|
||||
blake2s_4way_init( &ctx.blake2s, 32 );
|
||||
blake2s_4way_full_blocks( &ctx.blake2s, vhash, vhashA, 64*24 );
|
||||
|
||||
dintrlv_4x32( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash, 256 );
|
||||
|
||||
memcpy(output, &hash0[24], 32);
|
||||
memcpy(output+32, &hash1[24], 32);
|
||||
memcpy(output+64, &hash2[24], 32);
|
||||
memcpy(output+96, &hash3[24], 32);
|
||||
}
|
||||
|
||||
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x25x_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
|
||||
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
236
algo/x22/x25x.c
Normal file
236
algo/x22/x25x.c
Normal file
@@ -0,0 +1,236 @@
|
||||
#include "x22i-gate.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#endif
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/swifftx/swifftx.h"
|
||||
#include "algo/blake/sph-blake2s.h"
|
||||
#include "algo/panama/sph_panama.h"
|
||||
#include "algo/lanehash/lane.h"
|
||||
|
||||
union _x25x_context_overlay
|
||||
{
|
||||
sph_blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
#if defined(__AES__)
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_jh512_context jh;
|
||||
sph_keccak512_context keccak;
|
||||
sph_skein512_context skein;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_haval256_5_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
SHA256_CTX sha256;
|
||||
sph_panama_context panama;
|
||||
blake2s_state blake2s;
|
||||
};
|
||||
typedef union _x25x_context_overlay x25x_context_overlay;
|
||||
|
||||
void x25x_hash( void *output, const void *input )
|
||||
{
|
||||
unsigned char hash[25][64] __attribute__((aligned(64))) = {0};
|
||||
x25x_context_overlay ctx;
|
||||
|
||||
sph_blake512_init(&ctx.blake);
|
||||
sph_blake512(&ctx.blake, input, 80);
|
||||
sph_blake512_close(&ctx.blake, &hash[0] );
|
||||
|
||||
sph_bmw512_init(&ctx.bmw);
|
||||
sph_bmw512(&ctx.bmw, (const void*) &hash[0], 64);
|
||||
sph_bmw512_close(&ctx.bmw, &hash[1]);
|
||||
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)&hash[2],
|
||||
(const char*)&hash[1], 512 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
sph_groestl512( &ctx.groestl, &hash[1], 64 );
|
||||
sph_groestl512_close( &ctx.groestl, &hash[2] );
|
||||
#endif
|
||||
|
||||
sph_skein512_init(&ctx.skein);
|
||||
sph_skein512(&ctx.skein, (const void*) &hash[2], 64);
|
||||
sph_skein512_close(&ctx.skein, &hash[3]);
|
||||
|
||||
sph_jh512_init(&ctx.jh);
|
||||
sph_jh512(&ctx.jh, (const void*) &hash[3], 64);
|
||||
sph_jh512_close(&ctx.jh, &hash[4]);
|
||||
|
||||
sph_keccak512_init(&ctx.keccak);
|
||||
sph_keccak512(&ctx.keccak, (const void*) &hash[4], 64);
|
||||
sph_keccak512_close(&ctx.keccak, &hash[5]);
|
||||
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash[6],
|
||||
(const BitSequence*)&hash[5], 64 );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash[7],
|
||||
(const byte*)&hash[6], 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash[8]);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash[9],
|
||||
(const BitSequence*)&hash[8], 512 );
|
||||
|
||||
#if defined(__AES__)
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash[10],
|
||||
(const BitSequence*)&hash[9], 512 );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, &hash[9], 64 );
|
||||
sph_echo512_close( &ctx.echo, &hash[10] );
|
||||
#endif
|
||||
|
||||
sph_hamsi512_init(&ctx.hamsi);
|
||||
sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, &hash[11]);
|
||||
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash[12]);
|
||||
|
||||
sph_shabal512_init(&ctx.shabal);
|
||||
sph_shabal512(&ctx.shabal, (const void*) &hash[12], 64);
|
||||
sph_shabal512_close(&ctx.shabal, &hash[13]);
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash[14]);
|
||||
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, (const void*) &hash[14], 64);
|
||||
SHA512_Final( (void*) &hash[15], &ctx.sha512 );
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash[12], (unsigned char*)&hash[16]);
|
||||
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
sph_haval256_5(&ctx.haval,(const void*) &hash[16], 64);
|
||||
sph_haval256_5_close(&ctx.haval,&hash[17]);
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash[18]);
|
||||
|
||||
LYRA2RE( (void*)&hash[19], 32, (const void*)&hash[18], 32,
|
||||
(const void*)&hash[18], 32, 1, 4, 4 );
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash[20]);
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, (const void*) &hash[20], 64 );
|
||||
SHA256_Final( (unsigned char*) &hash[21], &ctx.sha256 );
|
||||
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash[21], 64 );
|
||||
sph_panama_close(&ctx.panama, (void*) &hash[22]);
|
||||
|
||||
laneHash(512, (const BitSequence*) &hash[22], 512, (BitSequence*) &hash[23]);
|
||||
|
||||
// Simple shuffle algorithm, instead of just reversing
|
||||
#define X25X_SHUFFLE_BLOCKS (24 * 64 / 2)
|
||||
#define X25X_SHUFFLE_ROUNDS 12
|
||||
|
||||
static const uint16_t x25x_round_const[X25X_SHUFFLE_ROUNDS] =
|
||||
{
|
||||
0x142c, 0x5830, 0x678c, 0xe08c, 0x3c67, 0xd50d, 0xb1d8, 0xecb2,
|
||||
0xd7ee, 0x6783, 0xfa6c, 0x4b9c
|
||||
};
|
||||
|
||||
uint16_t* block_pointer = (uint16_t*)hash;
|
||||
for ( int r = 0; r < X25X_SHUFFLE_ROUNDS; r++ )
|
||||
{
|
||||
for ( int i = 0; i < X25X_SHUFFLE_BLOCKS; i++ )
|
||||
{
|
||||
uint16_t block_value = block_pointer[ X25X_SHUFFLE_BLOCKS - i - 1 ];
|
||||
block_pointer[i] ^= block_pointer[ block_value % X25X_SHUFFLE_BLOCKS ]
|
||||
+ ( x25x_round_const[r] << (i % 16) );
|
||||
}
|
||||
}
|
||||
|
||||
#undef X25X_SHUFFLE_BLOCKS
|
||||
#undef X25X_SHUFFLE_ROUNDS
|
||||
|
||||
blake2s_simple( (uint8_t*)&hash[24], (const void*)(&hash[0]), 64 * 24 );
|
||||
|
||||
memcpy(output, &hash[24], 32);
|
||||
}
|
||||
|
||||
int scanhash_x25x( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
for (int k=0; k < 20; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &endiandata[19], n );
|
||||
|
||||
x25x_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -416,16 +416,6 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t yescrypt_get_max64()
|
||||
{
|
||||
return 0x1ffLL;
|
||||
}
|
||||
|
||||
int64_t yescryptr16_get_max64()
|
||||
{
|
||||
return 0xfffLL;
|
||||
}
|
||||
|
||||
void yescrypt_gate_base(algo_gate_t *gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
@@ -437,7 +427,6 @@ void yescrypt_gate_base(algo_gate_t *gate )
|
||||
bool register_yescrypt_algo( algo_gate_t* gate )
|
||||
{
|
||||
yescrypt_gate_base( gate );
|
||||
gate->get_max64 = (void*)&yescrypt_get_max64;
|
||||
|
||||
if ( opt_param_n ) YESCRYPT_N = opt_param_n;
|
||||
else YESCRYPT_N = 2048;
|
||||
@@ -469,7 +458,6 @@ bool register_yescrypt_algo( algo_gate_t* gate )
|
||||
bool register_yescryptr8_algo( algo_gate_t* gate )
|
||||
{
|
||||
yescrypt_gate_base( gate );
|
||||
gate->get_max64 = (void*)&yescrypt_get_max64;
|
||||
yescrypt_client_key = "Client Key";
|
||||
yescrypt_client_key_len = 10;
|
||||
YESCRYPT_N = 2048;
|
||||
@@ -481,7 +469,6 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
|
||||
bool register_yescryptr16_algo( algo_gate_t* gate )
|
||||
{
|
||||
yescrypt_gate_base( gate );
|
||||
gate->get_max64 = (void*)&yescryptr16_get_max64;
|
||||
yescrypt_client_key = "Client Key";
|
||||
yescrypt_client_key_len = 10;
|
||||
YESCRYPT_N = 4096;
|
||||
@@ -493,7 +480,6 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
|
||||
bool register_yescryptr32_algo( algo_gate_t* gate )
|
||||
{
|
||||
yescrypt_gate_base( gate );
|
||||
gate->get_max64 = (void*)&yescryptr16_get_max64;
|
||||
yescrypt_client_key = "WaviBanana";
|
||||
yescrypt_client_key_len = 10;
|
||||
YESCRYPT_N = 4096;
|
||||
|
322
algo/yespower/crypto/blake2b-yp.c
Normal file
322
algo/yespower/crypto/blake2b-yp.c
Normal file
@@ -0,0 +1,322 @@
|
||||
/*
|
||||
* Copyright 2009 Colin Percival, 2014 savale
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algo/yespower/crypto/sph_types.h>
|
||||
#include <algo/yespower/utils/sysendian.h>
|
||||
#include "blake2b-yp.h"
|
||||
|
||||
// Cyclic right rotation.
|
||||
#ifndef ROTR64
|
||||
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
|
||||
#endif
|
||||
|
||||
// Little-endian byte access.
|
||||
#define B2B_GET64(p) \
|
||||
(((uint64_t) ((uint8_t *) (p))[0]) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[7]) << 56))
|
||||
|
||||
// G Mixing function.
|
||||
#define B2B_G(a, b, c, d, x, y) { \
|
||||
v[a] = v[a] + v[b] + x; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 32); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 24); \
|
||||
v[a] = v[a] + v[b] + y; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 16); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 63); }
|
||||
|
||||
// Initialization Vector.
|
||||
static const uint64_t blake2b_iv[8] = {
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
|
||||
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
|
||||
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
|
||||
};
|
||||
|
||||
// Compression function. "last" flag indicates last block.
|
||||
static void blake2b_compress(blake2b_yp_ctx *ctx, int last)
|
||||
{
|
||||
const uint8_t sigma[12][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
int i;
|
||||
uint64_t v[16], m[16];
|
||||
|
||||
// init work variables
|
||||
for (i = 0; i < 8; i++) {
|
||||
v[i] = ctx->h[i];
|
||||
v[i + 8] = blake2b_iv[i];
|
||||
}
|
||||
|
||||
v[12] ^= ctx->t[0]; // low 64 bits of offset
|
||||
v[13] ^= ctx->t[1]; // high 64 bits
|
||||
|
||||
// last block flag set ?
|
||||
if (last) {
|
||||
v[14] = ~v[14];
|
||||
}
|
||||
|
||||
// get little-endian words
|
||||
for (i = 0; i < 16; i++) {
|
||||
m[i] = B2B_GET64(&ctx->b[8 * i]);
|
||||
}
|
||||
|
||||
// twelve rounds
|
||||
for (i = 0; i < 12; i++) {
|
||||
B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
|
||||
B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
|
||||
B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
|
||||
B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
|
||||
B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
|
||||
B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
|
||||
B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
|
||||
B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i) {
|
||||
ctx->h[i] ^= v[i] ^ v[i + 8];
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the hashing context "ctx" with optional key "key".
|
||||
// 1 <= outlen <= 64 gives the digest size in bytes.
|
||||
// Secret key (also <= 64 bytes) is optional (keylen = 0).
|
||||
int blake2b_yp_init(blake2b_yp_ctx *ctx, size_t outlen,
|
||||
const void *key, size_t keylen) // (keylen=0: no key)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
// illegal parameters
|
||||
if (outlen == 0 || outlen > 64 || keylen > 64) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// state, "param block"
|
||||
for (i = 0; i < 8; i++) {
|
||||
ctx->h[i] = blake2b_iv[i];
|
||||
}
|
||||
|
||||
ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;
|
||||
|
||||
ctx->t[0] = 0; // input count low word
|
||||
ctx->t[1] = 0; // input count high word
|
||||
ctx->c = 0; // pointer within buffer
|
||||
ctx->outlen = outlen;
|
||||
|
||||
// zero input block
|
||||
for (i = keylen; i < 128; i++) {
|
||||
ctx->b[i] = 0;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
blake2b_yp_update(ctx, key, keylen);
|
||||
ctx->c = 128; // at the end
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Add "inlen" bytes from "in" into the hash.
|
||||
void blake2b_yp_update(blake2b_yp_ctx *ctx,
|
||||
const void *in, size_t inlen) // data bytes
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < inlen; i++) {
|
||||
if (ctx->c == 128) { // buffer full ?
|
||||
ctx->t[0] += ctx->c; // add counters
|
||||
if (ctx->t[0] < ctx->c) // carry overflow ?
|
||||
ctx->t[1]++; // high word
|
||||
blake2b_compress(ctx, 0); // compress (not last)
|
||||
ctx->c = 0; // counter to zero
|
||||
}
|
||||
ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the message digest (size given in init).
|
||||
// Result placed in "out".
|
||||
void blake2b_yp_final(blake2b_yp_ctx *ctx, void *out)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
ctx->t[0] += ctx->c; // mark last block offset
|
||||
// carry overflow
|
||||
if (ctx->t[0] < ctx->c) {
|
||||
ctx->t[1]++; // high word
|
||||
}
|
||||
|
||||
// fill up with zeros
|
||||
while (ctx->c < 128) {
|
||||
ctx->b[ctx->c++] = 0;
|
||||
}
|
||||
|
||||
blake2b_compress(ctx, 1); // final block flag = 1
|
||||
|
||||
// little endian convert and store
|
||||
for (i = 0; i < ctx->outlen; i++) {
|
||||
((uint8_t *) out)[i] =
|
||||
(ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
// inlen = number of bytes
|
||||
void blake2b_yp_hash(void *out, const void *in, size_t inlen) {
|
||||
blake2b_yp_ctx ctx;
|
||||
blake2b_yp_init(&ctx, 32, NULL, 0);
|
||||
blake2b_yp_update(&ctx, in, inlen);
|
||||
blake2b_yp_final(&ctx, out);
|
||||
}
|
||||
|
||||
// // keylen = number of bytes
|
||||
void hmac_blake2b_yp_init(hmac_yp_ctx *hctx, const void *_key, size_t keylen) {
|
||||
const uint8_t *key = _key;
|
||||
uint8_t keyhash[32];
|
||||
uint8_t pad[64];
|
||||
uint64_t i;
|
||||
|
||||
if (keylen > 64) {
|
||||
blake2b_yp_hash(keyhash, key, keylen);
|
||||
key = keyhash;
|
||||
keylen = 32;
|
||||
}
|
||||
|
||||
blake2b_yp_init(&hctx->inner, 32, NULL, 0);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
|
||||
blake2b_yp_update(&hctx->inner, pad, 64);
|
||||
blake2b_yp_init(&hctx->outer, 32, NULL, 0);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
|
||||
blake2b_yp_update(&hctx->outer, pad, 64);
|
||||
memset(keyhash, 0, 32);
|
||||
}
|
||||
|
||||
// datalen = number of bits
|
||||
void hmac_blake2b_yp_update(hmac_yp_ctx *hctx, const void *data, size_t datalen) {
|
||||
// update the inner state
|
||||
blake2b_yp_update(&hctx->inner, data, datalen);
|
||||
}
|
||||
|
||||
void hmac_blake2b_yp_final(hmac_yp_ctx *hctx, uint8_t *digest) {
|
||||
uint8_t ihash[32];
|
||||
blake2b_yp_final(&hctx->inner, ihash);
|
||||
blake2b_yp_update(&hctx->outer, ihash, 32);
|
||||
blake2b_yp_final(&hctx->outer, digest);
|
||||
memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
// // keylen = number of bytes; inlen = number of bytes
|
||||
void hmac_blake2b_yp_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen) {
|
||||
hmac_yp_ctx hctx;
|
||||
hmac_blake2b_yp_init(&hctx, key, keylen);
|
||||
hmac_blake2b_yp_update(&hctx, in, inlen);
|
||||
hmac_blake2b_yp_final(&hctx, out);
|
||||
}
|
||||
|
||||
void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
hmac_yp_ctx PShctx, hctx;
|
||||
size_t i;
|
||||
uint8_t ivec[4];
|
||||
uint8_t U[32];
|
||||
uint8_t T[32];
|
||||
uint64_t j;
|
||||
int k;
|
||||
size_t clen;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
hmac_blake2b_yp_init(&PShctx, passwd, passwdlen);
|
||||
hmac_blake2b_yp_update(&PShctx, salt, saltlen);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(hmac_yp_ctx));
|
||||
hmac_blake2b_yp_update(&hctx, ivec, 4);
|
||||
hmac_blake2b_yp_final(&hctx, U);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(T, U, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
hmac_blake2b_yp_init(&hctx, passwd, passwdlen);
|
||||
hmac_blake2b_yp_update(&hctx, U, 32);
|
||||
hmac_blake2b_yp_final(&hctx, U);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++) {
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32) {
|
||||
clen = 32;
|
||||
}
|
||||
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean PShctx, since we never called _Final on it. */
|
||||
memset(&PShctx, 0, sizeof(hmac_yp_ctx));
|
||||
}
|
42
algo/yespower/crypto/blake2b-yp.h
Normal file
42
algo/yespower/crypto/blake2b-yp.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#pragma once
|
||||
#ifndef __BLAKE2B_H__
|
||||
#define __BLAKE2B_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER) || defined(__x86_64__) || defined(__x86__)
|
||||
#define NATIVE_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
// state context
|
||||
typedef struct {
|
||||
uint8_t b[128]; // input buffer
|
||||
uint64_t h[8]; // chained state
|
||||
uint64_t t[2]; // total number of bytes
|
||||
size_t c; // pointer for b[]
|
||||
size_t outlen; // digest size
|
||||
} blake2b_yp_ctx;
|
||||
|
||||
typedef struct {
|
||||
blake2b_yp_ctx inner;
|
||||
blake2b_yp_ctx outer;
|
||||
} hmac_yp_ctx;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int blake2b_yp_init(blake2b_yp_ctx *ctx, size_t outlen, const void *key, size_t keylen);
|
||||
void blake2b_yp_update(blake2b_yp_ctx *ctx, const void *in, size_t inlen);
|
||||
void blake2b_yp_final(blake2b_yp_ctx *ctx, void *out);
|
||||
void blake2b_yp_hash(void *out, const void *in, size_t inlen);
|
||||
void hmac_blake2b_yp_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen);
|
||||
void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1976
algo/yespower/crypto/sph_types.h
Normal file
1976
algo/yespower/crypto/sph_types.h
Normal file
File diff suppressed because it is too large
Load Diff
1
algo/yespower/utils/insecure_memzero.h
Normal file
1
algo/yespower/utils/insecure_memzero.h
Normal file
@@ -0,0 +1 @@
|
||||
#define insecure_memzero(buf, len) /* empty */
|
94
algo/yespower/utils/sysendian.h
Normal file
94
algo/yespower/utils/sysendian.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/*-
|
||||
* Copyright 2007-2014 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SYSENDIAN_H_
|
||||
#define _SYSENDIAN_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Avoid namespace collisions with BSD <sys/endian.h>. */
|
||||
#define be32dec libcperciva_be32dec
|
||||
#define be32enc libcperciva_be32enc
|
||||
#define be64enc libcperciva_be64enc
|
||||
#define le32dec libcperciva_le32dec
|
||||
#define le32enc libcperciva_le32enc
|
||||
|
||||
static inline uint32_t
|
||||
be32dec(const void * pp)
|
||||
{
|
||||
const uint8_t * p = (uint8_t const *)pp;
|
||||
|
||||
return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
|
||||
((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
|
||||
}
|
||||
|
||||
static inline void
|
||||
be32enc(void * pp, uint32_t x)
|
||||
{
|
||||
uint8_t * p = (uint8_t *)pp;
|
||||
|
||||
p[3] = x & 0xff;
|
||||
p[2] = (x >> 8) & 0xff;
|
||||
p[1] = (x >> 16) & 0xff;
|
||||
p[0] = (x >> 24) & 0xff;
|
||||
}
|
||||
|
||||
static inline void
|
||||
be64enc(void * pp, uint64_t x)
|
||||
{
|
||||
uint8_t * p = (uint8_t *)pp;
|
||||
|
||||
p[7] = x & 0xff;
|
||||
p[6] = (x >> 8) & 0xff;
|
||||
p[5] = (x >> 16) & 0xff;
|
||||
p[4] = (x >> 24) & 0xff;
|
||||
p[3] = (x >> 32) & 0xff;
|
||||
p[2] = (x >> 40) & 0xff;
|
||||
p[1] = (x >> 48) & 0xff;
|
||||
p[0] = (x >> 56) & 0xff;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
le32dec(const void * pp)
|
||||
{
|
||||
const uint8_t * p = (uint8_t const *)pp;
|
||||
|
||||
return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
|
||||
((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
|
||||
}
|
||||
|
||||
static inline void
|
||||
le32enc(void * pp, uint32_t x)
|
||||
{
|
||||
uint8_t * p = (uint8_t *)pp;
|
||||
|
||||
p[0] = x & 0xff;
|
||||
p[1] = (x >> 8) & 0xff;
|
||||
p[2] = (x >> 16) & 0xff;
|
||||
p[3] = (x >> 24) & 0xff;
|
||||
}
|
||||
|
||||
#endif /* !_SYSENDIAN_H_ */
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user