Compare commits

..

4 Commits

Author SHA1 Message Date
Jay D Dee
91ec6f1771 v3.9.11 2019-11-26 09:22:03 -05:00
Jay D Dee
a52c5eccf7 v3.9.10 2019-11-22 20:29:18 -05:00
Jay D Dee
86b889e1b0 v3.9.9.1 2019-10-24 14:11:26 -04:00
Jay D Dee
72330eb5a7 v3.9.9 2019-10-10 19:58:34 -04:00
118 changed files with 14265 additions and 2769 deletions

View File

@@ -117,6 +117,7 @@ cpuminer_SOURCES = \
algo/keccak/keccak-4way.c\
algo/keccak/keccak-gate.c \
algo/keccak/sse2/keccak.c \
algo/lanehash/lane.c \
algo/luffa/sph_luffa.c \
algo/luffa/luffa.c \
algo/luffa/luffa_for_sse2.c \
@@ -200,6 +201,7 @@ cpuminer_SOURCES = \
algo/skein/skein2-gate.c \
algo/sm3/sm3.c \
algo/sm3/sm3-hash-4way.c \
algo/swifftx/swifftx.c \
algo/tiger/sph_tiger.c \
algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \
@@ -279,10 +281,17 @@ cpuminer_SOURCES = \
algo/x17/sonoa-4way.c \
algo/x17/sonoa.c \
algo/x20/x20r.c \
algo/x22/x22i-4way.c \
algo/x22/x22i.c \
algo/x22/x22i-gate.c \
algo/x22/x25x.c \
algo/x22/x25x-4way.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-best.c \
algo/yespower/yespower.c \
algo/yespower/yespower-gate.c \
algo/yespower/yespower-blake2b.c \
algo/yespower/crypto/blake2b-yp.c \
algo/yespower/sha256_p.c \
algo/yespower/yespower-opt.c

View File

@@ -92,6 +92,7 @@ Supported Algorithms
phi2-lux identical to phi2
pluck Pluck:128 (Supcoin)
polytimos Ninja
power2b MicroBitcoin (MBC)
quark Quark
qubit Qubit
scrypt scrypt(1024, 1, 1) (default)
@@ -121,13 +122,15 @@ Supported Algorithms
x13sm3 hsr (Hshare)
x14 X14
x15 X15
x16r Ravencoin (RVN) (original algo)
x16rv2 Ravencoin (RVN) (new algo)
x16r
x16rv2 Ravencoin (RVN)
x16rt Gincoin (GIN)
x16rt_veil Veil (VEIL)
x16rt-veil Veil (VEIL)
x16s Pigeoncoin (PGN)
x17
x21s
x22i
x25x
xevan Bitsend (BSD)
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)
@@ -135,6 +138,7 @@ Supported Algorithms
yescryptr32 WAVI
yespower Cryply
yespowerr16 Yenten (YTN)
yespower-b2b generic yespower + blake2b
zr5 Ziftr
Errata
@@ -158,10 +162,12 @@ Bugs
----
Users are encouraged to post their bug reports using git issues or on the
Bitcoin Talk forum at:
Bitcoin Talk forum or opening an issue in git:
https://bitcointalk.org/index.php?topic=1326803.0
https://github.com/JayDDee/cpuminer-opt/issues
All problem reports must be accompanied by a proper problem definition.
This should include how the problem occurred, the command line and
output from the miner showing the startup messages and any errors.
@@ -173,10 +179,6 @@ Donations
cpuminer-opt has no fees of any kind but donations are accepted.
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
Happy mining!

View File

@@ -1,14 +1,6 @@
cpuminer-opt is a console program run from the command line using the
keyboard, not the mouse.
cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
This feature requires recent SW including GCC version 5 or higher and
openssl version 1.1 or higher. It may also require using "-march=znver1"
compile flag.
cpuminer-opt is a console program, if you're using a mouse you're doing it
wrong.
Security warning
----------------
@@ -34,10 +26,43 @@ Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
supported.
64 bit Linux or Windows operating system. Apple and Android are not supported.
FreeBSD YMMV.
Change Log
----------
v3.9.11
Added x22i & x25x algos.
Blake2s 2% faster AVX2 with Intel CPU, slower with Ryzen v1, v2 ?
v3.9.10
Faster X* algos with AVX2.
Small improvements to summary stats report.
v3.9.9.1
Fixed a day1 bug that could cause the miner to idle for up to 2 minutes
under certain circumstances.
Redesigned summary stats report now includes session statistics.
More robust handling of statistics to reduce corruption.
Removed --hide-diff option.
Better handling of cpu-affinity with more than 64 CPUs.
v3.9.9
Added power2b algo for MicroBitcoin.
Added generic yespower-b2b (yespower + blake2b) algo to be used with
the parameters introduced in v3.9.7 for yespower & yescrypt.
Display additional info when a share is rejected.
Some low level enhancements and minor tweaking of log output.
RELEASE_NOTES (this file) and README.md added to Windows release package.
v3.9.8.1
Summary log report will be generated on stratum diff change or after 5 minutes,

View File

@@ -116,8 +116,6 @@ void init_algo_gate( algo_gate_t* gate )
gate->get_nonceptr = (void*)&std_get_nonceptr;
gate->work_decode = (void*)&std_le_work_decode;
gate->decode_extra_data = (void*)&do_nothing;
gate->wait_for_diff = (void*)&std_wait_for_diff;
gate->get_max64 = (void*)&get_max64_0x1fffffLL;
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
@@ -204,6 +202,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_PHI2: register_phi2_algo ( gate ); break;
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
case ALGO_POWER2B: register_power2b_algo ( gate ); break;
case ALGO_QUARK: register_quark_algo ( gate ); break;
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
@@ -239,6 +238,8 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X16S: register_x16s_algo ( gate ); break;
case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_X21S: register_x21s_algo ( gate ); break;
case ALGO_X22I: register_x22i_algo ( gate ); break;
case ALGO_X25X: register_x25x_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
/* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
@@ -251,6 +252,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break;
case ALGO_YESPOWER_B2B: register_yespower_b2b_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
default:
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
@@ -276,7 +278,7 @@ bool register_json_rpc2( algo_gate_t *gate )
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
gate->wait_for_diff = (void*)&do_nothing;
// gate->wait_for_diff = (void*)&do_nothing;
gate->get_new_work = (void*)&jr2_get_new_work;
gate->get_nonceptr = (void*)&jr2_get_nonceptr;
gate->stratum_gen_work = (void*)&jr2_stratum_gen_work;

View File

@@ -35,7 +35,7 @@
// 6. Determine if other non existant functions are required.
// That is determined by the need to add code in cpu-miner.c
// that applies only to the new algo. That is forbidden. All
// algo specific code must be in theh algo's file.
// algo specific code must be in the algo's file.
//
// 7. If new functions need to be added to the gate add the type
// to the structure, declare a null instance in this file and define
@@ -48,10 +48,10 @@
// instances as they are defined by default, or unsafe functions that
// are not needed by the algo.
//
// 9. Add an case entry to the switch/case in function register_gate
// 9. Add a case entry to the switch/case in function register_gate
// in file algo-gate-api.c for the new algo.
//
// 10 If a new function type was defined add an entry to ini talgo_gate
// 10 If a new function type was defined add an entry to init algo_gate
// to initialize the new function to its null instance described in step 7.
//
// 11. If the new algo has aliases add them to the alias array in
@@ -85,14 +85,16 @@
typedef uint32_t set_t;
#define EMPTY_SET 0
#define SSE2_OPT 1
#define AES_OPT 2
#define SSE42_OPT 4
#define AVX_OPT 8
#define AVX2_OPT 0x10
#define SHA_OPT 0x20
#define AVX512_OPT 0x40
#define EMPTY_SET 0
#define SSE2_OPT 1
#define AES_OPT 2
#define SSE42_OPT 4
#define AVX_OPT 8 // Sandybridge
#define AVX2_OPT 0x10 // Haswell
#define SHA_OPT 0x20 // sha256 (Ryzen, Ice Lake)
#define AVX512_OPT 0x40 // AVX512- F, VL, DQ, BW (Skylake-X)
#define VAES_OPT 0x80 // VAES (Ice Lake)
// return set containing all elements from sets a & b
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
@@ -108,14 +110,7 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
typedef struct
{
// special case, only one target, provides a callback for scanhash to
// submit work with less overhead.
// bool (*submit_work ) ( struct thr_info*, const struct work* );
// mandatory functions, must be overwritten
// Added a 5th arg for the thread_info structure to replace the int thr id
// in the first arg. Both will co-exist during the trasition.
//int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
// optional unsafe, must be overwritten if algo uses function
@@ -123,27 +118,55 @@ void ( *hash ) ( void*, const void*, uint32_t ) ;
void ( *hash_suw ) ( void*, const void* );
//optional, safe to use default in most cases
// Allocate thread local buffers and other initialization specific to miner
// threads.
bool ( *miner_thread_init ) ( int );
// Generate global blockheader from stratum data.
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
// Get thread local copy of blockheader with unique nonce.
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t*,
bool );
// Return pointer to nonce in blockheader.
uint32_t *( *get_nonceptr ) ( uint32_t* );
void ( *decode_extra_data ) ( struct work*, uint64_t* );
void ( *wait_for_diff ) ( struct stratum_ctx* );
int64_t ( *get_max64 ) ();
// Decode getwork blockheader
bool ( *work_decode ) ( const json_t*, struct work* );
// Extra getwork data
void ( *decode_extra_data ) ( struct work*, uint64_t* );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
// Increment extranonce
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
uint32_t*, uint32_t, uint32_t );
uint32_t*, uint32_t, uint32_t );
// Build mining.submit message
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
char* ( *malloc_txs_request ) ( struct work* );
// Big or little
void ( *set_work_data_endian ) ( struct work* );
double ( *calc_network_diff ) ( struct work* );
// Wait for first work
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
void ( *resync_threads ) ( struct work* );
// Diverge mining threads
bool ( *do_this_thread ) ( int );
// After do_this_thread
void ( *resync_threads ) ( struct work* );
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
bool ( *stratum_handle_response )( json_t* );
set_t optimizations;
@@ -198,8 +221,6 @@ void null_hash_suw();
// optional safe targets, default listed first unless noted.
void std_wait_for_diff();
uint32_t *std_get_nonceptr( uint32_t *work_data );
uint32_t *jr2_get_nonceptr( uint32_t *work_data );
@@ -214,21 +235,13 @@ void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
// pick your favorite or define your own
int64_t get_max64_0x1fffffLL(); // default
int64_t get_max64_0x40LL();
int64_t get_max64_0x3ffff();
int64_t get_max64_0x3fffffLL();
int64_t get_max64_0x1ffff();
int64_t get_max64_0xffffLL();
bool std_le_work_decode( const json_t *val, struct work *work );
bool std_be_work_decode( const json_t *val, struct work *work );
bool jr2_work_decode( const json_t *val, struct work *work );
bool jr2_work_decode( const json_t *val, struct work *work );
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
@@ -242,8 +255,8 @@ void set_work_data_big_endian( struct work *work );
double std_calc_network_diff( struct work *work );
void std_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits );
uint32_t *prevhash, uint32_t *merkle_root,
uint32_t ntime, uint32_t nbits );
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
@@ -264,8 +277,8 @@ int std_get_work_data_size();
// by calling the algo's register function.
bool register_algo_gate( int algo, algo_gate_t *gate );
// Override any default gate functions that are applicable and do any other
// algo-specific initialization.
// Called by algos toverride any default gate functions that are applicable
// and do any other algo-specific initialization.
// The register functions for all the algos can be declared here to reduce
// compiler warnings but that's just more work for devs adding new algos.
bool register_algo( algo_gate_t *gate );
@@ -278,5 +291,7 @@ bool register_json_rpc2( algo_gate_t *gate );
// use this to call the hash function of an algo directly, ie util.c test.
void exec_hash_function( int algo, void *output, const void *pdata );
void get_algo_alias( char** algo_or_alias );
// Validate a string as a known algo and alias, updates arg to proper
// algo name if valid alias, NULL if invalid alias or algo.
void get_algo_alias( char **algo_or_alias );

View File

@@ -74,18 +74,12 @@ int scanhash_argon2( struct work* work, uint32_t max_nonce,
return 0;
}
int64_t argon2_get_max64 ()
{
return 0x1ffLL;
}
bool register_argon2_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_argon2;
gate->hash = (void*)&argon2hash;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->get_max64 = (void*)&argon2_get_max64;
opt_target_factor = 65536.0;
return true;

View File

@@ -179,12 +179,9 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
return 0;
}
int64_t get_max64_0x1ff() { return 0x1ff; }
bool register_argon2d4096_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d4096;
gate->get_max64 = (void*)&get_max64_0x1ff;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
opt_target_factor = 65536.0;
return true;

View File

@@ -1,18 +1,8 @@
#include "blake-gate.h"
int64_t blake_get_max64 ()
{
return 0x7ffffLL;
}
bool register_blake_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&blake_get_max64;
//#if defined (__AVX2__) && defined (FOUR_WAY)
// gate->optimizations = SSE2_OPT | AVX2_OPT;
// gate->scanhash = (void*)&scanhash_blake_8way;
// gate->hash = (void*)&blakehash_8way;
#if defined(BLAKE_4WAY)
four_way_not_tested();
gate->scanhash = (void*)&scanhash_blake_4way;

View File

@@ -1,13 +1,5 @@
#include "blake2b-gate.h"
/*
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake2s_get_max64 ()
{
return 0x7ffffLL;
}
*/
bool register_blake2b_algo( algo_gate_t* gate )
{
#if defined(BLAKE2B_4WAY)
@@ -17,7 +9,6 @@ bool register_blake2b_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_blake2b;
gate->hash = (void*)&blake2b_hash;
#endif
// gate->get_max64 = (void*)&blake2s_get_max64;
gate->optimizations = AVX2_OPT;
return true;
};

View File

@@ -1,12 +1,5 @@
#include "blake2s-gate.h"
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake2s_get_max64 ()
{
return 0x7ffffLL;
}
bool register_blake2s_algo( algo_gate_t* gate )
{
#if defined(BLAKE2S_8WAY)
@@ -19,7 +12,6 @@ bool register_blake2s_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_blake2s;
gate->hash = (void*)&blake2s_hash;
#endif
gate->get_max64 = (void*)&blake2s_get_max64;
gate->optimizations = SSE2_OPT | AVX2_OPT;
return true;
};

View File

@@ -20,12 +20,13 @@
//#if defined(__SSE4_2__)
#if defined(__SSE2__)
/*
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
*/
static const uint8_t blake2s_sigma[10][16] =
{
@@ -41,6 +42,7 @@ static const uint8_t blake2s_sigma[10][16] =
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
// define a constant for initial param.
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen )
@@ -88,41 +90,45 @@ int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
memcpy_128( m, block, 16 );
memcpy_128( v, S->h, 8 );
v[ 8] = _mm_set1_epi32( blake2s_IV[0] );
v[ 9] = _mm_set1_epi32( blake2s_IV[1] );
v[10] = _mm_set1_epi32( blake2s_IV[2] );
v[11] = _mm_set1_epi32( blake2s_IV[3] );
v[ 8] = m128_const1_64( 0x6A09E6676A09E667ULL );
v[ 9] = m128_const1_64( 0xBB67AE85BB67AE85ULL );
v[10] = m128_const1_64( 0x3C6EF3723C6EF372ULL );
v[11] = m128_const1_64( 0xA54FF53AA54FF53AULL );
v[12] = _mm_xor_si128( _mm_set1_epi32( S->t[0] ),
_mm_set1_epi32( blake2s_IV[4] ) );
m128_const1_64( 0x510E527F510E527FULL ) );
v[13] = _mm_xor_si128( _mm_set1_epi32( S->t[1] ),
_mm_set1_epi32( blake2s_IV[5] ) );
m128_const1_64( 0x9B05688C9B05688CULL ) );
v[14] = _mm_xor_si128( _mm_set1_epi32( S->f[0] ),
_mm_set1_epi32( blake2s_IV[6] ) );
m128_const1_64( 0x1F83D9AB1F83D9ABULL ) );
v[15] = _mm_xor_si128( _mm_set1_epi32( S->f[1] ),
_mm_set1_epi32( blake2s_IV[7] ) );
m128_const1_64( 0x5BE0CD195BE0CD19ULL ) );
#define G4W(r,i,a,b,c,d) \
#define G4W( sigma0, sigma1, a, b, c, d ) \
do { \
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+0] ] ); \
uint8_t s0 = sigma0; \
uint8_t s1 = sigma1; \
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ s0 ] ); \
d = mm128_ror_32( _mm_xor_si128( d, a ), 16 ); \
c = _mm_add_epi32( c, d ); \
b = mm128_ror_32( _mm_xor_si128( b, c ), 12 ); \
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+1] ] ); \
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ s1 ] ); \
d = mm128_ror_32( _mm_xor_si128( d, a ), 8 ); \
c = _mm_add_epi32( c, d ); \
b = mm128_ror_32( _mm_xor_si128( b, c ), 7 ); \
} while(0)
#define ROUND4W(r) \
do { \
G4W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
G4W( r, 1, v[ 1], v[ 5], v[ 9], v[13] ); \
G4W( r, 2, v[ 2], v[ 6], v[10], v[14] ); \
G4W( r, 3, v[ 3], v[ 7], v[11], v[15] ); \
G4W( r, 4, v[ 0], v[ 5], v[10], v[15] ); \
G4W( r, 5, v[ 1], v[ 6], v[11], v[12] ); \
G4W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
G4W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
uint8_t *sigma = (uint8_t*)&blake2s_sigma[r]; \
G4W( sigma[ 0], sigma[ 1], v[ 0], v[ 4], v[ 8], v[12] ); \
G4W( sigma[ 2], sigma[ 3], v[ 1], v[ 5], v[ 9], v[13] ); \
G4W( sigma[ 4], sigma[ 5], v[ 2], v[ 6], v[10], v[14] ); \
G4W( sigma[ 6], sigma[ 7], v[ 3], v[ 7], v[11], v[15] ); \
G4W( sigma[ 8], sigma[ 9], v[ 0], v[ 5], v[10], v[15] ); \
G4W( sigma[10], sigma[11], v[ 1], v[ 6], v[11], v[12] ); \
G4W( sigma[12], sigma[13], v[ 2], v[ 7], v[ 8], v[13] ); \
G4W( sigma[14], sigma[15], v[ 3], v[ 4], v[ 9], v[14] ); \
} while(0)
ROUND4W( 0 );
@@ -144,26 +150,47 @@ do { \
return 0;
}
// There is a problem that can't be resolved internally.
// If the last block is a full 64 bytes it should not be compressed in
// update but left for final. However, when streaming, it isn't known
// which block is last. There may be a subsequent call to update to add
// more data.
//
// The reference code handled this by juggling 2 blocks at a time at
// a significant performance penalty.
//
// Instead a new function is introduced called full_blocks which combines
// update and final and is to be used in non-streaming mode where the data
// is a multiple of 64 bytes.
//
// Supported:
// 64 + 16 bytes (blake2s with midstate optimization)
// 80 bytes without midstate (blake2s without midstate optimization)
// Any multiple of 64 bytes in one shot (x25x)
//
// Unsupported:
// Stream of 64 byte blocks one at a time.
//
// use for part blocks or when streaming more data
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
uint64_t inlen )
{
__m128i *input = (__m128i*)in;
__m128i *buf = (__m128i*)S->buf;
const int bsize = BLAKE2S_BLOCKBYTES;
__m128i *input = (__m128i*)in;
__m128i *buf = (__m128i*)S->buf;
while( inlen > 0 )
{
size_t left = S->buflen;
if( inlen >= bsize - left )
if( inlen >= BLAKE2S_BLOCKBYTES - left )
{
memcpy_128( buf + (left>>2), input, (bsize - left) >> 2 );
S->buflen += bsize - left;
memcpy_128( buf + (left>>2), input, (BLAKE2S_BLOCKBYTES - left) >> 2 );
S->buflen += BLAKE2S_BLOCKBYTES - left;
S->t[0] += BLAKE2S_BLOCKBYTES;
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
blake2s_4way_compress( S, buf );
S->buflen = 0;
input += ( bsize >> 2 );
inlen -= bsize;
input += ( BLAKE2S_BLOCKBYTES >> 2 );
inlen -= BLAKE2S_BLOCKBYTES;
}
else
{
@@ -195,8 +222,45 @@ int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen )
return 0;
}
// Update and final when inlen is a multiple of 64 bytes
int blake2s_4way_full_blocks( blake2s_4way_state *S, void *out,
const void *input, uint64_t inlen )
{
__m128i *in = (__m128i*)input;
__m128i *buf = (__m128i*)S->buf;
while( inlen > BLAKE2S_BLOCKBYTES )
{
memcpy_128( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
S->buflen = BLAKE2S_BLOCKBYTES;
inlen -= BLAKE2S_BLOCKBYTES;
S->t[0] += BLAKE2S_BLOCKBYTES;
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
blake2s_4way_compress( S, buf );
S->buflen = 0;
in += ( BLAKE2S_BLOCKBYTES >> 2 );
}
// last block
memcpy_128( buf, in, BLAKE2S_BLOCKBYTES >> 2 );
S->buflen = BLAKE2S_BLOCKBYTES;
S->t[0] += S->buflen;
S->t[1] += ( S->t[0] < S->buflen );
if ( S->last_node ) S->f[1] = ~0U;
S->f[0] = ~0U;
blake2s_4way_compress( S, buf );
for ( int i = 0; i < 8; ++i )
casti_m128i( out, i ) = S->h[ i ];
return 0;
}
#if defined(__AVX2__)
// The commented code below is slower on Intel but faster on
// Zen1 AVX2. It's also faster than Zen1 AVX.
// Ryzen gen2 is unknown at this time.
int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
{
__m256i m[16];
@@ -205,6 +269,23 @@ int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
memcpy_256( m, block, 16 );
memcpy_256( v, S->h, 8 );
v[ 8] = m256_const1_64( 0x6A09E6676A09E667ULL );
v[ 9] = m256_const1_64( 0xBB67AE85BB67AE85ULL );
v[10] = m256_const1_64( 0x3C6EF3723C6EF372ULL );
v[11] = m256_const1_64( 0xA54FF53AA54FF53AULL );
v[12] = _mm256_xor_si256( _mm256_set1_epi32( S->t[0] ),
m256_const1_64( 0x510E527F510E527FULL ) );
v[13] = _mm256_xor_si256( _mm256_set1_epi32( S->t[1] ),
m256_const1_64( 0x9B05688C9B05688CULL ) );
v[14] = _mm256_xor_si256( _mm256_set1_epi32( S->f[0] ),
m256_const1_64( 0x1F83D9AB1F83D9ABULL ) );
v[15] = _mm256_xor_si256( _mm256_set1_epi32( S->f[1] ),
m256_const1_64( 0x5BE0CD195BE0CD19ULL ) );
/*
v[ 8] = _mm256_set1_epi32( blake2s_IV[0] );
v[ 9] = _mm256_set1_epi32( blake2s_IV[1] );
v[10] = _mm256_set1_epi32( blake2s_IV[2] );
@@ -218,6 +299,7 @@ int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
v[15] = _mm256_xor_si256( _mm256_set1_epi32( S->f[1] ),
_mm256_set1_epi32( blake2s_IV[7] ) );
#define G8W(r,i,a,b,c,d) \
do { \
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
@@ -231,7 +313,36 @@ do { \
c = _mm256_add_epi32( c, d ); \
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
} while(0)
*/
#define G8W( sigma0, sigma1, a, b, c, d) \
do { \
uint8_t s0 = sigma0; \
uint8_t s1 = sigma1; \
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), m[ s0 ] ); \
d = mm256_ror_32( _mm256_xor_si256( d, a ), 16 ); \
c = _mm256_add_epi32( c, d ); \
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), m[ s1 ] ); \
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
c = _mm256_add_epi32( c, d ); \
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
} while(0)
#define ROUND8W(r) \
do { \
uint8_t *sigma = (uint8_t*)&blake2s_sigma[r]; \
G8W( sigma[ 0], sigma[ 1], v[ 0], v[ 4], v[ 8], v[12] ); \
G8W( sigma[ 2], sigma[ 3], v[ 1], v[ 5], v[ 9], v[13] ); \
G8W( sigma[ 4], sigma[ 5], v[ 2], v[ 6], v[10], v[14] ); \
G8W( sigma[ 6], sigma[ 7], v[ 3], v[ 7], v[11], v[15] ); \
G8W( sigma[ 8], sigma[ 9], v[ 0], v[ 5], v[10], v[15] ); \
G8W( sigma[10], sigma[11], v[ 1], v[ 6], v[11], v[12] ); \
G8W( sigma[12], sigma[13], v[ 2], v[ 7], v[ 8], v[13] ); \
G8W( sigma[14], sigma[15], v[ 3], v[ 4], v[ 9], v[14] ); \
} while(0)
/*
#define ROUND8W(r) \
do { \
G8W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
@@ -243,6 +354,7 @@ do { \
G8W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
G8W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
} while(0)
*/
ROUND8W( 0 );
ROUND8W( 1 );

View File

@@ -64,7 +64,7 @@ typedef struct __blake2s_nway_param
ALIGN( 64 ) typedef struct __blake2s_4way_state
{
__m128i h[8];
uint8_t buf[ BLAKE2S_BLOCKBYTES * 4 ];
uint8_t buf[ 2 * BLAKE2S_BLOCKBYTES * 4 ];
uint32_t t[2];
uint32_t f[2];
size_t buflen;
@@ -81,7 +81,7 @@ int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen );
ALIGN( 64 ) typedef struct __blake2s_8way_state
{
__m256i h[8];
uint8_t buf[ BLAKE2S_BLOCKBYTES * 8 ];
uint8_t buf[ 2 * BLAKE2S_BLOCKBYTES * 8 ];
uint32_t t[2];
uint32_t f[2];
size_t buflen;
@@ -92,6 +92,9 @@ int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen );
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
uint64_t inlen );
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
int blake2s_4way_full_blocks( blake2s_4way_state *S, void *out,
const void *input, uint64_t inlen );
#endif

View File

@@ -70,18 +70,3 @@ int scanhash_blake2s( struct work *work,
return 0;
}
/*
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake2s_get_max64 ()
{
return 0x7ffffLL;
}
bool register_blake2s_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_blake2s;
gate->hash = (void*)&blake2s_hash;
gate->get_max64 = (void*)&blake2s_get_max64;
return true;
};
*/

View File

@@ -403,7 +403,9 @@ static const sph_u64 CB[16] = {
__m256i M[16]; \
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
unsigned r; \
const __m256i shuff_bswap64 = m256_const2_64( 0x08090a0b0c0d0e0f, \
0x0001020304050607 ) \
unsigned r; \
V0 = H0; \
V1 = H1; \
V2 = H2; \
@@ -412,53 +414,53 @@ static const sph_u64 CB[16] = {
V5 = H5; \
V6 = H6; \
V7 = H7; \
V8 = _mm256_xor_si256( S0, _mm256_set_epi64x( CB0, CB0, CB0, CB0 ) ); \
V9 = _mm256_xor_si256( S1, _mm256_set_epi64x( CB1, CB1, CB1, CB1 ) ); \
VA = _mm256_xor_si256( S2, _mm256_set_epi64x( CB2, CB2, CB2, CB2 ) ); \
VB = _mm256_xor_si256( S3, _mm256_set_epi64x( CB3, CB3, CB3, CB3 ) ); \
VC = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
_mm256_set_epi64x( CB4, CB4, CB4, CB4 ) ); \
VD = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
_mm256_set_epi64x( CB5, CB5, CB5, CB5 ) ); \
VE = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
M[0x0] = mm256_bswap_64( *(buf+0) ); \
M[0x1] = mm256_bswap_64( *(buf+1) ); \
M[0x2] = mm256_bswap_64( *(buf+2) ); \
M[0x3] = mm256_bswap_64( *(buf+3) ); \
M[0x4] = mm256_bswap_64( *(buf+4) ); \
M[0x5] = mm256_bswap_64( *(buf+5) ); \
M[0x6] = mm256_bswap_64( *(buf+6) ); \
M[0x7] = mm256_bswap_64( *(buf+7) ); \
M[0x8] = mm256_bswap_64( *(buf+8) ); \
M[0x9] = mm256_bswap_64( *(buf+9) ); \
M[0xA] = mm256_bswap_64( *(buf+10) ); \
M[0xB] = mm256_bswap_64( *(buf+11) ); \
M[0xC] = mm256_bswap_64( *(buf+12) ); \
M[0xD] = mm256_bswap_64( *(buf+13) ); \
M[0xE] = mm256_bswap_64( *(buf+14) ); \
M[0xF] = mm256_bswap_64( *(buf+15) ); \
V8 = _mm256_xor_si256( S0, _mm256_set1_epi64x( CB0 ) ); \
V9 = _mm256_xor_si256( S1, _mm256_set1_epi64x( CB1 ) ); \
VA = _mm256_xor_si256( S2, _mm256_set1_epi64x( CB2 ) ); \
VB = _mm256_xor_si256( S3, _mm256_set1_epi64x( CB3 ) ); \
VC = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
_mm256_set1_epi64x( CB4 ) ); \
VD = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
_mm256_set1_epi64x( CB5 ) ); \
VE = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
_mm256_set1_epi64x( CB6 ) ); \
VF = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
_mm256_set1_epi64x( CB7, CB7, CB7, CB7 ) ); \
M[0x0] = _mm256_shuffle_epi8( *(buf+ 0), shuff_bswap64 ); \
M[0x1] = _mm256_shuffle_epi8( *(buf+ 1), shuff_bswap64 ); \
M[0x2] = _mm256_shuffle_epi8( *(buf+ 2), shuff_bswap64 ); \
M[0x3] = _mm256_shuffle_epi8( *(buf+ 3), shuff_bswap64 ); \
M[0x4] = _mm256_shuffle_epi8( *(buf+ 4), shuff_bswap64 ); \
M[0x5] = _mm256_shuffle_epi8( *(buf+ 5), shuff_bswap64 ); \
M[0x6] = _mm256_shuffle_epi8( *(buf+ 6), shuff_bswap64 ); \
M[0x7] = _mm256_shuffle_epi8( *(buf+ 7), shuff_bswap64 ); \
M[0x8] = _mm256_shuffle_epi8( *(buf+ 8), shuff_bswap64 ); \
M[0x9] = _mm256_shuffle_epi8( *(buf+ 9), shuff_bswap64 ); \
M[0xA] = _mm256_shuffle_epi8( *(buf+10), shuff_bswap64 ); \
M[0xB] = _mm256_shuffle_epi8( *(buf+11), shuff_bswap64 ); \
M[0xC] = _mm256_shuffle_epi8( *(buf+12), shuff_bswap64 ); \
M[0xD] = _mm256_shuffle_epi8( *(buf+13), shuff_bswap64 ); \
M[0xE] = _mm256_shuffle_epi8( *(buf+14), shuff_bswap64 ); \
M[0xF] = _mm256_shuffle_epi8( *(buf+15), shuff_bswap64 ); \
for (r = 0; r < 16; r ++) \
ROUND_B_4WAY(r); \
H0 = _mm256_xor_si256( _mm256_xor_si256( \
H0 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S0, V0 ), V8 ), H0 ); \
H1 = _mm256_xor_si256( _mm256_xor_si256( \
H1 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S1, V1 ), V9 ), H1 ); \
H2 = _mm256_xor_si256( _mm256_xor_si256( \
H2 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S2, V2 ), VA ), H2 ); \
H3 = _mm256_xor_si256( _mm256_xor_si256( \
H3 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S3, V3 ), VB ), H3 ); \
H4 = _mm256_xor_si256( _mm256_xor_si256( \
H4 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S0, V4 ), VC ), H4 ); \
H5 = _mm256_xor_si256( _mm256_xor_si256( \
H5 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S1, V5 ), VD ), H5 ); \
H6 = _mm256_xor_si256( _mm256_xor_si256( \
H6 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S2, V6 ), VE ), H6 ); \
H7 = _mm256_xor_si256( _mm256_xor_si256( \
H7 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( S3, V7 ), VF ), H7 ); \
} while (0)
} while (0)
#else
@@ -491,8 +493,7 @@ static const sph_u64 CB[16] = {
m256_const1_64( CB6 ) ); \
VF = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
m256_const1_64( CB7 ) ); \
shuf_bswap64 = m256_const_64( 0x08090a0b0c0d0e0f, 0x0001020304050607, \
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
shuf_bswap64 = m256_const2_64( 0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
M0 = _mm256_shuffle_epi8( *(buf+ 0), shuf_bswap64 ); \
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap64 ); \
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap64 ); \
@@ -620,7 +621,7 @@ blake64_4way_close( blake_4way_big_context *sc,
bit_len = ((unsigned)ptr << 3);
z = 0x80 >> n;
zz = ((ub & -z) | z) & 0xFF;
buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
buf[ptr>>3] = _mm256_set1_epi64x( zz );
tl = sc->T0 + bit_len;
th = sc->T1;
if (ptr == 0 )

View File

@@ -1,13 +1,6 @@
#include "blakecoin-gate.h"
#include <memory.h>
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blakecoin_get_max64 ()
{
return 0x7ffffLL;
// return 0x3fffffLL;
}
// vanilla uses default gen merkle root, otherwise identical to blakecoin
bool register_vanilla_algo( algo_gate_t* gate )
{
@@ -23,7 +16,6 @@ bool register_vanilla_algo( algo_gate_t* gate )
gate->hash = (void*)&blakecoinhash;
#endif
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&blakecoin_get_max64;
return true;
}

View File

@@ -93,33 +93,3 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
return 0;
}
/*
void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
{
SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
}
*/
/*
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blakecoin_get_max64 ()
{
return 0x7ffffLL;
}
// vanilla uses default gen merkle root, otherwise identical to blakecoin
bool register_vanilla_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_blakecoin;
gate->hash = (void*)&blakecoinhash;
gate->get_max64 = (void*)&blakecoin_get_max64;
blakecoin_init( &blake_init_ctx );
return true;
}
bool register_blakecoin_algo( algo_gate_t* gate )
{
register_vanilla_algo( gate );
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
return true;
}
*/

View File

@@ -38,7 +38,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
if (!have_longpoll && work->height > *net_blocks + 1)
{
char netinfo[64] = { 0 };
if (opt_showdiff && net_diff > 0.)
if ( net_diff > 0. )
{
if (net_diff != work->targetdiff)
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
@@ -116,7 +116,7 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
// block header suffix from coinb2 (stake version)
memcpy( &g_work->data[44],
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
sctx->bloc_height = g_work->data[32];
sctx->block_height = g_work->data[32];
//applog_hex(work->data, 180);
//applog_hex(&work->data[36], 36);
}
@@ -154,7 +154,6 @@ bool register_decred_algo( algo_gate_t* gate )
#endif
gate->optimizations = AVX2_OPT;
gate->get_nonceptr = (void*)&decred_get_nonceptr;
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
gate->decode_extra_data = (void*)&decred_decode_extradata;
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;

View File

@@ -143,7 +143,7 @@ void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
if (!have_longpoll && work->height > *net_blocks + 1)
{
char netinfo[64] = { 0 };
if (opt_showdiff && net_diff > 0.)
if (net_diff > 0.)
{
if (net_diff != work->targetdiff)
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
@@ -269,7 +269,6 @@ bool register_decred_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_decred;
gate->hash = (void*)&decred_hash;
gate->get_nonceptr = (void*)&decred_get_nonceptr;
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
gate->decode_extra_data = (void*)&decred_decode_extradata;
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;

View File

@@ -10,7 +10,6 @@ bool register_pentablake_algo( algo_gate_t* gate )
gate->hash = (void*)&pentablakehash;
#endif
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -1,11 +1,8 @@
#include "bmw512-gate.h"
int64_t bmw512_get_max64() { return 0x7ffffLL; }
bool register_bmw512_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&bmw512_get_max64;
opt_target_factor = 256.0;
#if defined (BMW512_4WAY)
gate->scanhash = (void*)&scanhash_bmw512_4way;

View File

@@ -363,7 +363,6 @@ bool register_cryptolight_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_cryptolight;
gate->hash = (void*)&cryptolight_hash;
gate->hash_suw = (void*)&cryptolight_hash;
gate->get_max64 = (void*)&get_max64_0x40LL;
return true;
};

View File

@@ -111,7 +111,6 @@ bool register_cryptonight_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_cryptonight;
gate->hash = (void*)&cryptonight_hash;
gate->hash_suw = (void*)&cryptonight_hash_suw;
gate->get_max64 = (void*)&get_max64_0x40LL;
return true;
};
@@ -123,7 +122,6 @@ bool register_cryptonightv7_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_cryptonight;
gate->hash = (void*)&cryptonight_hash;
gate->hash_suw = (void*)&cryptonight_hash_suw;
gate->get_max64 = (void*)&get_max64_0x40LL;
return true;
};

View File

@@ -7,7 +7,7 @@
// 2x128
/*
// The result of hashing 10 rounds of initial data which consists of params
// zero padded.
static const uint64_t IV256[] =
@@ -25,7 +25,7 @@ static const uint64_t IV512[] =
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
};
*/
static void transform_2way( cube_2way_context *sp )
{
@@ -97,39 +97,30 @@ static void transform_2way( cube_2way_context *sp )
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
int blockbytes )
{
__m128i* h = (__m128i*)sp->h;
__m256i *h = (__m256i*)sp->h;
__m128i *iv = (__m128i*)( hashbitlen == 512 ? (__m128i*)IV512
: (__m128i*)IV256 );
sp->hashlen = hashbitlen/128;
sp->blocksize = blockbytes/16;
sp->rounds = rounds;
sp->pos = 0;
if ( hashbitlen == 512 )
{
h[ 0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
h[ 2] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
h[ 4] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
h[ 6] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
h[ 8] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
h[10] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
h[12] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
h[14] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
}
else
{
h[ 0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
h[ 2] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
h[ 4] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
h[ 6] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
h[ 8] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
h[10] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
h[12] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
h[14] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
}
h[ 0] = m256_const1_128( iv[0] );
h[ 1] = m256_const1_128( iv[1] );
h[ 2] = m256_const1_128( iv[2] );
h[ 3] = m256_const1_128( iv[3] );
h[ 4] = m256_const1_128( iv[4] );
h[ 5] = m256_const1_128( iv[5] );
h[ 6] = m256_const1_128( iv[6] );
h[ 7] = m256_const1_128( iv[7] );
h[ 0] = m256_const1_128( iv[0] );
h[ 1] = m256_const1_128( iv[1] );
h[ 2] = m256_const1_128( iv[2] );
h[ 3] = m256_const1_128( iv[3] );
h[ 4] = m256_const1_128( iv[4] );
h[ 5] = m256_const1_128( iv[5] );
h[ 6] = m256_const1_128( iv[6] );
h[ 7] = m256_const1_128( iv[7] );
return 0;
}
@@ -164,11 +155,11 @@ int cube_2way_close( cube_2way_context *sp, void *output )
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 ) );
m256_const2_64( 0, 0x0000000000000080 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7],
_mm256_set_epi32( 1,0,0,0, 1,0,0,0 ) );
m256_const2_64( 0x0000000100000000, 0 ) );
for ( i = 0; i < 10; ++i ) transform_2way( sp );
@@ -197,13 +188,13 @@ int cube_2way_update_close( cube_2way_context *sp, void *output,
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 ) );
m256_const2_64( 0, 0x0000000000000080 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
1,0,0,0 ) );
sp->h[7] = _mm256_xor_si256( sp->h[7],
m256_const2_64( 0x0000000100000000, 0 ) );
for ( i = 0; i < 10; ++i ) transform_2way( sp );
for ( i = 0; i < 10; ++i ) transform_2way( sp );
memcpy( hash, sp->h, sp->hashlen<<5 );
return 0;

View File

@@ -100,7 +100,6 @@ bool register_dmd_gr_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_groestl;
gate->hash = (void*)&groestlhash;
gate->get_max64 = (void*)&get_max64_0x3ffff;
opt_target_factor = 256.0;
return true;
};

View File

@@ -88,15 +88,3 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
/*
bool register_myriad_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT;
init_myrgr_ctx();
gate->scanhash = (void*)&scanhash_myriad;
gate->hash = (void*)&myriadhash;
// gate->hash_alt = (void*)&myriadhash;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};
*/

View File

@@ -12,7 +12,6 @@ bool register_myriad_algo( algo_gate_t* gate )
gate->hash = (void*)&myriad_hash;
#endif
gate->optimizations = AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -32,8 +32,6 @@
#include <stddef.h>
#include <string.h>
//#include "miner.h"
#include "hamsi-hash-4way.h"
#if defined(__AVX2__)
@@ -100,7 +98,7 @@ extern "C"{
#endif
//#include "hamsi-helper-4way.c"
/*
static const sph_u32 IV512[] = {
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
@@ -109,7 +107,7 @@ static const sph_u32 IV512[] = {
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
SPH_C32(0x6769756d)
};
*/
static const sph_u32 alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
@@ -138,6 +136,7 @@ static const sph_u32 alpha_f[] = {
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
// imported from hamsi helper
/* Note: this table lists bits within each byte from least
@@ -529,49 +528,34 @@ static const sph_u32 T512[64][16] = {
SPH_C32(0xe7e00a94) }
};
#define INPUT_BIG \
do { \
const __m256i zero = _mm256_setzero_si256(); \
__m256i db = *buf; \
const sph_u32 *tp = &T512[0][0]; \
m0 = zero; \
m1 = zero; \
m2 = zero; \
m3 = zero; \
m4 = zero; \
m5 = zero; \
m6 = zero; \
m7 = zero; \
const uint64_t *tp = (uint64_t*)&T512[0][0]; \
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m256_zero; \
for ( int u = 0; u < 64; u++ ) \
{ \
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
dm = mm256_negate_32( _mm256_or_si256( dm, \
_mm256_slli_epi64( dm, 32 ) ) ); \
m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x1], tp[0x0], tp[0x1], tp[0x0], \
tp[0x1], tp[0x0], tp[0x1], tp[0x0] ) ) ); \
m256_const1_64( tp[0] ) ) ); \
m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x3], tp[0x2], tp[0x3], tp[0x2], \
tp[0x3], tp[0x2], tp[0x3], tp[0x2] ) ) ); \
m256_const1_64( tp[1] ) ) ); \
m2 = _mm256_xor_si256( m2, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x5], tp[0x4], tp[0x5], tp[0x4], \
tp[0x5], tp[0x4], tp[0x5], tp[0x4] ) ) ); \
m256_const1_64( tp[2] ) ) ); \
m3 = _mm256_xor_si256( m3, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x7], tp[0x6], tp[0x7], tp[0x6], \
tp[0x7], tp[0x6], tp[0x7], tp[0x6] ) ) ); \
m256_const1_64( tp[3] ) ) ); \
m4 = _mm256_xor_si256( m4, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x9], tp[0x8], tp[0x9], tp[0x8], \
tp[0x9], tp[0x8], tp[0x9], tp[0x8] ) ) ); \
m256_const1_64( tp[4] ) ) ); \
m5 = _mm256_xor_si256( m5, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0xB], tp[0xA], tp[0xB], tp[0xA], \
tp[0xB], tp[0xA], tp[0xB], tp[0xA] ) ) ); \
m256_const1_64( tp[5] ) ) ); \
m6 = _mm256_xor_si256( m6, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0xD], tp[0xC], tp[0xD], tp[0xC], \
tp[0xD], tp[0xC], tp[0xD], tp[0xC] ) ) ); \
m256_const1_64( tp[6] ) ) ); \
m7 = _mm256_xor_si256( m7, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0xF], tp[0xE], tp[0xF], tp[0xE], \
tp[0xF], tp[0xE], tp[0xF], tp[0xE] ) ) ); \
tp += 0x10; \
m256_const1_64( tp[7] ) ) ); \
tp += 8; \
db = _mm256_srli_epi64( db, 1 ); \
} \
} while (0)
@@ -662,55 +646,39 @@ do { \
#define ROUND_BIG(rc, alpha) \
do { \
__m256i t0, t1, t2, t3; \
s0 = _mm256_xor_si256( s0, _mm256_set_epi32( \
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00], \
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00] ) ); \
s1 = _mm256_xor_si256( s1, _mm256_set_epi32( \
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02], \
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02] ) ); \
s2 = _mm256_xor_si256( s2, _mm256_set_epi32( \
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04], \
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04] ) ); \
s3 = _mm256_xor_si256( s3, _mm256_set_epi32( \
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06], \
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06] ) ); \
s4 = _mm256_xor_si256( s4, _mm256_set_epi32( \
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08], \
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08] ) ); \
s5 = _mm256_xor_si256( s5, _mm256_set_epi32( \
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A], \
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A] ) ); \
s6 = _mm256_xor_si256( s6, _mm256_set_epi32( \
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C], \
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C] ) ); \
s7 = _mm256_xor_si256( s7, _mm256_set_epi32( \
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E], \
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E] ) ); \
s8 = _mm256_xor_si256( s8, _mm256_set_epi32( \
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10], \
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10] ) ); \
s9 = _mm256_xor_si256( s9, _mm256_set_epi32( \
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12], \
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12] ) ); \
sA = _mm256_xor_si256( sA, _mm256_set_epi32( \
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14], \
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14] ) ); \
sB = _mm256_xor_si256( sB, _mm256_set_epi32( \
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16], \
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16] ) ); \
sC = _mm256_xor_si256( sC, _mm256_set_epi32( \
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18], \
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18] ) ); \
sD = _mm256_xor_si256( sD, _mm256_set_epi32( \
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A], \
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A] ) ); \
sE = _mm256_xor_si256( sE, _mm256_set_epi32( \
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C], \
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C] ) ); \
sF = _mm256_xor_si256( sF, _mm256_set_epi32( \
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E], \
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E] ) ); \
__m256i t0, t1, t2, t3; \
s0 = _mm256_xor_si256( s0, m256_const1_64( \
( ( (uint64_t)( (rc) ^ alpha[1] ) << 32 ) ) | (uint64_t)alpha[0] ) ); \
s1 = _mm256_xor_si256( s1, m256_const1_64( \
( (uint64_t)alpha[ 3] << 32 ) | (uint64_t)alpha[ 2] ) ); \
s2 = _mm256_xor_si256( s2, m256_const1_64( \
( (uint64_t)alpha[ 5] << 32 ) | (uint64_t)alpha[ 4] ) ); \
s3 = _mm256_xor_si256( s3, m256_const1_64( \
( (uint64_t)alpha[ 7] << 32 ) | (uint64_t)alpha[ 6] ) ); \
s4 = _mm256_xor_si256( s4, m256_const1_64( \
( (uint64_t)alpha[ 9] << 32 ) | (uint64_t)alpha[ 8] ) ); \
s5 = _mm256_xor_si256( s5, m256_const1_64( \
( (uint64_t)alpha[11] << 32 ) | (uint64_t)alpha[10] ) ); \
s6 = _mm256_xor_si256( s6, m256_const1_64( \
( (uint64_t)alpha[13] << 32 ) | (uint64_t)alpha[12] ) ); \
s7 = _mm256_xor_si256( s7, m256_const1_64( \
( (uint64_t)alpha[15] << 32 ) | (uint64_t)alpha[14] ) ); \
s8 = _mm256_xor_si256( s8, m256_const1_64( \
( (uint64_t)alpha[17] << 32 ) | (uint64_t)alpha[16] ) ); \
s9 = _mm256_xor_si256( s9, m256_const1_64( \
( (uint64_t)alpha[19] << 32 ) | (uint64_t)alpha[18] ) ); \
sA = _mm256_xor_si256( sA, m256_const1_64( \
( (uint64_t)alpha[21] << 32 ) | (uint64_t)alpha[20] ) ); \
sB = _mm256_xor_si256( sB, m256_const1_64( \
( (uint64_t)alpha[23] << 32 ) | (uint64_t)alpha[22] ) ); \
sC = _mm256_xor_si256( sC, m256_const1_64( \
( (uint64_t)alpha[25] << 32 ) | (uint64_t)alpha[24] ) ); \
sD = _mm256_xor_si256( sD, m256_const1_64( \
( (uint64_t)alpha[27] << 32 ) | (uint64_t)alpha[26] ) ); \
sE = _mm256_xor_si256( sE, m256_const1_64( \
( (uint64_t)alpha[29] << 32 ) | (uint64_t)alpha[28] ) ); \
sF = _mm256_xor_si256( sF, m256_const1_64( \
( (uint64_t)alpha[31] << 32 ) | (uint64_t)alpha[30] ) ); \
\
SBOX( s0, s4, s8, sC ); \
SBOX( s1, s5, s9, sD ); \
@@ -864,47 +832,22 @@ void hamsi_big_final( hamsi_4way_big_context *sc, __m256i *buf )
void hamsi512_4way_init( hamsi_4way_big_context *sc )
{
sc->partial_len = 0;
sph_u32 lo, hi;
sc->count_high = sc->count_low = 0;
for ( int i = 0; i < 8; i++ )
{
lo = 2*i;
hi = 2*i + 1;
sc->h[i] = _mm256_set_epi32( IV512[hi], IV512[lo], IV512[hi], IV512[lo],
IV512[hi], IV512[lo], IV512[hi], IV512[lo] );
}
sc->h[0] = m256_const1_64( 0x6c70617273746565 );
sc->h[1] = m256_const1_64( 0x656e62656b204172 );
sc->h[2] = m256_const1_64( 0x302c206272672031 );
sc->h[3] = m256_const1_64( 0x3434362c75732032 );
sc->h[4] = m256_const1_64( 0x3030312020422d33 );
sc->h[5] = m256_const1_64( 0x656e2d484c657576 );
sc->h[6] = m256_const1_64( 0x6c65652c65766572 );
sc->h[7] = m256_const1_64( 0x6769756d2042656c );
}
void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len )
{
__m256i *vdata = (__m256i*)data;
// It looks like the only way to get in here is if core was previously called
// with a very small len
// That's not likely even with 80 byte input so deprecate partial len
/*
if ( sc->partial_len != 0 )
{
size_t mlen;
mlen = 8 - sc->partial_len;
if ( len < mlen )
{
memcpy_256( sc->partial + (sc->partial_len >> 3), data, len>>3 );
sc->partial_len += len;
return;
}
else
{
memcpy_256( sc->partial + (sc->partial_len >> 3), data, mlen>>3 );
len -= mlen;
vdata += mlen>>3;
hamsi_big( sc, sc->partial, 1 );
sc->partial_len = 0;
}
}
*/
hamsi_big( sc, vdata, len>>3 );
vdata += ( (len& ~(size_t)7) >> 3 );
len &= (size_t)7;
@@ -920,8 +863,9 @@ void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
sph_enc32be( &ch, sc->count_high );
sph_enc32be( &cl, sc->count_low + ( sc->partial_len << 3 ) );
pad[0] = _mm256_set_epi32( cl, ch, cl, ch, cl, ch, cl, ch );
sc->buf[0] = _mm256_set_epi32( 0UL, 0x80UL, 0UL, 0x80UL,
0UL, 0x80UL, 0UL, 0x80UL );
sc->buf[0] = m256_const1_64( 0x80 );
// sc->buf[0] = _mm256_set_epi32( 0UL, 0x80UL, 0UL, 0x80UL,
// 0UL, 0x80UL, 0UL, 0x80UL );
hamsi_big( sc, sc->buf, 1 );
hamsi_big_final( sc, pad );

View File

@@ -94,7 +94,7 @@ extern "C"{
#define Sb(x0, x1, x2, x3, c) \
do { \
__m256i cc = _mm256_set_epi64x( c, c, c, c ); \
__m256i cc = _mm256_set1_epi64x( c ); \
x3 = mm256_not( x3 ); \
x0 = _mm256_xor_si256( x0, _mm256_andnot_si256( x2, cc ) ); \
tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \

View File

@@ -1,12 +1,10 @@
#include "keccak-gate.h"
int64_t keccak_get_max64() { return 0x7ffffLL; }
bool register_keccak_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->get_max64 = (void*)&keccak_get_max64;
opt_target_factor = 128.0;
#if defined (KECCAK_4WAY)
gate->scanhash = (void*)&scanhash_keccak_4way;
@@ -22,7 +20,6 @@ bool register_keccakc_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
gate->get_max64 = (void*)&keccak_get_max64;
opt_target_factor = 256.0;
#if defined (KECCAK_4WAY)
gate->scanhash = (void*)&scanhash_keccak_4way;

2156
algo/lanehash/lane.c Normal file

File diff suppressed because it is too large Load Diff

50
algo/lanehash/lane.h Normal file
View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2008 Sebastiaan Indesteege
* <sebastiaan.indesteege@esat.kuleuven.be>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Optimised ANSI-C implementation of LANE
*/
#ifndef LANE_H
#define LANE_H
#include <string.h>
//#include "algo/sha/sha3-defs.h"
#include <stdint.h>
typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
//typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2, BAD_DATABITLEN = 3 } HashReturn;
//typedef unsigned char u8;
//typedef unsigned int u32;
//typedef unsigned long long u64;
typedef struct {
int hashbitlen;
uint64_t ctr;
uint32_t h[16];
uint8_t buffer[128];
} hashState;
void laneInit (hashState *state, int hashbitlen);
void laneUpdate (hashState *state, const BitSequence *data, DataLength databitlen);
void laneFinal (hashState *state, BitSequence *hashval);
void laneHash (int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
#endif /* LANE_H */

View File

@@ -1,23 +1,3 @@
/*
* luffa_for_sse2.c
* Version 2.0 (Sep 15th 2009)
*
* Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
*
* Hitachi, Ltd. is the owner of this software and hereby grant
* the U.S. Government and any interested party the right to use
* this software for the purposes of the SHA-3 evaluation process,
* notwithstanding that this software is copyrighted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <string.h>
#include <immintrin.h>
#include "luffa-hash-2way.h"
@@ -26,31 +6,30 @@
#include "simd-utils.h"
#define MASK _mm256_set_epi32( 0UL, 0UL, 0UL, 0xffffffffUL, \
0UL, 0UL, 0UL, 0xffffffffUL )
#define cns(i) m256_const1_128( ( (__m128i*)CNS_INIT)[i] )
#define ADD_CONSTANT(a,b,c0,c1)\
a = _mm256_xor_si256(a,c0);\
b = _mm256_xor_si256(b,c1);\
#define MULT2(a0,a1) \
#define MULT2( a0, a1, mask ) \
do { \
register __m256i b = _mm256_xor_si256( a0, \
_mm256_shuffle_epi32( _mm256_and_si256(a1,MASK), 16 ) ); \
__m256i b = _mm256_xor_si256( a0, \
_mm256_shuffle_epi32( _mm256_and_si256(a1,mask), 16 ) ); \
a0 = _mm256_or_si256( _mm256_srli_si256(b,4), _mm256_slli_si256(a1,12) ); \
a1 = _mm256_or_si256( _mm256_srli_si256(a1,4), _mm256_slli_si256(b,12) ); \
} while(0)
// confirm pointer arithmetic
// ok but use array indexes
#define STEP_PART(x,c,t)\
#define STEP_PART(x,c0,c1,t)\
SUBCRUMB(*x,*(x+1),*(x+2),*(x+3),*t);\
SUBCRUMB(*(x+5),*(x+6),*(x+7),*(x+4),*t);\
MIXWORD(*x,*(x+4),*t,*(t+1));\
MIXWORD(*(x+1),*(x+5),*t,*(t+1));\
MIXWORD(*(x+2),*(x+6),*t,*(t+1));\
MIXWORD(*(x+3),*(x+7),*t,*(t+1));\
ADD_CONSTANT(*x, *(x+4), *c, *(c+1));
ADD_CONSTANT(*x, *(x+4), c0, c1);
#define SUBCRUMB(a0,a1,a2,a3,t)\
t = _mm256_load_si256(&a0);\
@@ -245,7 +224,7 @@ static const uint32 CNS_INIT[128] __attribute((aligned(32))) = {
0x00000000,0x00000000,0x00000000,0xfc053c31
};
__m256i CNS[32];
/***************************************************/
/* Round function */
@@ -258,6 +237,7 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
__m256i msg0, msg1;
__m256i tmp[2];
__m256i x[8];
const __m256i MASK = m256_const2_64( 0, 0x00000000ffffffff );
t0 = chainv[0];
t1 = chainv[1];
@@ -271,7 +251,7 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
t0 = _mm256_xor_si256( t0, chainv[8] );
t1 = _mm256_xor_si256( t1, chainv[9] );
MULT2( t0, t1 );
MULT2( t0, t1, MASK );
msg0 = _mm256_shuffle_epi32( msg[0], 27 );
msg1 = _mm256_shuffle_epi32( msg[1], 27 );
@@ -290,66 +270,66 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
t0 = chainv[0];
t1 = chainv[1];
MULT2( chainv[0], chainv[1]);
MULT2( chainv[0], chainv[1], MASK );
chainv[0] = _mm256_xor_si256( chainv[0], chainv[2] );
chainv[1] = _mm256_xor_si256( chainv[1], chainv[3] );
MULT2( chainv[2], chainv[3]);
MULT2( chainv[2], chainv[3], MASK );
chainv[2] = _mm256_xor_si256(chainv[2], chainv[4]);
chainv[3] = _mm256_xor_si256(chainv[3], chainv[5]);
MULT2( chainv[4], chainv[5]);
MULT2( chainv[4], chainv[5], MASK );
chainv[4] = _mm256_xor_si256(chainv[4], chainv[6]);
chainv[5] = _mm256_xor_si256(chainv[5], chainv[7]);
MULT2( chainv[6], chainv[7]);
MULT2( chainv[6], chainv[7], MASK );
chainv[6] = _mm256_xor_si256(chainv[6], chainv[8]);
chainv[7] = _mm256_xor_si256(chainv[7], chainv[9]);
MULT2( chainv[8], chainv[9]);
MULT2( chainv[8], chainv[9], MASK );
chainv[8] = _mm256_xor_si256( chainv[8], t0 );
chainv[9] = _mm256_xor_si256( chainv[9], t1 );
t0 = chainv[8];
t1 = chainv[9];
MULT2( chainv[8], chainv[9]);
MULT2( chainv[8], chainv[9], MASK );
chainv[8] = _mm256_xor_si256( chainv[8], chainv[6] );
chainv[9] = _mm256_xor_si256( chainv[9], chainv[7] );
MULT2( chainv[6], chainv[7]);
MULT2( chainv[6], chainv[7], MASK );
chainv[6] = _mm256_xor_si256( chainv[6], chainv[4] );
chainv[7] = _mm256_xor_si256( chainv[7], chainv[5] );
MULT2( chainv[4], chainv[5]);
MULT2( chainv[4], chainv[5], MASK );
chainv[4] = _mm256_xor_si256( chainv[4], chainv[2] );
chainv[5] = _mm256_xor_si256( chainv[5], chainv[3] );
MULT2( chainv[2], chainv[3] );
MULT2( chainv[2], chainv[3], MASK );
chainv[2] = _mm256_xor_si256( chainv[2], chainv[0] );
chainv[3] = _mm256_xor_si256( chainv[3], chainv[1] );
MULT2( chainv[0], chainv[1] );
MULT2( chainv[0], chainv[1], MASK );
chainv[0] = _mm256_xor_si256( _mm256_xor_si256( chainv[0], t0 ), msg0 );
chainv[1] = _mm256_xor_si256( _mm256_xor_si256( chainv[1], t1 ), msg1 );
MULT2( msg0, msg1);
MULT2( msg0, msg1, MASK );
chainv[2] = _mm256_xor_si256( chainv[2], msg0 );
chainv[3] = _mm256_xor_si256( chainv[3], msg1 );
MULT2( msg0, msg1);
MULT2( msg0, msg1, MASK );
chainv[4] = _mm256_xor_si256( chainv[4], msg0 );
chainv[5] = _mm256_xor_si256( chainv[5], msg1 );
MULT2( msg0, msg1);
MULT2( msg0, msg1, MASK );
chainv[6] = _mm256_xor_si256( chainv[6], msg0 );
chainv[7] = _mm256_xor_si256( chainv[7], msg1 );
MULT2( msg0, msg1);
MULT2( msg0, msg1, MASK );
chainv[8] = _mm256_xor_si256( chainv[8], msg0 );
chainv[9] = _mm256_xor_si256( chainv[9], msg1 );
MULT2( msg0, msg1);
MULT2( msg0, msg1, MASK );
chainv[3] = _mm256_or_si256( _mm256_slli_epi32( chainv[3], 1 ),
_mm256_srli_epi32( chainv[3], 31 ) );
@@ -365,14 +345,14 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
chainv[1],chainv[3],chainv[5],chainv[7],
x[4], x[5], x[6], x[7] );
STEP_PART( &x[0], &CNS[ 0], &tmp[0] );
STEP_PART( &x[0], &CNS[ 2], &tmp[0] );
STEP_PART( &x[0], &CNS[ 4], &tmp[0] );
STEP_PART( &x[0], &CNS[ 6], &tmp[0] );
STEP_PART( &x[0], &CNS[ 8], &tmp[0] );
STEP_PART( &x[0], &CNS[10], &tmp[0] );
STEP_PART( &x[0], &CNS[12], &tmp[0] );
STEP_PART( &x[0], &CNS[14], &tmp[0] );
STEP_PART( &x[0], cns( 0), cns( 1), &tmp[0] );
STEP_PART( &x[0], cns( 2), cns( 3), &tmp[0] );
STEP_PART( &x[0], cns( 4), cns( 5), &tmp[0] );
STEP_PART( &x[0], cns( 6), cns( 7), &tmp[0] );
STEP_PART( &x[0], cns( 8), cns( 9), &tmp[0] );
STEP_PART( &x[0], cns(10), cns(11), &tmp[0] );
STEP_PART( &x[0], cns(12), cns(13), &tmp[0] );
STEP_PART( &x[0], cns(14), cns(15), &tmp[0] );
MIXTON1024( x[0], x[1], x[2], x[3],
chainv[0], chainv[2], chainv[4],chainv[6],
@@ -380,25 +360,24 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
chainv[1],chainv[3],chainv[5],chainv[7]);
/* Process last 256-bit block */
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[16], CNS[17],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(16), cns(17),
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[18], CNS[19],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(18), cns(19),
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[20], CNS[21],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(20), cns(21),
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[22], CNS[23],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(22), cns(23),
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[24], CNS[25],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(24), cns(25),
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[26], CNS[27],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(26), cns(27),
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[28], CNS[29],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(28), cns(29),
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[30], CNS[31],
STEP_PART2( chainv[8], chainv[9], t0, t1, cns(30), cns(31),
tmp[0], tmp[1] );
}
/***************************************************/
/* Finalization function */
/* state: hash context */
@@ -410,8 +389,9 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
__m256i* chainv = state->chainv;
__m256i t[2];
__m256i zero[2];
zero[0] = zero[1] = _mm256_setzero_si256();
zero[0] = zero[1] = m256_zero;
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
0x0405060700010203 );
/*---- blank round with m=0 ----*/
rnd512_2way( state, zero );
@@ -433,8 +413,10 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
_mm256_store_si256( (__m256i*)&hash[0], t[0] );
_mm256_store_si256( (__m256i*)&hash[8], t[1] );
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
casti_m256i( b, 0 ) = _mm256_shuffle_epi8(
casti_m256i( hash, 0 ), shuff_bswap32 );
casti_m256i( b, 1 ) = _mm256_shuffle_epi8(
casti_m256i( hash, 1 ), shuff_bswap32 );
rnd512_2way( state, zero );
@@ -455,26 +437,27 @@ void finalization512_2way( luffa_2way_context *state, uint32 *b )
_mm256_store_si256( (__m256i*)&hash[0], t[0] );
_mm256_store_si256( (__m256i*)&hash[8], t[1] );
casti_m256i( b, 2 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
casti_m256i( b, 3 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
casti_m256i( b, 2 ) = _mm256_shuffle_epi8(
casti_m256i( hash, 0 ), shuff_bswap32 );
casti_m256i( b, 3 ) = _mm256_shuffle_epi8(
casti_m256i( hash, 1 ), shuff_bswap32 );
}
int luffa_2way_init( luffa_2way_context *state, int hashbitlen )
{
int i;
state->hashbitlen = hashbitlen;
for ( i=0; i<32; i++ ) CNS[i] =
_mm256_set_epi32( CNS_INIT[ (i<<2) + 3 ], CNS_INIT[ (i<<2) +2 ],
CNS_INIT[ (i<<2) + 1 ], CNS_INIT[ (i<<2) ],
CNS_INIT[ (i<<2) + 3 ], CNS_INIT[ (i<<2) +2 ],
CNS_INIT[ (i<<2) + 1 ], CNS_INIT[ (i<<2) ] );
for ( i=0; i<10; i++ ) state->chainv[i] =
_mm256_set_epi32( IV[ (i<<2) +3 ], IV[ (i<<2) +2 ],
IV[ (i<<2) +1 ], IV[ (i<<2) ],
IV[ (i<<2) +3 ], IV[ (i<<2) +2 ],
IV[ (i<<2) +1 ], IV[ (i<<2) ] );
__m128i *iv = (__m128i*)IV;
state->chainv[0] = m256_const1_128( iv[0] );
state->chainv[1] = m256_const1_128( iv[1] );
state->chainv[2] = m256_const1_128( iv[2] );
state->chainv[3] = m256_const1_128( iv[3] );
state->chainv[4] = m256_const1_128( iv[4] );
state->chainv[5] = m256_const1_128( iv[5] );
state->chainv[6] = m256_const1_128( iv[6] );
state->chainv[7] = m256_const1_128( iv[7] );
state->chainv[8] = m256_const1_128( iv[8] );
state->chainv[9] = m256_const1_128( iv[9] );
((__m256i*)state->buffer)[0] = m256_zero;
((__m256i*)state->buffer)[1] = m256_zero;
@@ -492,13 +475,15 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
__m256i msg[2];
int i;
int blocks = (int)len >> 5;
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
0x0405060700010203 );
state-> rembytes = (int)len & 0x1F;
// full blocks
for ( i = 0; i < blocks; i++, vdata+=2 )
{
msg[0] = mm256_bswap_32( vdata[ 0] );
msg[1] = mm256_bswap_32( vdata[ 1 ] );
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
rnd512_2way( state, msg );
}
@@ -507,9 +492,8 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
if ( state->rembytes )
{
// remaining data bytes
buffer[0] = mm256_bswap_32( vdata[0] );
buffer[1] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
buffer[0] = _mm256_shuffle_epi8( vdata[0], shuff_bswap32 );
buffer[1] = m256_const2_64( 0, 0x0000000080000000 );
}
return 0;
}
@@ -525,8 +509,7 @@ int luffa_2way_close( luffa_2way_context *state, void *hashval )
rnd512_2way( state, buffer );
else
{ // empty pad block, constant data
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
msg[0] = m256_const2_64( 0, 0x0000000080000000 );
msg[1] = m256_zero;
rnd512_2way( state, msg );
}
@@ -545,13 +528,16 @@ int luffa_2way_update_close( luffa_2way_context *state,
__m256i msg[2];
int i;
const int blocks = (int)( inlen >> 5 );
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
0x0405060700010203 );
state->rembytes = inlen & 0x1F;
// full blocks
for ( i = 0; i < blocks; i++, vdata+=2 )
{
msg[0] = mm256_bswap_32( vdata[ 0 ] );
msg[1] = mm256_bswap_32( vdata[ 1 ] );
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
msg[1] = _mm256_shuffle_epi8( vdata[ 1 ], shuff_bswap32 );
rnd512_2way( state, msg );
}
@@ -559,16 +545,14 @@ int luffa_2way_update_close( luffa_2way_context *state,
if ( state->rembytes )
{
// padding of partial block
msg[0] = mm256_bswap_32( vdata[0] );
msg[1] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
msg[0] = _mm256_shuffle_epi8( vdata[ 0 ], shuff_bswap32 );
msg[1] = m256_const2_64( 0, 0x0000000080000000 );
rnd512_2way( state, msg );
}
else
{
// empty pad block
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
msg[0] = m256_const2_64( 0, 0x0000000080000000 );
msg[1] = m256_zero;
rnd512_2way( state, msg );
}

View File

@@ -541,7 +541,9 @@ static void finalization512( hashState_luffa *state, uint32 *b )
uint32 hash[8] __attribute((aligned(64)));
__m256i* chainv = (__m256i*)state->chainv;
__m256i t;
const __m128i zero = _mm_setzero_si128();
const __m128i zero = m128_zero;
const __m256i shuff_bswap32 = m256_const2_64( 0x0c0d0e0f08090a0b,
0x0405060700010203 );
rnd512( state, zero, zero );
@@ -555,7 +557,9 @@ static void finalization512( hashState_luffa *state, uint32 *b )
_mm256_store_si256( (__m256i*)hash, t );
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
casti_m256i( b, 0 ) = _mm256_shuffle_epi8(
casti_m256i( hash, 0 ), shuff_bswap32 );
// casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
rnd512( state, zero, zero );
@@ -568,7 +572,9 @@ static void finalization512( hashState_luffa *state, uint32 *b )
_mm256_store_si256( (__m256i*)hash, t );
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
casti_m256i( b, 1 ) = _mm256_shuffle_epi8(
casti_m256i( hash, 0 ), shuff_bswap32 );
// casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
}
#else

View File

@@ -127,7 +127,6 @@ bool register_lyra2z_algo( algo_gate_t* gate )
gate->hash = (void*)&lyra2z_hash;
#endif
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};
@@ -147,15 +146,12 @@ bool register_lyra2h_algo( algo_gate_t* gate )
gate->hash = (void*)&lyra2h_hash;
#endif
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};
/////////////////////////////////
int64_t allium_get_max64_0xFFFFLL() { return 0xFFFFLL; }
bool register_allium_algo( algo_gate_t* gate )
{
#if defined (ALLIUM_4WAY)
@@ -168,7 +164,6 @@ bool register_allium_algo( algo_gate_t* gate )
gate->hash = (void*)&allium_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->get_max64 = (void*)&allium_get_max64_0xFFFFLL;
opt_target_factor = 256.0;
return true;
};
@@ -214,7 +209,6 @@ bool register_phi2_algo( algo_gate_t* gate )
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
gate->build_extraheader = (void*)&phi2_build_extraheader;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
#if defined(PHI2_4WAY)
gate->scanhash = (void*)&scanhash_phi2_4way;

View File

@@ -113,18 +113,12 @@ int scanhash_lyra2re( struct work *work, uint32_t max_nonce,
return 0;
}
int64_t lyra2re_get_max64 ()
{
return 0xffffLL;
}
bool register_lyra2re_algo( algo_gate_t* gate )
{
init_lyra2re_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_lyra2re;
gate->hash = (void*)&lyra2re_hash;
gate->get_max64 = (void*)&lyra2re_get_max64;
opt_target_factor = 128.0;
return true;
};

View File

@@ -113,17 +113,18 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
lyra2rev3_8way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash7[lane] <= Htarg ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
} while ( likely( (n < max_nonce-8) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
return 0;
}

View File

@@ -70,7 +70,6 @@ bool register_lyra2z330_algo( algo_gate_t* gate )
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z330;
gate->hash = (void*)&lyra2z330_hash;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};

View File

@@ -263,10 +263,9 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In,
#if defined (__AVX2__)
register __m256i state0, state1, state2, state3;
const __m256i zero = m256_zero;
state0 = zero;
state1 = zero;
state0 =
state1 = m256_zero;
state2 = m256_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL,
0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
state3 = m256_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL,
@@ -290,12 +289,11 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In,
#elif defined (__SSE2__)
__m128i state0, state1, state2, state3, state4, state5, state6, state7;
const __m128i zero = m128_zero;
state0 = zero;
state1 = zero;
state2 = zero;
state3 = zero;
state0 =
state1 =
state2 =
state3 = m128_zero;
state4 = m128_const_64( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
state5 = m128_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL );
state6 = m128_const_64( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL );

View File

@@ -296,8 +296,6 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
pdata[19] = n;
// can this be skipped after finding a share? Seems to work ok.
//out:
mpf_set_prec_raw(magifpi, prec0);
mpf_set_prec_raw(magifpi0, prec0);
mpf_set_prec_raw(mptmp, prec0);
@@ -323,7 +321,6 @@ bool register_m7m_algo( algo_gate_t *gate )
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->get_max64 = (void*)&get_max64_0x1ffff;
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
opt_target_factor = 65536.0;
return true;

View File

@@ -208,12 +208,6 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
++(*nonceptr);
}
int64_t zr5_get_max64 ()
{
// return 0x1ffffLL;
return 0x1fffffLL;
}
void zr5_display_pok( struct work* work )
{
if ( work->data[0] & 0x00008000 )
@@ -229,7 +223,6 @@ bool register_zr5_algo( algo_gate_t* gate )
gate->get_new_work = (void*)&zr5_get_new_work;
gate->scanhash = (void*)&scanhash_zr5;
gate->hash = (void*)&zr5hash;
gate->get_max64 = (void*)&zr5_get_max64;
gate->decode_extra_data = (void*)&zr5_display_pok;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;

View File

@@ -94,8 +94,6 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
g_work->data[28] = 0x80000000;
}
int64_t lbry_get_max64() { return 0x1ffffLL; }
int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }
bool register_lbry_algo( algo_gate_t* gate )
@@ -112,7 +110,6 @@ bool register_lbry_algo( algo_gate_t* gate )
gate->hash = (void*)&lbry_hash;
#endif
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
gate->get_max64 = (void*)&lbry_get_max64;
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
// gate->build_block_header = (void*)&build_block_header;
gate->build_extraheader = (void*)&lbry_build_extraheader;

View File

@@ -1070,17 +1070,6 @@ int scanhash_neoscrypt( struct work *work,
return 0;
}
int64_t get_neoscrypt_max64() { return 0x3ffff; }
void neoscrypt_wait_for_diff( struct stratum_ctx *stratum )
{
while ( !stratum->job.diff )
{
// applog(LOG_DEBUG, "Waiting for Stratum to set the job difficulty");
sleep(1);
}
}
int neoscrypt_get_work_data_size () { return 80; }
bool register_neoscrypt_algo( algo_gate_t* gate )
@@ -1088,8 +1077,6 @@ bool register_neoscrypt_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT;
gate->scanhash = (void*)&scanhash_neoscrypt;
gate->hash = (void*)&neoscrypt;
gate->get_max64 = (void*)&get_neoscrypt_max64;
gate->wait_for_diff = (void*)&neoscrypt_wait_for_diff;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;

View File

@@ -483,11 +483,6 @@ int scanhash_pluck( struct work *work, uint32_t max_nonce,
return 0;
}
int64_t pluck_get_max64 ()
{
return 0x1ffLL;
}
bool pluck_miner_thread_init( int thr_id )
{
scratchbuf = malloc( 128 * 1024 );
@@ -503,7 +498,6 @@ bool register_pluck_algo( algo_gate_t* gate )
gate->miner_thread_init = (void*)&pluck_miner_thread_init;
gate->scanhash = (void*)&scanhash_pluck;
gate->hash = (void*)&pluck_hash;
gate->get_max64 = (void*)&pluck_get_max64;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -766,8 +766,6 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
return 0;
}
int64_t scrypt_get_max64() { return 0xfff; }
bool scrypt_miner_thread_init( int thr_id )
{
scratchbuf = scrypt_buffer_alloc( scratchbuf_size );
@@ -783,10 +781,8 @@ bool register_scrypt_algo( algo_gate_t* gate )
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
gate->scanhash = (void*)&scanhash_scrypt;
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
gate->get_max64 = (void*)&scrypt_get_max64;
opt_target_factor = 65536.0;
if ( !opt_param_n )
{
opt_param_n = 1024;

View File

@@ -240,7 +240,6 @@ bool register_scryptjane_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_scryptjane;
gate->hash = (void*)&scryptjanehash;
gate->get_max64 = (void*)&get_max64_0x40LL;
opt_target_factor = 65536.0;
// figure out if arg in N or Nfactor

View File

@@ -305,9 +305,11 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
low = low << 3;
sc->buf[ pad >> 2 ] =
mm128_bswap_32( _mm_set1_epi32( high ) );
mm128_bswap_32( m128_const1_32( high ) );
// mm128_bswap_32( _mm_set1_epi32( high ) );
sc->buf[ ( pad+4 ) >> 2 ] =
mm128_bswap_32( _mm_set1_epi32( low ) );
mm128_bswap_32( m128_const1_32( low ) );
// mm128_bswap_32( _mm_set1_epi32( low ) );
sha256_4way_round( sc, sc->buf, sc->val );
mm128_block_bswap_32( dst, sc->val );
@@ -538,9 +540,9 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
low = low << 3;
sc->buf[ pad >> 2 ] =
mm256_bswap_32( _mm256_set1_epi32( high ) );
mm256_bswap_32( m256_const1_32( high ) );
sc->buf[ ( pad+4 ) >> 2 ] =
mm256_bswap_32( _mm256_set1_epi32( low ) );
mm256_bswap_32( m256_const1_32( low ) );
sha256_8way_round( sc, sc->buf, sc->val );

View File

@@ -15,7 +15,6 @@ bool register_sha256t_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
#endif
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
}
@@ -34,7 +33,6 @@ bool register_sha256q_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_sha256q;
gate->hash = (void*)&sha256q_hash;
#endif
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
}

View File

@@ -252,16 +252,6 @@ void sha512_4way_init( sha512_4way_context *sc )
{
sc->initialized = false;
sc->count = 0;
/*
sc->val[0] = _mm256_set1_epi64x( H512[0] );
sc->val[1] = _mm256_set1_epi64x( H512[1] );
sc->val[2] = _mm256_set1_epi64x( H512[2] );
sc->val[3] = _mm256_set1_epi64x( H512[3] );
sc->val[4] = _mm256_set1_epi64x( H512[4] );
sc->val[5] = _mm256_set1_epi64x( H512[5] );
sc->val[6] = _mm256_set1_epi64x( H512[6] );
sc->val[7] = _mm256_set1_epi64x( H512[7] );
*/
}
void sha512_4way( sha512_4way_context *sc, const void *data, size_t len )
@@ -295,6 +285,8 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
unsigned ptr;
const int buf_size = 128;
const int pad = buf_size - 16;
const __m256i shuff_bswap64 = m256_const2_64( 0x08090a0b0c0d0e0f,
0x0001020304050607 );
ptr = (unsigned)sc->count & (buf_size - 1U);
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );
@@ -308,10 +300,10 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
else
memset_zero_256( sc->buf + (ptr>>3), (pad - ptr) >> 3 );
sc->buf[ pad >> 3 ] =
mm256_bswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
sc->buf[ ( pad+8 ) >> 3 ] =
mm256_bswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
sc->buf[ pad >> 3 ] = _mm256_shuffle_epi8(
_mm256_set1_epi64x( sc->count >> 61 ), shuff_bswap64 );
sc->buf[ ( pad+8 ) >> 3 ] = _mm256_shuffle_epi8(
_mm256_set1_epi64x( sc->count << 3 ), shuff_bswap64 );
sha512_4way_round( sc, sc->buf, sc->val );
mm256_block_bswap_64( dst, sc->val );

View File

@@ -5,6 +5,7 @@
#if defined(__AVX2__)
static const uint32_t IV512[] =
{
0x72FCCDD8, 0x79CA4727, 0x128A077B, 0x40D55AEC,
@@ -13,6 +14,7 @@ static const uint32_t IV512[] =
0xE275EADE, 0x502D9FCD, 0xB9357178, 0x022A4B9A
};
#define mm256_ror2x256hi_1x32( a, b ) \
_mm256_blend_epi32( mm256_ror1x32_128( a ), \
mm256_ror1x32_128( b ), 0x88 )
@@ -232,18 +234,14 @@ c512_2way( shavite512_2way_context *ctx, const void *msg )
void shavite512_2way_init( shavite512_2way_context *ctx )
{
casti_m256i( ctx->h, 0 ) =
_mm256_set_epi32( IV512[ 3], IV512[ 2], IV512[ 1], IV512[ 0],
IV512[ 3], IV512[ 2], IV512[ 1], IV512[ 0] );
casti_m256i( ctx->h, 1 ) =
_mm256_set_epi32( IV512[ 7], IV512[ 6], IV512[ 5], IV512[ 4],
IV512[ 7], IV512[ 6], IV512[ 5], IV512[ 4] );
casti_m256i( ctx->h, 2 ) =
_mm256_set_epi32( IV512[11], IV512[10], IV512[ 9], IV512[ 8],
IV512[11], IV512[10], IV512[ 9], IV512[ 8] );
casti_m256i( ctx->h, 3 ) =
_mm256_set_epi32( IV512[15], IV512[14], IV512[13], IV512[12],
IV512[15], IV512[14], IV512[13], IV512[12] );
__m256i *h = (__m256i*)ctx->h;
__m128i *iv = (__m128i*)IV512;
h[0] = m256_const1_128( iv[0] );
h[1] = m256_const1_128( iv[1] );
h[2] = m256_const1_128( iv[2] );
h[3] = m256_const1_128( iv[3] );
ctx->ptr = 0;
ctx->count0 = 0;
ctx->count1 = 0;
@@ -251,6 +249,7 @@ void shavite512_2way_init( shavite512_2way_context *ctx )
ctx->count3 = 0;
}
// not tested, use update_close
void shavite512_2way_update( shavite512_2way_context *ctx, const void *data,
size_t len )
{
@@ -287,6 +286,7 @@ void shavite512_2way_update( shavite512_2way_context *ctx, const void *data,
ctx->ptr = ptr;
}
// not tested
void shavite512_2way_close( shavite512_2way_context *ctx, void *dst )
{
unsigned char *buf;
@@ -300,7 +300,7 @@ void shavite512_2way_close( shavite512_2way_context *ctx, void *dst )
uint32_t vp = ctx->ptr>>5;
// Terminating byte then zero pad
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
casti_m256i( buf, vp++ ) = m256_const2_64( 0, 0x0000000000000080 );
// Zero pad full vectors up to count
for ( ; vp < 6; vp++ )
@@ -314,14 +314,12 @@ void shavite512_2way_close( shavite512_2way_context *ctx, void *dst )
count.u32[2] = ctx->count2;
count.u32[3] = ctx->count3;
casti_m256i( buf, 6 ) = _mm256_set_epi16( count.u16[0], 0,0,0,0,0,0,0,
count.u16[0], 0,0,0,0,0,0,0 );
casti_m256i( buf, 7 ) = _mm256_set_epi16(
0x0200 , count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1],
0x0200 , count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1] );
casti_m256i( buf, 6 ) = m256_const1_128(
_mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
casti_m256i( buf, 7 ) = m256_const1_128( _mm_set_epi16(
0x0200, count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );
c512_2way( ctx, buf);
casti_m256i( dst, 0 ) = casti_m256i( ctx->h, 0 );
@@ -382,23 +380,21 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
if ( vp == 0 ) // empty buf, xevan.
{
casti_m256i( buf, 0 ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
casti_m256i( buf, 0 ) = m256_const2_64( 0, 0x0000000000000080 );
memset_zero_256( (__m256i*)buf + 1, 5 );
ctx->count0 = ctx->count1 = ctx->count2 = ctx->count3 = 0;
}
else // half full buf, everyone else.
{
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
casti_m256i( buf, vp++ ) = m256_const2_64( 0, 0x0000000000000080 );
memset_zero_256( (__m256i*)buf + vp, 6 - vp );
}
casti_m256i( buf, 6 ) = _mm256_set_epi16( count.u16[0], 0,0,0,0,0,0,0,
count.u16[0], 0,0,0,0,0,0,0 );
casti_m256i( buf, 7 ) = _mm256_set_epi16(
0x0200 , count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1],
0x0200 , count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1] );
casti_m256i( buf, 6 ) = m256_const1_128(
_mm_insert_epi16( m128_zero, count.u16[0], 7 ) );
casti_m256i( buf, 7 ) = m256_const1_128( _mm_set_epi16(
0x0200, count.u16[7], count.u16[6], count.u16[5],
count.u16[4], count.u16[3], count.u16[2], count.u16[1] ) );
c512_2way( ctx, buf);

View File

@@ -110,14 +110,26 @@ static const m256_v16 FFT256_Twiddle[] =
// imported from vector.c
#define REDUCE(x) \
_mm256_sub_epi16( _mm256_and_si256( x, m256_const1_64( \
0x00ff00ff00ff00ff ) ), _mm256_srai_epi16( x, 8 ) )
/*
#define REDUCE(x) \
_mm256_sub_epi16( _mm256_and_si256( x, _mm256_set1_epi16( 255 ) ), \
_mm256_srai_epi16( x, 8 ) )
*/
#define EXTRA_REDUCE_S(x)\
_mm256_sub_epi16( x, _mm256_and_si256( \
m256_const1_64( 0x0101010101010101 ), \
_mm256_cmpgt_epi16( x, m256_const1_64( 0x0080008000800080 ) ) ) )
/*
#define EXTRA_REDUCE_S(x)\
_mm256_sub_epi16( x, \
_mm256_and_si256( _mm256_set1_epi16( 257 ), \
_mm256_cmpgt_epi16( x, _mm256_set1_epi16( 128 ) ) ) )
*/
#define REDUCE_FULL_S( x ) EXTRA_REDUCE_S( REDUCE (x ) )

View File

@@ -2,8 +2,6 @@
#include "sph_skein.h"
#include "skein-hash-4way.h"
int64_t skein_get_max64() { return 0x7ffffLL; }
bool register_skein_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT | SHA_OPT;
@@ -14,7 +12,6 @@ bool register_skein_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_skein;
gate->hash = (void*)&skeinhash;
#endif
gate->get_max64 = (void*)&skein_get_max64;
return true;
};

View File

@@ -2,11 +2,6 @@
#include <stdint.h>
#include "sph_skein.h"
int64_t skein2_get_max64 ()
{
return 0x7ffffLL;
}
bool register_skein2_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
@@ -17,7 +12,6 @@ bool register_skein2_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_skein2;
gate->hash = (void*)&skein2hash;
#endif
gate->get_max64 = (void*)&skein2_get_max64;
return true;
};

View File

@@ -181,7 +181,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
for( j =0; j < 16; j++ )
{
SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
mm128_rol_32( T, j ) ), 7 );
mm128_rol_var_32( T, j ) ), 7 );
SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ),
SS2 ), W1[j] );
@@ -201,7 +201,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
for( j =16; j < 64; j++ )
{
SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
mm128_rol_32( T, j&31 ) ), 7 );
mm128_rol_var_32( T, j&31 ) ), 7 );
SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ),
SS2 ), W1[j] );

View File

@@ -0,0 +1,369 @@
#include "Swifftx_sha3.h"
extern "C" {
#include "SWIFFTX.h"
}
#include <math.h>
#include <stdlib.h>
#include <string.h>
// The default salt value.
// This is the expansion of e (Euler's number) - the 19 digits after 2.71:
// 8281828459045235360.
// The above in base 256, from MSB to LSB:
BitSequence SWIF_saltValueChar[SWIF_HAIFA_SALT_SIZE] = {114, 238, 247, 26, 192, 28, 170, 160};
// All the IVs here below were produced from the decimal digits of e's expansion.
// The code can be found in 'ProduceRandomIV.c'.
// The initial value for 224 digest size.
const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE] =
{37, 242, 132, 2, 167, 81, 158, 237, 113, 77, 162, 60, 65, 236, 108, 246,
101, 72, 190, 109, 58, 205, 99, 6, 114, 169, 104, 114, 38, 146, 121, 142,
59, 98, 233, 84, 72, 227, 22, 199, 17, 102, 198, 145, 24, 178, 37, 1,
215, 245, 66, 120, 230, 193, 113, 253, 165, 218, 66, 134, 49, 231, 124, 204,
0};
// The initial value for 256 digest size.
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE] =
{250, 50, 42, 40, 14, 233, 53, 48, 227, 42, 237, 187, 211, 120, 209, 234,
27, 144, 4, 61, 243, 244, 29, 247, 37, 162, 70, 11, 231, 196, 53, 6,
193, 240, 94, 126, 204, 132, 104, 46, 114, 29, 3, 104, 118, 184, 201, 3,
57, 77, 91, 101, 31, 155, 84, 199, 228, 39, 198, 42, 248, 198, 201, 178,
8};
// The initial value for 384 digest size.
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE] =
{40, 145, 193, 100, 205, 171, 47, 76, 254, 10, 196, 41, 165, 207, 200, 79,
109, 13, 75, 201, 17, 172, 64, 162, 217, 22, 88, 39, 51, 30, 220, 151,
133, 73, 216, 233, 184, 203, 77, 0, 248, 13, 28, 199, 30, 147, 232, 242,
227, 124, 169, 174, 14, 45, 27, 87, 254, 73, 68, 136, 135, 159, 83, 152,
0};
// The initial value for 512 digest size.
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE] =
{195, 126, 197, 167, 157, 114, 99, 126, 208, 105, 200, 90, 71, 195, 144, 138,
142, 122, 123, 116, 24, 214, 168, 173, 203, 183, 194, 210, 102, 117, 138, 42,
114, 118, 132, 33, 35, 149, 143, 163, 163, 183, 243, 175, 72, 22, 201, 255,
102, 243, 22, 187, 211, 167, 239, 76, 164, 70, 80, 182, 181, 212, 9, 185,
0};
///////////////////////////////////////////////////////////////////////////////////////////////
// NIST API implementation portion.
///////////////////////////////////////////////////////////////////////////////////////////////
int Swifftx::Init(int hashbitlen)
{
switch(hashbitlen)
{
case 224:
swifftxState.hashbitlen = hashbitlen;
// Initializes h_0 in HAIFA:
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_224, SWIFFTX_OUTPUT_BLOCK_SIZE);
break;
case 256:
swifftxState.hashbitlen = hashbitlen;
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_256, SWIFFTX_OUTPUT_BLOCK_SIZE);
break;
case 384:
swifftxState.hashbitlen = hashbitlen;
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_384, SWIFFTX_OUTPUT_BLOCK_SIZE);
break;
case 512:
swifftxState.hashbitlen = hashbitlen;
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_512, SWIFFTX_OUTPUT_BLOCK_SIZE);
break;
default:
return BAD_HASHBITLEN;
}
swifftxState.wasUpdated = false;
swifftxState.remainingSize = 0;
memset(swifftxState.remaining, 0, SWIF_HAIFA_INPUT_BLOCK_SIZE);
memset(swifftxState.numOfBitsChar, 0, SWIF_HAIFA_NUM_OF_BITS_SIZE);
// Initialize the salt with the default value.
memcpy(swifftxState.salt, SWIF_saltValueChar, SWIF_HAIFA_SALT_SIZE);
InitializeSWIFFTX();
return SUCCESS;
}
int Swifftx::Update(const BitSequence *data, DataLength databitlen)
{
// The size of input in bytes after putting the remaining data from previous invocation.
int sizeOfInputAfterRemaining = 0;
// The input block to compression function of SWIFFTX:
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
// Whether we handled a single block.
bool wasSingleBlockHandled = false;
swifftxState.wasUpdated = true;
// Handle an empty message as required by NIST. Since 'Final()' is oblivious to the input
// (but of course uses the output of the compression function from the previous round,
// which is called h_{i-1} in HAIFA article), we have to do nothing here.
if (databitlen == 0)
return SUCCESS;
// If we had before an input with unaligned length, return an error
if (swifftxState.remainingSize % 8)
{
return INPUT_DATA_NOT_ALIGNED;
}
// Convert remaining size to bytes.
swifftxState.remainingSize /= 8;
// As long as we have enough data combined from (remaining + data) to fill input block
//NASTAVENIE RUND
while (((databitlen / 8) + swifftxState.remainingSize) >= SWIF_HAIFA_INPUT_BLOCK_SIZE)
{
// Fill the input block with data:
// 1. The output of the previous block:
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
// 2. The input part of the block:
// 2a. The remaining data from the previous 'Update()' call:
if (swifftxState.remainingSize)
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
swifftxState.remainingSize);
// 2b. The input data that we have place for after the 'remaining':
sizeOfInputAfterRemaining = SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
- ((int) swifftxState.remainingSize) - SWIF_HAIFA_NUM_OF_BITS_SIZE
- SWIF_HAIFA_SALT_SIZE;
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize,
data, sizeOfInputAfterRemaining);
// 3. The #bits part of the block:
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
+ sizeOfInputAfterRemaining,
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
// 4. The salt part of the block:
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
+ sizeOfInputAfterRemaining + SWIF_HAIFA_NUM_OF_BITS_SIZE,
swifftxState.salt, SWIF_HAIFA_SALT_SIZE);
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, false);
// Update the #bits field with SWIF_HAIFA_INPUT_BLOCK_SIZE.
AddToCurrInBase256(swifftxState.numOfBitsChar, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
wasSingleBlockHandled = true;
data += sizeOfInputAfterRemaining;
databitlen -= (sizeOfInputAfterRemaining * 8);
swifftxState.remainingSize = 0;
}
// Update the swifftxState.remaining and swifftxState.remainingSize.
// remainingSize will be in bits after exiting 'Update()'.
if (wasSingleBlockHandled)
{
swifftxState.remainingSize = (unsigned int) databitlen; // now remaining size is in bits.
if (swifftxState.remainingSize)
memcpy(swifftxState.remaining, data, (swifftxState.remainingSize + 7) / 8);
}
else
{
memcpy(swifftxState.remaining + swifftxState.remainingSize, data,
(size_t) (databitlen + 7) / 8);
swifftxState.remainingSize = (swifftxState.remainingSize * 8) + (unsigned short) databitlen;
}
return SUCCESS;
}
int Swifftx::Final(BitSequence *hashval)
{
int i;
// Whether to add one last block. True if the padding appended to the last block overflows
// the block size.
bool toAddFinalBlock = false;
bool toPutOneInFinalBlock = false;
unsigned short oneShift = 0;
// The size of the last input block before the zeroes padding. We add 1 here because we
// include the final '1' bit in the calculation and 7 as we round the length to bytes.
unsigned short sizeOfLastInputBlock = (swifftxState.remainingSize + 1 + 7) / 8;
// The number of bytes of zero in the padding part.
// The padding contains:
// 1. A single 1 bit.
// 2. As many zeroes as needed.
// 3. The message length in bits. Occupies SWIF_HAIFA_NUM_OF_BITS_SIZE bytes.
// 4. The digest size. Maximum is 512, so we need 2 bytes.
// If the total number achieved is negative, add an additional block, as HAIFA specifies.
short numOfZeroBytesInPadding = (short) SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
- sizeOfLastInputBlock - (2 * SWIF_HAIFA_NUM_OF_BITS_SIZE) - 2
- SWIF_HAIFA_SALT_SIZE;
// The input block to compression function of SWIFFTX:
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
// The message length in base 256.
BitSequence messageLengthChar[SWIF_HAIFA_NUM_OF_BITS_SIZE] = {0};
// The digest size used for padding:
unsigned char digestSizeLSB = swifftxState.hashbitlen % 256;
unsigned char digestSizeMSB = (swifftxState.hashbitlen - digestSizeLSB) / 256;
if (numOfZeroBytesInPadding < 1)
toAddFinalBlock = true;
// Fill the input block with data:
// 1. The output of the previous block:
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
// 2a. The input part of the block, which is the remaining data from the previous 'Update()'
// call, if exists and an extra '1' bit (maybe all we have is this extra 1):
// Add the last 1 in big-endian convention ...
if (swifftxState.remainingSize % 8 == 0)
{
swifftxState.remaining[sizeOfLastInputBlock - 1] = 0x80;
}
else
{
swifftxState.remaining[sizeOfLastInputBlock - 1] |= (1 << (7 - (swifftxState.remainingSize % 8)));
}
if (sizeOfLastInputBlock)
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
sizeOfLastInputBlock);
// Compute the message length in base 256:
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
messageLengthChar[i] = swifftxState.numOfBitsChar[i];
if (sizeOfLastInputBlock)
AddToCurrInBase256(messageLengthChar, sizeOfLastInputBlock * 8);
if (!toAddFinalBlock)
{
// 2b. Put the zeroes:
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
0, numOfZeroBytesInPadding);
// 2c. Pad the message length:
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
+ numOfZeroBytesInPadding + i] = messageLengthChar[i];
// 2d. Pad the digest size:
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE] = digestSizeMSB;
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE + 1] = digestSizeLSB;
}
else
{
// 2b. Put the zeroes, if at all:
if ((SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock) > 0)
{
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
0, SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock);
}
}
// 3. The #bits part of the block:
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
// 4. The salt part of the block:
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
swifftxState.salt,
SWIF_HAIFA_SALT_SIZE);
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, !toAddFinalBlock);
// If we have to add one more block, it is now:
if (toAddFinalBlock)
{
// 1. The previous output block, as usual.
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
// 2a. Instead of the input, zeroes:
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE , 0,
SWIF_HAIFA_INPUT_BLOCK_SIZE - SWIF_HAIFA_NUM_OF_BITS_SIZE - 2);
// 2b. Instead of the input, the message length:
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
- SWIF_HAIFA_NUM_OF_BITS_SIZE - 2,
messageLengthChar,
SWIF_HAIFA_NUM_OF_BITS_SIZE);
// 2c. Instead of the input, the digest size:
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 2] = digestSizeMSB;
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 1] = digestSizeLSB;
// 3. The #bits part of the block, which is zero in case of additional block:
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
0,
SWIF_HAIFA_NUM_OF_BITS_SIZE);
// 4. The salt part of the block:
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
swifftxState.salt,
SWIF_HAIFA_SALT_SIZE);
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, true);
}
// Finally, copy the result into 'hashval'. In case the digest size is not 512bit, copy the
// first hashbitlen of them:
for (i = 0; i < (swifftxState.hashbitlen / 8); ++i)
hashval[i] = swifftxState.currOutputBlock[i];
return SUCCESS;
}
int Swifftx::Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
BitSequence *hashval)
{
int result;
//hashState state;
// The pointer to the current place in the input we take into the compression function.
DataLength currInputIndex = 0;
result = Swifftx::Init(hashbitlen);
if (result != SUCCESS)
return result;
for ( ; (databitlen / 8) > SWIF_HAIFA_INPUT_BLOCK_SIZE;
currInputIndex += SWIF_HAIFA_INPUT_BLOCK_SIZE, databitlen -= (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8))
{
result = Swifftx::Update(data + currInputIndex, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
if (result != SUCCESS)
return result;
}
// The length of the last block may be shorter than (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8)
result = Swifftx::Update(data + currInputIndex, databitlen);
if (result != SUCCESS)
{
return result;
}
return Swifftx::Final(hashval);
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Helper fuction implementation portion.
///////////////////////////////////////////////////////////////////////////////////////////////
void Swifftx::AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE],
unsigned short toAdd)
{
unsigned char remainder = 0;
short i;
BitSequence currValueInBase256[8] = {0};
unsigned short currIndex = 7;
unsigned short temp = 0;
do
{
remainder = toAdd % 256;
currValueInBase256[currIndex--] = remainder;
toAdd -= remainder;
toAdd /= 256;
}
while(toAdd != 0);
for (i = 7; i >= 0; --i)
{
temp = value[i] + currValueInBase256[i];
if (temp > 255)
{
value[i] = temp % 256;
currValueInBase256[i - 1]++;
}
else
value[i] = (unsigned char) temp;
}
}

View File

@@ -0,0 +1,79 @@
#ifndef SWIFFTX_SHA3_H
#define SWIFFTX_SHA3_H
#include "sha3_interface.h"
#include "stdbool.h"
#include "stdint.h"
class Swifftx : public SHA3 {
#define SWIFFTX_INPUT_BLOCK_SIZE 256
#define SWIFFTX_OUTPUT_BLOCK_SIZE 65
#define SWIF_HAIFA_SALT_SIZE 8
#define SWIF_HAIFA_NUM_OF_BITS_SIZE 8
#define SWIF_HAIFA_INPUT_BLOCK_SIZE (SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE \
- SWIF_HAIFA_NUM_OF_BITS_SIZE - SWIF_HAIFA_SALT_SIZE)
typedef unsigned char BitSequence;
//const DataLength SWIF_SALT_VALUE;
#define SWIF_HAIFA_IV 0
/*const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE];
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE];
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE];
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE];*/
typedef enum
{
SUCCESS = 0,
FAIL = 1,
BAD_HASHBITLEN = 2,
BAD_SALT_SIZE = 3,
SET_SALT_VALUE_FAILED = 4,
INPUT_DATA_NOT_ALIGNED = 5
} HashReturn;
typedef struct hashState {
unsigned short hashbitlen;
// The data remained after the recent call to 'Update()'.
BitSequence remaining[SWIF_HAIFA_INPUT_BLOCK_SIZE + 1];
// The size of the remaining data in bits.
// Is 0 in case there is no remaning data at all.
unsigned int remainingSize;
// The current output of the compression function. At the end will contain the final digest
// (which may be needed to be truncated, depending on hashbitlen).
BitSequence currOutputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE];
// The value of '#bits hashed so far' field in HAIFA, in base 256.
BitSequence numOfBitsChar[SWIF_HAIFA_NUM_OF_BITS_SIZE];
// The salt value currently in use:
BitSequence salt[SWIF_HAIFA_SALT_SIZE];
// Indicates whether a single 'Update()' occured.
// Ater a call to 'Update()' the key and the salt values cannot be changed.
bool wasUpdated;
} hashState;
private:
int swifftxNumRounds;
hashState swifftxState;
public:
int Init(int hashbitlen);
int Update(const BitSequence *data, DataLength databitlen);
int Final(BitSequence *hashval);
int Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
BitSequence *hashval);
private:
static void AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE], unsigned short toAdd);
};
#endif

View File

@@ -0,0 +1,21 @@
#pragma once
#include <cstdint>
namespace hash {
using BitSequence = unsigned char;
using DataLength = unsigned long long;
struct hash_interface {
virtual ~hash_interface() = default;
virtual int Init(int hash_bitsize) = 0;
virtual int Update(const BitSequence *data, DataLength data_bitsize) = 0;
virtual int Final(BitSequence *hash) = 0;
virtual int
Hash(int hash_bitsize, const BitSequence *data, DataLength data_bitsize, BitSequence *hash) = 0;
};
} // namespace hash

39
algo/swifftx/inttypes.h Normal file
View File

@@ -0,0 +1,39 @@
/*
inttypes.h
Contributors:
Created by Marek Michalkiewicz <marekm@linux.org.pl>
THIS SOFTWARE IS NOT COPYRIGHTED
This source code is offered for use in the public domain. You may
use, modify or distribute it freely.
This code is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY
DISCLAIMED. This includes but is not limited to warranties of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef __INTTYPES_H_
#define __INTTYPES_H_
/* Use [u]intN_t if you need exactly N bits.
XXX - doesn't handle the -mint8 option. */
typedef signed char swift_int8_t;
typedef unsigned char swift_uint8_t;
typedef int swift_int16_t;
typedef unsigned int swift_uint16_t;
typedef long swift_int32_t;
typedef unsigned long swift_uint32_t;
typedef long long swift_int64_t;
typedef unsigned long long swift_uint64_t;
//typedef swift_int16_t intptr_t;
//typedef swift_uint16_t uintptr_t;
#endif

View File

@@ -0,0 +1,14 @@
#pragma once
#include <cstdint>
//#include <streams/hash/hash_interface.h>
#include "hash_interface.h"
namespace sha3 {
using BitSequence = hash::BitSequence;
using DataLength = hash::DataLength;
struct sha3_interface : hash::hash_interface {};
} // namespace sha3

47
algo/swifftx/stdbool.h Normal file
View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2000 Jeroen Ruigrok van der Werven <asmodai@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: src/include/stdbool.h,v 1.6 2002/08/16 07:33:14 alfred Exp $
*/
#ifndef _STDBOOL_H_
#define _STDBOOL_H_
#define __bool_true_false_are_defined 1
#ifndef __cplusplus
#define false 0
#define true 1
//#define bool _Bool
//#if __STDC_VERSION__ < 199901L && __GNUC__ < 3
//typedef int _Bool;
//#endif
typedef int bool;
#endif /* !__cplusplus */
#endif /* !_STDBOOL_H_ */

54
algo/swifftx/stdint.h Normal file
View File

@@ -0,0 +1,54 @@
#ifndef _SWIFFT_STDINT_H
#define _SWIFFT_STDINT_H
///////////////////////////////////////////////////////////////////////////////////////////////
//
// A note from SWIFFTX implementers:
//
// Although the submission was targeted for Microsoft Visual Studio 2005 compiler, we strived
// to make the code as portable as possible. This is why we preferred to use the types defined
// here, instead of Microsoft-specific types. We compiled the code with gcc to make this sure.
// However, we couldn't use this header as is, due to VS2005 compiler objections. This is why
// we commented out certain defines and clearly marked it.
// To compile our code on gcc you may define SYS_STDINT.
//
///////////////////////////////////////////////////////////////////////////////////////////////
#ifdef SYS_STDINT
#include <stdint.h>
#else
#include "inttypes.h"
// The following was commented out by SWIFFTX implementers:
// __BEGIN_DECLS
typedef swift_int8_t swifftx_int_least8_t;
typedef swift_int16_t swifftx_int_least16_t;
typedef swift_int32_t swifftx_int_least32_t;
typedef swift_uint8_t swifftx_uint_least8_t;
typedef swift_uint16_t swifftx_uint_least16_t;
typedef swift_uint32_t swifftx_uint_least32_t;
#ifndef __STRICT_ANSI__
typedef swift_int64_t swifftx_int_least64_t;
typedef swift_uint64_t swifftx_uint_least64_t;
#endif
/*typedef signed char int_fast8_t;
typedef signed long int int_fast16_t;
typedef signed long int int_fast32_t;
typedef signed long long int int_fast64_t;
typedef unsigned char uint_fast8_t;
typedef unsigned long int uint_fast16_t;
typedef unsigned long int uint_fast32_t;
typedef unsigned long long int uint_fast64_t;*/
// The following was commented out by SWIFFTX implementers:
// #include <endian.h>
// __END_DECLS
#endif
#endif

912
algo/swifftx/swifftx-4way.c Normal file
View File

@@ -0,0 +1,912 @@
///////////////////////////////////////////////////////////////////////////////////////////////
//
// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
//
// SWIFFTX.c
//
// October 2008
//
// This is the source file of the OPTIMIZED 32BIT implementation of SWIFFTX hash function.
// SWIFFTX is a candidate function for SHA-3 NIST competition.
// More details about SWIFFTX can be found in the accompanying submission documents.
//
///////////////////////////////////////////////////////////////////////////////////////////////
#include "swifftx.h"
// See the remarks concerning compatibility issues inside stdint.h.
#include "stdint.h"
// Remove this while using gcc:
//#include "stdbool.h"
#include <memory.h>
///////////////////////////////////////////////////////////////////////////////////////////////
// Constants and static tables portion.
///////////////////////////////////////////////////////////////////////////////////////////////
// In SWIFFTX we work over Z_257, so this is the modulus and the arithmetic is performed modulo
// this number.
#define FIELD_SIZE 257
// The size of FFT we use:
#define N 64
#define LOGN 6
#define EIGHTH_N (N / 8)
// The number of FFTS done on the input.
#define M (SWIFFTX_INPUT_BLOCK_SIZE / 8) // 32
// Omega is the 128th root of unity in Z_257.
// We choose w = 42.
#define OMEGA 42
// The size of the inner FFT lookup table:
#define W 8
// Calculates the sum and the difference of two numbers.
//
// Parameters:
// - A: the first operand. After the operation stores the sum of the two operands.
// - B: the second operand. After the operation stores the difference between the first and the
// second operands.
#define ADD_SUB_4WAY( A, B ) \
{ \
__m128i temp = B; \
B = _mm_sub_epi32( A, B ); \
A = _mm_add_epi32( A, temp ); \
}
//#define ADD_SUB(A, B) {register int temp = (B); B = ((A) - (B)); A = ((A) + (temp));}
// Quickly reduces an integer modulo 257.
//
// Parameters:
// - A: the input.
#define Q_REDUCE( A ) ( _mm_sub_epi32( \
_mm_and_epi32( A, m128_const1_32( 0xff ) ), \
_mm_srli_epi32( A, 8 ) ) )
//#define Q_REDUCE(A) (((A) & 0xff) - ((A) >> 8))
// Since we need to do the setup only once, this is the indicator variable:
static bool wasSetupDone = false;
// This array stores the powers of omegas that correspond to the indices, which are the input
// values. Known also as the "outer FFT twiddle factors".
swift_int16_t multipliers[N];
// This array stores the powers of omegas, multiplied by the corresponding values.
// We store this table to save computation time.
//
// To calculate the intermediate value of the compression function (the first out of two
// stages), we multiply the k-th bit of x_i by w^[(2i + 1) * k]. {x_i} is the input to the
// compression function, i is between 0 and 31, x_i is a 64-bit value.
// One can see the formula for this (intermediate) stage in the SWIFFT FSE 2008 paper --
// formula (2), section 3, page 6.
swift_int16_t fftTable[256 * EIGHTH_N];
// The A's we use in SWIFFTX shall be random elements of Z_257.
// We generated these A's from the decimal expansion of PI as follows: we converted each
// triple of digits into a decimal number d. If d < (257 * 3) we used (d % 257) for the next A
// element, otherwise move to the next triple of digits in the expansion. This guarntees that
// the A's are random, provided that PI digits are.
const swift_int16_t As[3 * M * N] =
{141, 78, 139, 75, 238, 205, 129, 126, 22, 245, 197, 169, 142, 118, 105, 78,
50, 149, 29, 208, 114, 34, 85, 117, 67, 148, 86, 256, 25, 49, 133, 93,
95, 36, 68, 231, 211, 102, 151, 128, 224, 117, 193, 27, 102, 187, 7, 105,
45, 130, 108, 124, 171, 151, 189, 128, 218, 134, 233, 165, 14, 201, 145, 134,
52, 203, 91, 96, 197, 69, 134, 213, 136, 93, 3, 249, 141, 16, 210, 73,
6, 92, 58, 74, 174, 6, 254, 91, 201, 107, 110, 76, 103, 11, 73, 16,
34, 209, 7, 127, 146, 254, 95, 176, 57, 13, 108, 245, 77, 92, 186, 117,
124, 97, 105, 118, 34, 74, 205, 122, 235, 53, 94, 238, 210, 227, 183, 11,
129, 159, 105, 183, 142, 129, 86, 21, 137, 138, 224, 223, 190, 188, 179, 188,
256, 25, 217, 176, 36, 176, 238, 127, 160, 210, 155, 148, 132, 0, 54, 127,
145, 6, 46, 85, 243, 95, 173, 123, 178, 207, 211, 183, 224, 173, 146, 35,
71, 114, 50, 22, 175, 1, 28, 19, 112, 129, 21, 34, 161, 159, 115, 52,
4, 193, 211, 92, 115, 49, 59, 217, 218, 96, 61, 81, 24, 202, 198, 89,
45, 128, 8, 51, 253, 87, 171, 35, 4, 188, 171, 10, 3, 137, 238, 73,
19, 208, 124, 163, 103, 177, 155, 147, 46, 84, 253, 233, 171, 241, 211, 217,
159, 48, 96, 79, 237, 18, 171, 226, 99, 1, 97, 195, 216, 163, 198, 95,
0, 201, 65, 228, 21, 153, 124, 230, 44, 35, 44, 108, 85, 156, 249, 207,
26, 222, 131, 1, 60, 242, 197, 150, 181, 19, 116, 213, 75, 98, 124, 240,
123, 207, 62, 255, 60, 143, 187, 157, 139, 9, 12, 104, 89, 49, 193, 146,
104, 196, 181, 82, 198, 253, 192, 191, 255, 122, 212, 104, 47, 20, 132, 208,
46, 170, 2, 69, 234, 36, 56, 163, 28, 152, 104, 238, 162, 56, 24, 58,
38, 150, 193, 254, 253, 125, 173, 35, 73, 126, 247, 239, 216, 6, 199, 15,
90, 12, 97, 122, 9, 84, 207, 127, 219, 72, 58, 30, 29, 182, 41, 192,
235, 248, 237, 74, 72, 176, 210, 252, 45, 64, 165, 87, 202, 241, 236, 223,
151, 242, 119, 239, 52, 112, 169, 28, 13, 37, 160, 60, 158, 81, 133, 60,
16, 145, 249, 192, 173, 217, 214, 93, 141, 184, 54, 34, 161, 104, 157, 95,
38, 133, 218, 227, 211, 181, 9, 66, 137, 143, 77, 33, 248, 159, 4, 55,
228, 48, 99, 219, 222, 184, 15, 36, 254, 256, 157, 237, 87, 139, 209, 113,
232, 85, 126, 167, 197, 100, 103, 166, 64, 225, 125, 205, 117, 135, 84, 128,
231, 112, 90, 241, 28, 22, 210, 147, 186, 49, 230, 21, 108, 39, 194, 47,
123, 199, 107, 114, 30, 210, 250, 143, 59, 156, 131, 133, 221, 27, 76, 99,
208, 250, 78, 12, 211, 141, 95, 81, 195, 106, 8, 232, 150, 212, 205, 221,
11, 225, 87, 219, 126, 136, 137, 180, 198, 48, 68, 203, 239, 252, 194, 235,
142, 137, 174, 172, 190, 145, 250, 221, 182, 204, 1, 195, 130, 153, 83, 241,
161, 239, 211, 138, 11, 169, 155, 245, 174, 49, 10, 166, 16, 130, 181, 139,
222, 222, 112, 99, 124, 94, 51, 243, 133, 194, 244, 136, 35, 248, 201, 177,
178, 186, 129, 102, 89, 184, 180, 41, 149, 96, 165, 72, 225, 231, 134, 158,
199, 28, 249, 16, 225, 195, 10, 210, 164, 252, 138, 8, 35, 152, 213, 199,
82, 116, 97, 230, 63, 199, 241, 35, 79, 120, 54, 174, 67, 112, 1, 76,
69, 222, 194, 96, 82, 94, 25, 228, 196, 145, 155, 136, 228, 234, 46, 101,
246, 51, 103, 166, 246, 75, 9, 200, 161, 4, 108, 35, 129, 168, 208, 144,
50, 14, 13, 220, 41, 132, 122, 127, 194, 9, 232, 234, 107, 28, 187, 8,
51, 141, 97, 221, 225, 9, 113, 170, 166, 102, 135, 22, 231, 185, 227, 187,
110, 145, 251, 146, 76, 22, 146, 228, 7, 53, 64, 25, 62, 198, 130, 190,
221, 232, 169, 64, 188, 199, 237, 249, 173, 218, 196, 191, 48, 224, 5, 113,
100, 166, 160, 21, 191, 197, 61, 162, 149, 171, 240, 183, 129, 231, 123, 204,
192, 179, 134, 15, 47, 161, 142, 177, 239, 234, 186, 237, 231, 53, 208, 95,
146, 36, 225, 231, 89, 142, 93, 248, 137, 124, 83, 39, 69, 77, 89, 208,
182, 48, 85, 147, 244, 164, 246, 68, 38, 190, 220, 35, 202, 91, 157, 151,
201, 240, 185, 218, 4, 152, 2, 132, 177, 88, 190, 196, 229, 74, 220, 135,
137, 196, 11, 47, 5, 251, 106, 144, 163, 60, 222, 127, 52, 57, 202, 102,
64, 140, 110, 206, 23, 182, 39, 245, 1, 163, 157, 186, 163, 80, 7, 230,
44, 249, 176, 102, 164, 125, 147, 120, 18, 191, 186, 125, 64, 65, 198, 157,
164, 213, 95, 61, 13, 181, 208, 91, 242, 197, 158, 34, 98, 169, 91, 14,
17, 93, 157, 17, 65, 30, 183, 6, 139, 58, 255, 108, 100, 136, 209, 144,
164, 6, 237, 33, 210, 110, 57, 126, 197, 136, 125, 244, 165, 151, 168, 3,
143, 251, 247, 155, 136, 130, 88, 14, 74, 121, 250, 133, 21, 226, 185, 232,
118, 132, 89, 64, 204, 161, 2, 70, 224, 159, 35, 204, 123, 180, 13, 52,
231, 57, 25, 78, 66, 69, 97, 42, 198, 84, 176, 59, 8, 232, 125, 134,
193, 2, 232, 109, 216, 69, 90, 142, 32, 38, 249, 37, 75, 180, 184, 188,
19, 47, 120, 87, 146, 70, 232, 120, 191, 45, 33, 38, 19, 248, 110, 110,
44, 64, 2, 84, 244, 228, 252, 228, 170, 123, 38, 144, 213, 144, 171, 212,
243, 87, 189, 46, 128, 110, 84, 77, 65, 183, 61, 184, 101, 44, 168, 68,
14, 106, 105, 8, 227, 211, 166, 39, 152, 43, 52, 254, 197, 55, 119, 89,
168, 65, 53, 138, 177, 56, 219, 0, 58, 121, 148, 18, 44, 100, 215, 103,
145, 229, 117, 196, 91, 89, 113, 143, 172, 239, 249, 184, 154, 39, 112, 65,
204, 42, 84, 38, 155, 151, 151, 16, 100, 87, 174, 162, 145, 147, 149, 186,
237, 145, 134, 144, 198, 235, 213, 163, 48, 230, 24, 47, 57, 71, 127, 0,
150, 219, 12, 81, 197, 150, 131, 13, 169, 63, 175, 184, 48, 235, 65, 243,
149, 200, 163, 254, 202, 114, 247, 67, 143, 250, 126, 228, 80, 130, 216, 214,
36, 2, 230, 33, 119, 125, 3, 142, 237, 100, 3, 152, 197, 174, 244, 129,
232, 30, 206, 199, 39, 210, 220, 43, 237, 221, 201, 54, 179, 42, 28, 133,
246, 203, 198, 177, 0, 28, 194, 85, 223, 109, 155, 147, 221, 60, 133, 108,
157, 254, 26, 75, 157, 185, 49, 142, 31, 137, 71, 43, 63, 64, 237, 148,
237, 172, 159, 160, 155, 254, 234, 224, 140, 193, 114, 140, 62, 109, 136, 39,
255, 8, 158, 146, 128, 49, 222, 96, 57, 209, 180, 249, 202, 127, 113, 231,
78, 178, 46, 33, 228, 215, 104, 31, 207, 186, 82, 41, 42, 39, 103, 119,
123, 133, 243, 254, 238, 156, 90, 186, 37, 212, 33, 107, 252, 51, 177, 36,
237, 76, 159, 245, 93, 214, 97, 56, 190, 38, 160, 94, 105, 222, 220, 158,
49, 16, 191, 52, 120, 87, 179, 2, 27, 144, 223, 230, 184, 6, 129, 227,
69, 47, 215, 181, 162, 139, 72, 200, 45, 163, 159, 62, 2, 221, 124, 40,
159, 242, 35, 208, 179, 166, 98, 67, 178, 68, 143, 225, 178, 146, 187, 159,
57, 66, 176, 192, 236, 250, 168, 224, 122, 43, 159, 120, 133, 165, 122, 64,
87, 74, 161, 241, 9, 87, 90, 24, 255, 113, 203, 220, 57, 139, 197, 159,
31, 151, 27, 140, 77, 162, 7, 27, 84, 228, 187, 220, 53, 126, 162, 242,
84, 181, 223, 103, 86, 177, 207, 31, 140, 18, 207, 256, 201, 166, 96, 23,
233, 103, 197, 84, 161, 75, 59, 149, 138, 154, 119, 92, 16, 53, 116, 97,
220, 114, 35, 45, 77, 209, 40, 196, 71, 22, 81, 178, 110, 14, 3, 180,
110, 129, 112, 47, 18, 61, 134, 78, 73, 79, 254, 232, 125, 180, 205, 54,
220, 119, 63, 89, 181, 52, 77, 109, 151, 77, 80, 207, 144, 25, 20, 6,
208, 47, 201, 206, 192, 14, 73, 176, 256, 201, 207, 87, 216, 60, 56, 73,
92, 243, 179, 113, 49, 59, 55, 168, 121, 137, 69, 154, 95, 57, 187, 47,
129, 4, 15, 92, 6, 116, 69, 196, 48, 134, 84, 81, 111, 56, 38, 176,
239, 6, 128, 72, 242, 134, 36, 221, 59, 48, 242, 68, 130, 110, 171, 89,
13, 220, 48, 29, 5, 75, 104, 233, 91, 129, 105, 162, 44, 113, 163, 163,
85, 147, 190, 111, 197, 80, 213, 153, 81, 68, 203, 33, 161, 165, 10, 61,
120, 252, 0, 205, 28, 42, 193, 64, 39, 37, 83, 175, 5, 218, 215, 174,
128, 121, 231, 11, 150, 145, 135, 197, 136, 91, 193, 5, 107, 88, 82, 6,
4, 188, 256, 70, 40, 2, 167, 57, 169, 203, 115, 254, 215, 172, 84, 80,
188, 167, 34, 137, 43, 243, 2, 79, 178, 38, 188, 135, 233, 194, 208, 13,
11, 151, 231, 196, 12, 122, 162, 56, 17, 114, 191, 207, 90, 132, 64, 238,
187, 6, 198, 176, 240, 88, 118, 236, 15, 226, 166, 22, 193, 229, 82, 246,
213, 64, 37, 63, 31, 243, 252, 37, 156, 38, 175, 204, 138, 141, 211, 82,
106, 217, 97, 139, 153, 56, 129, 218, 158, 9, 83, 26, 87, 112, 71, 21,
250, 5, 65, 141, 68, 116, 231, 113, 10, 218, 99, 205, 201, 92, 157, 4,
97, 46, 49, 220, 72, 139, 103, 171, 149, 129, 193, 19, 69, 245, 43, 31,
58, 68, 36, 195, 159, 22, 54, 34, 233, 141, 205, 100, 226, 96, 22, 192,
41, 231, 24, 79, 234, 138, 30, 120, 117, 216, 172, 197, 172, 107, 86, 29,
181, 151, 0, 6, 146, 186, 68, 55, 54, 58, 213, 182, 60, 231, 33, 232,
77, 210, 216, 154, 80, 51, 141, 122, 68, 148, 219, 122, 254, 48, 64, 175,
41, 115, 62, 243, 141, 81, 119, 121, 5, 68, 121, 88, 239, 29, 230, 90,
135, 159, 35, 223, 168, 112, 49, 37, 146, 60, 126, 134, 42, 145, 115, 90,
73, 133, 211, 86, 120, 141, 122, 241, 127, 56, 130, 36, 174, 75, 83, 246,
112, 45, 136, 194, 201, 115, 1, 156, 114, 167, 208, 12, 176, 147, 32, 170,
251, 100, 102, 220, 122, 210, 6, 49, 75, 201, 38, 105, 132, 135, 126, 102,
13, 121, 76, 228, 202, 20, 61, 213, 246, 13, 207, 42, 148, 168, 37, 253,
34, 94, 141, 185, 18, 234, 157, 109, 104, 64, 250, 125, 49, 236, 86, 48,
196, 77, 75, 237, 156, 103, 225, 19, 110, 229, 22, 68, 177, 93, 221, 181,
152, 153, 61, 108, 101, 74, 247, 195, 127, 216, 30, 166, 168, 61, 83, 229,
120, 156, 96, 120, 201, 124, 43, 27, 253, 250, 120, 143, 89, 235, 189, 243,
150, 7, 127, 119, 149, 244, 84, 185, 134, 34, 128, 193, 236, 234, 132, 117,
137, 32, 145, 184, 44, 121, 51, 76, 11, 228, 142, 251, 39, 77, 228, 251,
41, 58, 246, 107, 125, 187, 9, 240, 35, 8, 11, 162, 242, 220, 158, 163,
2, 184, 163, 227, 242, 2, 100, 101, 2, 78, 129, 34, 89, 28, 26, 157,
79, 31, 107, 250, 194, 156, 186, 69, 212, 66, 41, 180, 139, 42, 211, 253,
256, 239, 29, 129, 104, 248, 182, 68, 1, 189, 48, 226, 36, 229, 3, 158,
41, 53, 241, 22, 115, 174, 16, 163, 224, 19, 112, 219, 177, 233, 42, 27,
250, 134, 18, 28, 145, 122, 68, 34, 134, 31, 147, 17, 39, 188, 150, 76,
45, 42, 167, 249, 12, 16, 23, 182, 13, 79, 121, 3, 70, 197, 239, 44,
86, 177, 255, 81, 64, 171, 138, 131, 73, 110, 44, 201, 254, 198, 146, 91,
48, 9, 104, 31, 29, 161, 101, 31, 138, 180, 231, 233, 79, 137, 61, 236,
140, 15, 249, 218, 234, 119, 99, 195, 110, 137, 237, 207, 8, 31, 45, 24,
90, 155, 203, 253, 192, 203, 65, 176, 210, 171, 142, 214, 220, 122, 136, 237,
189, 186, 147, 40, 80, 254, 173, 33, 191, 46, 192, 26, 108, 255, 228, 205,
61, 76, 39, 107, 225, 126, 228, 182, 140, 251, 143, 134, 252, 168, 221, 8,
185, 85, 60, 233, 147, 244, 87, 137, 8, 140, 96, 80, 53, 45, 175, 160,
124, 189, 112, 37, 144, 19, 70, 17, 170, 242, 2, 3, 28, 95, 120, 199,
212, 43, 9, 117, 86, 151, 101, 241, 200, 145, 241, 19, 178, 69, 204, 197,
227, 166, 94, 7, 193, 45, 247, 234, 19, 187, 212, 212, 236, 125, 33, 95,
198, 121, 122, 103, 77, 155, 235, 49, 25, 237, 249, 11, 162, 7, 238, 24,
16, 150, 129, 25, 152, 17, 42, 67, 247, 162, 77, 154, 31, 133, 55, 137,
79, 119, 153, 10, 86, 28, 244, 186, 41, 169, 106, 44, 10, 49, 110, 179,
32, 133, 155, 244, 61, 70, 131, 168, 170, 39, 231, 252, 32, 69, 92, 238,
239, 35, 132, 136, 236, 167, 90, 32, 123, 88, 69, 22, 20, 89, 145, 166,
30, 118, 75, 4, 49, 31, 225, 54, 11, 50, 56, 191, 246, 1, 187, 33,
119, 107, 139, 68, 19, 240, 131, 55, 94, 113, 31, 252, 12, 179, 121, 2,
120, 252, 0, 76, 41, 80, 185, 42, 62, 121, 105, 159, 121, 109, 111, 98,
7, 118, 86, 29, 210, 70, 231, 179, 223, 229, 164, 70, 62, 47, 0, 206,
204, 178, 168, 120, 224, 166, 99, 25, 103, 63, 246, 224, 117, 204, 75, 124,
140, 133, 110, 110, 222, 88, 151, 118, 46, 37, 22, 143, 158, 40, 2, 50,
153, 94, 190, 199, 13, 198, 127, 211, 180, 90, 183, 98, 0, 142, 210, 154,
100, 187, 67, 231, 202, 100, 198, 235, 252, 160, 247, 124, 247, 14, 121, 221,
57, 88, 253, 243, 185, 89, 45, 249, 221, 194, 108, 175, 193, 119, 50, 141,
223, 133, 136, 64, 176, 250, 129, 100, 124, 94, 181, 159, 99, 185, 177, 240,
135, 42, 103, 52, 202, 208, 143, 186, 193, 103, 154, 237, 102, 88, 225, 161,
50, 188, 191, 109, 12, 87, 19, 227, 247, 183, 13, 52, 205, 170, 205, 146,
89, 160, 18, 105, 192, 73, 231, 225, 184, 157, 252, 220, 61, 59, 169, 183,
221, 20, 141, 20, 158, 101, 245, 7, 245, 225, 118, 137, 84, 55, 19, 27,
164, 110, 35, 25, 202, 94, 150, 46, 91, 152, 130, 1, 7, 46, 16, 237,
171, 109, 19, 200, 65, 38, 10, 213, 70, 96, 126, 226, 185, 225, 181, 46,
10, 165, 11, 123, 53, 158, 22, 147, 64, 22, 227, 69, 182, 237, 197, 37,
39, 49, 186, 223, 139, 128, 55, 36, 166, 178, 220, 20, 98, 172, 166, 253,
45, 0, 120, 180, 189, 185, 158, 159, 196, 6, 214, 79, 141, 52, 156, 107,
5, 109, 142, 159, 33, 64, 190, 133, 95, 132, 95, 202, 160, 63, 186, 23,
231, 107, 163, 33, 234, 15, 244, 77, 108, 49, 51, 7, 164, 87, 142, 99,
240, 202, 47, 256, 118, 190, 196, 178, 217, 42, 39, 153, 21, 192, 232, 202,
14, 82, 179, 64, 233, 4, 219, 10, 133, 78, 43, 144, 146, 216, 202, 81,
71, 252, 8, 201, 68, 256, 85, 233, 164, 88, 176, 30, 5, 152, 126, 179,
249, 84, 140, 190, 159, 54, 118, 98, 2, 159, 27, 133, 74, 121, 239, 196,
71, 149, 119, 135, 102, 20, 87, 112, 44, 75, 221, 3, 151, 158, 5, 98,
152, 25, 97, 106, 63, 171, 240, 79, 234, 240, 230, 92, 76, 70, 173, 196,
36, 225, 218, 133, 64, 240, 150, 41, 146, 66, 133, 51, 134, 73, 170, 238,
140, 90, 45, 89, 46, 147, 96, 169, 174, 174, 244, 151, 90, 40, 32, 74,
38, 154, 246, 57, 31, 14, 189, 151, 83, 243, 197, 183, 220, 185, 53, 225,
51, 106, 188, 208, 222, 248, 93, 13, 93, 215, 131, 25, 142, 185, 113, 222,
131, 215, 149, 50, 159, 85, 32, 5, 205, 192, 2, 227, 42, 214, 197, 42,
126, 182, 68, 123, 109, 36, 237, 179, 170, 199, 77, 256, 5, 128, 214, 243,
137, 177, 170, 253, 179, 180, 153, 236, 100, 196, 216, 231, 198, 37, 192, 80,
121, 221, 246, 1, 16, 246, 29, 78, 64, 148, 124, 38, 96, 125, 28, 20,
48, 51, 73, 187, 139, 208, 98, 253, 221, 188, 84, 129, 1, 205, 95, 205,
117, 79, 71, 126, 134, 237, 19, 184, 137, 125, 129, 178, 223, 54, 188, 112,
30, 7, 225, 228, 205, 184, 233, 87, 117, 22, 58, 10, 8, 42, 2, 114,
254, 19, 17, 13, 150, 92, 233, 179, 63, 12, 60, 171, 127, 35, 50, 5,
195, 113, 241, 25, 249, 184, 166, 44, 221, 35, 151, 116, 8, 54, 195, 89,
218, 186, 132, 5, 41, 89, 226, 177, 11, 41, 87, 172, 5, 23, 20, 59,
228, 94, 76, 33, 137, 43, 151, 221, 61, 232, 4, 120, 93, 217, 80, 228,
228, 6, 58, 25, 62, 84, 91, 48, 209, 20, 247, 243, 55, 106, 80, 79,
235, 34, 20, 180, 146, 2, 236, 13, 236, 206, 243, 222, 204, 83, 148, 213,
214, 117, 237, 98, 0, 90, 204, 168, 32, 41, 126, 67, 191, 74, 27, 255,
26, 75, 240, 113, 185, 105, 167, 154, 112, 67, 151, 63, 161, 134, 239, 176,
42, 87, 249, 130, 45, 242, 17, 100, 107, 120, 212, 218, 237, 76, 231, 162,
175, 172, 118, 155, 92, 36, 124, 17, 121, 71, 13, 9, 82, 126, 147, 142,
218, 148, 138, 80, 163, 106, 164, 123, 140, 129, 35, 42, 186, 154, 228, 214,
75, 73, 8, 253, 42, 153, 232, 164, 95, 24, 110, 90, 231, 197, 90, 196,
57, 164, 252, 181, 31, 7, 97, 256, 35, 77, 200, 212, 99, 179, 92, 227,
17, 180, 49, 176, 9, 188, 13, 182, 93, 44, 128, 219, 134, 92, 151, 6,
23, 126, 200, 109, 66, 30, 140, 180, 146, 134, 67, 200, 7, 9, 223, 168,
186, 221, 3, 154, 150, 165, 43, 53, 138, 27, 86, 213, 235, 160, 70, 2,
240, 20, 89, 212, 84, 141, 168, 246, 183, 227, 30, 167, 138, 185, 253, 83,
52, 143, 236, 94, 59, 65, 89, 218, 194, 157, 164, 156, 111, 95, 202, 168,
245, 256, 151, 28, 222, 194, 72, 130, 217, 134, 253, 77, 246, 100, 76, 32,
254, 174, 182, 193, 14, 237, 74, 1, 74, 26, 135, 216, 152, 208, 112, 38,
181, 62, 25, 71, 61, 234, 254, 97, 191, 23, 92, 256, 190, 205, 6, 16,
134, 147, 210, 219, 148, 59, 73, 185, 24, 247, 174, 143, 116, 220, 128, 144,
111, 126, 101, 98, 130, 136, 101, 102, 69, 127, 24, 168, 146, 226, 226, 207,
176, 122, 149, 254, 134, 196, 22, 151, 197, 21, 50, 205, 116, 154, 65, 116,
177, 224, 127, 77, 177, 159, 225, 69, 176, 54, 100, 104, 140, 8, 11, 126,
11, 188, 185, 159, 107, 16, 254, 142, 80, 28, 5, 157, 104, 57, 109, 82,
102, 80, 173, 242, 238, 207, 57, 105, 237, 160, 59, 189, 189, 199, 26, 11,
190, 156, 97, 118, 20, 12, 254, 189, 165, 147, 142, 199, 5, 213, 64, 133,
108, 217, 133, 60, 94, 28, 116, 136, 47, 165, 125, 42, 183, 143, 14, 129,
223, 70, 212, 205, 181, 180, 3, 201, 182, 46, 57, 104, 239, 60, 99, 181,
220, 231, 45, 79, 156, 89, 149, 143, 190, 103, 153, 61, 235, 73, 136, 20,
89, 243, 16, 130, 247, 141, 134, 93, 80, 68, 85, 84, 8, 72, 194, 4,
242, 110, 19, 133, 199, 70, 172, 92, 132, 254, 67, 74, 36, 94, 13, 90,
154, 184, 9, 109, 118, 243, 214, 71, 36, 95, 0, 90, 201, 105, 112, 215,
69, 196, 224, 210, 236, 242, 155, 211, 37, 134, 69, 113, 157, 97, 68, 26,
230, 149, 219, 180, 20, 76, 172, 145, 154, 40, 129, 8, 93, 56, 162, 124,
207, 233, 105, 19, 3, 183, 155, 134, 8, 244, 213, 78, 139, 88, 156, 37,
51, 152, 111, 102, 112, 250, 114, 252, 201, 241, 133, 24, 136, 153, 5, 90,
210, 197, 216, 24, 131, 17, 147, 246, 13, 86, 3, 253, 179, 237, 101, 114,
243, 191, 207, 2, 220, 133, 244, 53, 87, 125, 154, 158, 197, 20, 8, 83,
32, 191, 38, 241, 204, 22, 168, 59, 217, 123, 162, 82, 21, 50, 130, 89,
239, 253, 195, 56, 253, 74, 147, 125, 234, 199, 250, 28, 65, 193, 22, 237,
193, 94, 58, 229, 139, 176, 69, 42, 179, 164, 150, 168, 246, 214, 86, 174,
59, 117, 15, 19, 76, 37, 214, 238, 153, 226, 154, 45, 109, 114, 198, 107,
45, 70, 238, 196, 142, 252, 244, 71, 123, 136, 134, 188, 99, 132, 25, 42,
240, 0, 196, 33, 26, 124, 256, 145, 27, 102, 153, 35, 28, 132, 221, 167,
138, 133, 41, 170, 95, 224, 40, 139, 239, 153, 1, 106, 255, 106, 170, 163,
127, 44, 155, 232, 194, 119, 232, 117, 239, 143, 108, 41, 3, 9, 180, 256,
144, 113, 133, 200, 79, 69, 128, 216, 31, 50, 102, 209, 249, 136, 150, 154,
182, 51, 228, 39, 127, 142, 87, 15, 94, 92, 187, 245, 31, 236, 64, 58,
114, 11, 17, 166, 189, 152, 218, 34, 123, 39, 58, 37, 153, 91, 63, 121,
31, 34, 12, 254, 106, 96, 171, 14, 155, 247, 214, 69, 24, 98, 3, 204,
202, 194, 207, 30, 253, 44, 119, 70, 14, 96, 82, 250, 63, 6, 232, 38,
89, 144, 102, 191, 82, 254, 20, 222, 96, 162, 110, 6, 159, 58, 200, 226,
98, 128, 42, 70, 84, 247, 128, 211, 136, 54, 143, 166, 60, 118, 99, 218,
27, 193, 85, 81, 219, 223, 46, 41, 23, 233, 152, 222, 36, 236, 54, 181,
56, 50, 4, 207, 129, 92, 78, 88, 197, 251, 131, 105, 31, 172, 38, 131,
19, 204, 129, 47, 227, 106, 202, 183, 23, 6, 77, 224, 102, 147, 11, 218,
131, 132, 60, 192, 208, 223, 236, 23, 103, 115, 89, 18, 185, 171, 70, 174,
139, 0, 100, 160, 221, 11, 228, 60, 12, 122, 114, 12, 157, 235, 148, 57,
83, 62, 173, 131, 169, 126, 85, 99, 93, 243, 81, 80, 29, 245, 206, 82,
236, 227, 166, 14, 230, 213, 144, 97, 27, 111, 99, 164, 105, 150, 89, 111,
252, 118, 140, 232, 120, 183, 137, 213, 232, 157, 224, 33, 134, 118, 186, 80,
159, 2, 186, 193, 54, 242, 25, 237, 232, 249, 226, 213, 90, 149, 90, 160,
118, 69, 64, 37, 10, 183, 109, 246, 30, 52, 219, 69, 189, 26, 116, 220,
50, 244, 243, 243, 139, 137, 232, 98, 38, 45, 256, 143, 171, 101, 73, 238,
123, 45, 194, 167, 250, 123, 12, 29, 136, 237, 141, 21, 89, 96, 199, 44,
8, 214, 208, 17, 113, 41, 137, 26, 166, 155, 89, 85, 54, 58, 97, 160,
50, 239, 58, 71, 21, 157, 139, 12, 37, 198, 182, 131, 149, 134, 16, 204,
164, 181, 248, 166, 52, 216, 136, 201, 37, 255, 187, 240, 5, 101, 147, 231,
14, 163, 253, 134, 146, 216, 8, 54, 224, 90, 220, 195, 75, 215, 186, 58,
71, 204, 124, 105, 239, 53, 16, 85, 69, 163, 195, 223, 33, 38, 69, 88,
88, 203, 99, 55, 176, 13, 156, 204, 236, 99, 194, 134, 75, 247, 126, 129,
160, 124, 233, 206, 139, 144, 154, 45, 233, 51, 206, 61, 60, 55, 205, 107,
84, 108, 96, 188, 203, 31, 89, 20, 115, 144, 137, 90, 237, 78, 231, 185,
120, 217, 1, 176, 169, 30, 155, 176, 100, 113, 53, 42, 193, 108, 14, 121,
176, 158, 137, 92, 178, 44, 110, 249, 108, 234, 94, 101, 128, 12, 250, 173,
72, 202, 232, 66, 139, 152, 189, 18, 32, 197, 9, 238, 246, 55, 119, 183,
196, 119, 113, 247, 191, 100, 200, 245, 46, 16, 234, 112, 136, 116, 232, 48,
176, 108, 11, 237, 14, 153, 93, 177, 124, 72, 67, 121, 135, 143, 45, 18,
97, 251, 184, 172, 136, 55, 213, 8, 103, 12, 221, 212, 13, 160, 116, 91,
237, 127, 218, 190, 103, 131, 77, 82, 36, 100, 22, 252, 79, 69, 54, 26,
65, 182, 115, 142, 247, 20, 89, 81, 188, 244, 27, 120, 240, 248, 13, 230,
67, 133, 32, 201, 129, 87, 9, 245, 66, 88, 166, 34, 46, 184, 119, 218,
144, 235, 163, 40, 138, 134, 127, 217, 64, 227, 116, 67, 55, 202, 130, 48,
199, 42, 251, 112, 124, 153, 123, 194, 243, 49, 250, 12, 78, 157, 167, 134,
210, 73, 156, 102, 21, 88, 216, 123, 45, 11, 208, 18, 47, 187, 20, 43,
3, 180, 124, 2, 136, 176, 77, 111, 138, 139, 91, 225, 126, 8, 74, 255,
88, 192, 193, 239, 138, 204, 139, 194, 166, 130, 252, 184, 140, 168, 30, 177,
121, 98, 131, 124, 69, 171, 75, 49, 184, 34, 76, 122, 202, 115, 184, 253,
120, 182, 33, 251, 1, 74, 216, 217, 243, 168, 70, 162, 119, 158, 197, 198,
61, 89, 7, 5, 54, 199, 211, 170, 23, 226, 44, 247, 165, 195, 7, 225,
91, 23, 50, 15, 51, 208, 106, 94, 12, 31, 43, 112, 146, 139, 246, 182,
113, 1, 97, 15, 66, 2, 51, 76, 164, 184, 237, 200, 218, 176, 72, 98,
33, 135, 38, 147, 140, 229, 50, 94, 81, 187, 129, 17, 238, 168, 146, 203,
181, 99, 164, 3, 104, 98, 255, 189, 114, 142, 86, 102, 229, 102, 80, 129,
64, 84, 79, 161, 81, 156, 128, 111, 164, 197, 18, 15, 55, 196, 198, 191,
28, 113, 117, 96, 207, 253, 19, 158, 231, 13, 53, 130, 252, 211, 58, 180,
212, 142, 7, 219, 38, 81, 62, 109, 167, 113, 33, 56, 97, 185, 157, 130,
186, 129, 119, 182, 196, 26, 54, 110, 65, 170, 166, 236, 30, 22, 162, 0,
106, 12, 248, 33, 48, 72, 159, 17, 76, 244, 172, 132, 89, 171, 196, 76,
254, 166, 76, 218, 226, 3, 52, 220, 238, 181, 179, 144, 225, 23, 3, 166,
158, 35, 228, 154, 204, 23, 203, 71, 134, 189, 18, 168, 236, 141, 117, 138,
2, 132, 78, 57, 154, 21, 250, 196, 184, 40, 161, 40, 10, 178, 134, 120,
132, 123, 101, 82, 205, 121, 55, 140, 231, 56, 231, 71, 206, 246, 198, 150,
146, 192, 45, 105, 242, 1, 125, 18, 176, 46, 222, 122, 19, 80, 113, 133,
131, 162, 81, 51, 98, 168, 247, 161, 139, 39, 63, 162, 22, 153, 170, 92,
91, 130, 174, 200, 45, 112, 99, 164, 132, 184, 191, 186, 200, 167, 86, 145,
167, 227, 130, 44, 12, 158, 172, 249, 204, 17, 54, 249, 16, 200, 21, 174,
67, 223, 105, 201, 50, 36, 133, 203, 244, 131, 228, 67, 29, 195, 91, 91,
55, 107, 167, 154, 170, 137, 218, 183, 169, 61, 99, 175, 128, 23, 142, 183,
66, 255, 59, 187, 66, 85, 212, 109, 168, 82, 16, 43, 67, 139, 114, 176,
216, 255, 130, 94, 152, 79, 183, 64, 100, 23, 214, 82, 34, 230, 48, 15,
242, 130, 50, 241, 81, 32, 5, 125, 183, 182, 184, 99, 248, 109, 159, 210,
226, 61, 119, 129, 39, 149, 78, 214, 107, 78, 147, 124, 228, 18, 143, 188,
84, 180, 233, 119, 64, 39, 158, 133, 177, 168, 6, 150, 80, 117, 150, 56,
49, 72, 49, 37, 30, 242, 49, 142, 33, 156, 34, 44, 44, 72, 58, 22,
249, 46, 168, 80, 25, 196, 64, 174, 97, 179, 244, 134, 213, 105, 63, 151,
21, 90, 168, 90, 245, 28, 157, 65, 250, 232, 188, 27, 99, 160, 156, 127,
68, 193, 10, 80, 205, 36, 138, 229, 12, 223, 70, 169, 251, 41, 48, 94,
41, 177, 99, 256, 158, 0, 6, 83, 231, 191, 120, 135, 157, 146, 218, 213,
160, 7, 47, 234, 98, 211, 79, 225, 179, 95, 175, 105, 185, 79, 115, 0,
104, 14, 65, 124, 15, 188, 52, 9, 253, 27, 132, 137, 13, 127, 75, 238,
185, 253, 33, 8, 52, 157, 164, 68, 232, 188, 69, 28, 209, 233, 5, 129,
216, 90, 252, 212, 33, 200, 222, 9, 112, 15, 43, 36, 226, 114, 15, 249,
217, 8, 148, 22, 147, 23, 143, 67, 222, 116, 235, 250, 212, 210, 39, 142,
108, 64, 209, 83, 73, 66, 99, 34, 17, 29, 45, 151, 244, 114, 28, 241,
144, 208, 146, 179, 132, 89, 217, 198, 252, 219, 205, 165, 75, 107, 11, 173,
76, 6, 196, 247, 152, 216, 248, 91, 209, 178, 57, 250, 174, 60, 79, 123,
18, 135, 9, 241, 230, 159, 184, 68, 156, 251, 215, 9, 113, 234, 75, 235,
103, 194, 205, 129, 230, 45, 96, 73, 157, 20, 200, 212, 212, 228, 161, 7,
231, 228, 108, 43, 198, 87, 140, 140, 4, 182, 164, 3, 53, 104, 250, 213,
85, 38, 89, 61, 52, 187, 35, 204, 86, 249, 100, 71, 248, 213, 163, 215,
66, 106, 252, 129, 40, 111, 47, 24, 186, 221, 85, 205, 199, 237, 122, 181,
32, 46, 182, 135, 33, 251, 142, 34, 208, 242, 128, 255, 4, 234, 15, 33,
167, 222, 32, 186, 191, 34, 255, 244, 98, 240, 228, 204, 30, 142, 32, 70,
69, 83, 110, 151, 10, 243, 141, 21, 223, 69, 61, 37, 59, 209, 102, 114,
223, 33, 129, 254, 255, 103, 86, 247, 235, 72, 126, 177, 102, 226, 102, 30,
149, 221, 62, 247, 251, 120, 163, 173, 57, 202, 204, 24, 39, 106, 120, 143,
202, 176, 191, 147, 37, 38, 51, 133, 47, 245, 157, 132, 154, 71, 183, 111,
30, 180, 18, 202, 82, 96, 170, 91, 157, 181, 212, 140, 256, 8, 196, 121,
149, 79, 66, 127, 113, 78, 4, 197, 84, 256, 111, 222, 102, 63, 228, 104,
136, 223, 67, 193, 93, 154, 249, 83, 204, 101, 200, 234, 84, 252, 230, 195,
43, 140, 120, 242, 89, 63, 166, 233, 209, 94, 43, 170, 126, 5, 205, 78,
112, 80, 143, 151, 146, 248, 137, 203, 45, 183, 61, 1, 155, 8, 102, 59,
68, 212, 230, 61, 254, 191, 128, 223, 176, 123, 229, 27, 146, 120, 96, 165,
213, 12, 232, 40, 186, 225, 66, 105, 200, 195, 212, 110, 237, 238, 151, 19,
12, 171, 150, 82, 7, 228, 79, 52, 15, 78, 62, 43, 21, 154, 114, 21,
12, 212, 256, 232, 125, 127, 5, 51, 37, 252, 136, 13, 47, 195, 168, 191,
231, 55, 57, 251, 214, 116, 15, 86, 210, 41, 249, 242, 119, 27, 250, 203,
107, 69, 90, 43, 206, 154, 127, 54, 100, 78, 187, 54, 244, 177, 234, 167,
202, 136, 209, 171, 69, 114, 133, 173, 26, 139, 78, 141, 128, 32, 124, 39,
45, 218, 96, 68, 90, 44, 67, 62, 83, 190, 188, 256, 103, 42, 102, 64,
249, 0, 141, 11, 61, 69, 70, 66, 233, 237, 29, 200, 251, 157, 71, 51,
64, 133, 113, 76, 35, 125, 76, 137, 217, 145, 35, 69, 226, 180, 56, 249,
156, 163, 176, 237, 81, 54, 85, 169, 115, 211, 129, 70, 248, 40, 252, 192,
194, 101, 247, 8, 181, 124, 217, 191, 194, 93, 99, 127, 117, 177, 144, 151,
228, 121, 32, 11, 89, 81, 26, 29, 183, 76, 249, 132, 179, 70, 34, 102,
20, 66, 87, 63, 124, 205, 174, 177, 87, 219, 73, 218, 91, 87, 176, 72,
15, 211, 47, 61, 251, 165, 39, 247, 146, 70, 150, 57, 1, 212, 36, 162,
39, 38, 16, 216, 3, 50, 116, 200, 32, 234, 77, 181, 155, 19, 90, 188,
36, 6, 254, 46, 46, 203, 25, 230, 181, 196, 4, 151, 225, 65, 122, 216,
168, 86, 158, 131, 136, 16, 49, 102, 233, 64, 154, 88, 228, 52, 146, 69,
93, 157, 243, 121, 70, 209, 126, 213, 88, 145, 236, 65, 70, 96, 204, 47,
10, 200, 77, 8, 103, 150, 48, 153, 5, 37, 52, 235, 209, 31, 181, 126,
83, 142, 224, 140, 6, 32, 200, 171, 160, 179, 115, 229, 75, 194, 208, 39,
59, 223, 52, 247, 38, 197, 135, 1, 6, 189, 106, 114, 168, 5, 211, 222,
44, 63, 90, 160, 116, 172, 170, 133, 125, 138, 39, 131, 23, 178, 10, 214,
36, 93, 28, 59, 68, 17, 123, 25, 255, 184, 204, 102, 194, 214, 129, 94,
159, 245, 112, 141, 62, 11, 61, 197, 124, 221, 205, 11, 79, 71, 201, 54,
58, 150, 29, 121, 87, 46, 240, 201, 68, 20, 194, 209, 47, 152, 158, 174,
193, 164, 120, 255, 216, 165, 247, 58, 85, 130, 220, 23, 122, 223, 188, 98,
21, 70, 72, 170, 150, 237, 76, 143, 112, 238, 206, 146, 215, 110, 4, 250,
68, 44, 174, 177, 30, 98, 143, 241, 180, 127, 113, 48, 0, 1, 179, 199,
59, 106, 201, 114, 29, 86, 173, 133, 217, 44, 200, 141, 107, 172, 16, 60,
82, 58, 239, 94, 141, 234, 186, 235, 109, 173, 249, 139, 141, 59, 100, 248,
84, 144, 49, 160, 51, 207, 164, 103, 74, 97, 146, 202, 193, 125, 168, 134,
236, 111, 135, 121, 59, 145, 168, 200, 181, 173, 109, 2, 255, 6, 9, 245,
90, 202, 214, 143, 121, 65, 85, 232, 132, 77, 228, 84, 26, 54, 184, 15,
161, 29, 177, 79, 43, 0, 156, 184, 163, 165, 62, 90, 179, 93, 45, 239,
1, 16, 120, 189, 127, 47, 74, 166, 20, 214, 233, 226, 89, 217, 229, 26,
156, 53, 162, 60, 21, 3, 192, 72, 111, 51, 53, 101, 181, 208, 88, 82,
179, 160, 219, 113, 240, 108, 43, 224, 162, 147, 62, 14, 95, 81, 205, 4,
160, 177, 225, 115, 29, 69, 235, 168, 148, 29, 128, 114, 124, 129, 172, 165,
215, 231, 214, 86, 160, 44, 157, 91, 248, 183, 73, 164, 56, 181, 162, 92,
141, 118, 127, 240, 196, 77, 0, 9, 244, 79, 250, 100, 195, 25, 255, 85,
94, 35, 212, 137, 107, 34, 110, 20, 200, 104, 17, 32, 231, 43, 150, 159,
231, 216, 223, 190, 226, 109, 162, 197, 87, 92, 224, 11, 111, 73, 60, 225,
238, 73, 246, 169, 19, 217, 119, 38, 121, 118, 70, 82, 99, 241, 110, 67,
31, 76, 146, 215, 124, 240, 31, 103, 139, 224, 75, 160, 31, 78, 93, 4,
64, 9, 103, 223, 6, 227, 119, 85, 116, 81, 21, 43, 46, 206, 234, 132,
85, 99, 22, 131, 135, 97, 86, 13, 234, 188, 21, 14, 89, 169, 207, 238,
219, 177, 190, 72, 157, 41, 114, 140, 92, 141, 186, 1, 63, 107, 225, 184,
118, 150, 153, 254, 241, 106, 120, 210, 104, 144, 151, 161, 88, 206, 125, 164,
15, 211, 173, 49, 146, 241, 71, 36, 58, 201, 46, 27, 33, 187, 91, 162,
117, 19, 210, 213, 187, 97, 193, 50, 190, 114, 217, 60, 61, 167, 207, 213,
213, 53, 135, 34, 156, 91, 115, 119, 46, 99, 242, 1, 90, 52, 198, 227,
201, 91, 216, 146, 210, 82, 121, 38, 73, 133, 182, 193, 132, 148, 246, 75,
109, 157, 179, 113, 176, 134, 205, 159, 148, 58, 103, 171, 132, 156, 133, 147,
161, 231, 39, 100, 175, 97, 125, 28, 183, 129, 135, 191, 202, 181, 29, 218,
43, 104, 148, 203, 189, 204, 4, 182, 169, 1, 134, 122, 141, 202, 13, 187,
177, 112, 162, 35, 231, 6, 8, 241, 99, 6, 191, 45, 113, 113, 101, 104};
// The S-Box we use for further linearity breaking.
// We created it by taking the digits of decimal expansion of e.
// The code that created it can be found in 'ProduceRandomSBox.c'.
unsigned char SBox[256] = {
//0 1 2 3 4 5 6 7 8 9 A B C D E F
0x7d, 0xd1, 0x70, 0x0b, 0xfa, 0x39, 0x18, 0xc3, 0xf3, 0xbb, 0xa7, 0xd4, 0x84, 0x25, 0x3b, 0x3c, // 0
0x2c, 0x15, 0x69, 0x9a, 0xf9, 0x27, 0xfb, 0x02, 0x52, 0xba, 0xa8, 0x4b, 0x20, 0xb5, 0x8b, 0x3a, // 1
0x88, 0x8e, 0x26, 0xcb, 0x71, 0x5e, 0xaf, 0xad, 0x0c, 0xac, 0xa1, 0x93, 0xc6, 0x78, 0xce, 0xfc, // 2
0x2a, 0x76, 0x17, 0x1f, 0x62, 0xc2, 0x2e, 0x99, 0x11, 0x37, 0x65, 0x40, 0xfd, 0xa0, 0x03, 0xc1, // 3
0xca, 0x48, 0xe2, 0x9b, 0x81, 0xe4, 0x1c, 0x01, 0xec, 0x68, 0x7a, 0x5a, 0x50, 0xf8, 0x0e, 0xa3, // 4
0xe8, 0x61, 0x2b, 0xa2, 0xeb, 0xcf, 0x8c, 0x3d, 0xb4, 0x95, 0x13, 0x08, 0x46, 0xab, 0x91, 0x7b, // 5
0xea, 0x55, 0x67, 0x9d, 0xdd, 0x29, 0x6a, 0x8f, 0x9f, 0x22, 0x4e, 0xf2, 0x57, 0xd2, 0xa9, 0xbd, // 6
0x38, 0x16, 0x5f, 0x4c, 0xf7, 0x9e, 0x1b, 0x2f, 0x30, 0xc7, 0x41, 0x24, 0x5c, 0xbf, 0x05, 0xf6, // 7
0x0a, 0x31, 0xa5, 0x45, 0x21, 0x33, 0x6b, 0x6d, 0x6c, 0x86, 0xe1, 0xa4, 0xe6, 0x92, 0x9c, 0xdf, // 8
0xe7, 0xbe, 0x28, 0xe3, 0xfe, 0x06, 0x4d, 0x98, 0x80, 0x04, 0x96, 0x36, 0x3e, 0x14, 0x4a, 0x34, // 9
0xd3, 0xd5, 0xdb, 0x44, 0xcd, 0xf5, 0x54, 0xdc, 0x89, 0x09, 0x90, 0x42, 0x87, 0xff, 0x7e, 0x56, // A
0x5d, 0x59, 0xd7, 0x23, 0x75, 0x19, 0x97, 0x73, 0x83, 0x64, 0x53, 0xa6, 0x1e, 0xd8, 0xb0, 0x49, // B
0x3f, 0xef, 0xbc, 0x7f, 0x43, 0xf0, 0xc9, 0x72, 0x0f, 0x63, 0x79, 0x2d, 0xc0, 0xda, 0x66, 0xc8, // C
0x32, 0xde, 0x47, 0x07, 0xb8, 0xe9, 0x1d, 0xc4, 0x85, 0x74, 0x82, 0xcc, 0x60, 0x51, 0x77, 0x0d, // D
0xaa, 0x35, 0xed, 0x58, 0x7c, 0x5b, 0xb9, 0x94, 0x6e, 0x8d, 0xb1, 0xc5, 0xb7, 0xee, 0xb6, 0xae, // E
0x10, 0xe0, 0xd6, 0xd9, 0xe5, 0x4f, 0xf1, 0x12, 0x00, 0xd0, 0xf4, 0x1a, 0x6f, 0x8a, 0xb3, 0xb2 }; // F
///////////////////////////////////////////////////////////////////////////////////////////////
//
// Helper functions definition portion.
//
///////////////////////////////////////////////////////////////////////////////////////////////
// Don't vectorize, move decl to header file
// Translates an input array with values in base 257 to output array with values in base 256.
// Returns the carry bit.
//
// Parameters:
// - input: the input array of size EIGHTH_N. Each value in the array is a number in Z_257.
// The MSB is assumed to be the last one in the array.
// - output: the input array encoded in base 256.
//
// Returns:
// - The carry bit (MSB).
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N]);
// Translates an input integer into the range (-FIELD_SIZE / 2) <= result <= (FIELD_SIZE / 2).
//
// Parameters:
// - x: the input integer.
//
// Returns:
// - The result, which equals (x MOD FIELD_SIZE), such that |result| <= (FIELD_SIZE / 2).
int Center(int x);
// Calculates bit reversal permutation.
//
// Parameters:
// - input: the input to reverse.
// - numOfBits: the number of bits in the input to reverse.
//
// Returns:
// - The resulting number, which is obtained from the input by reversing its bits.
int ReverseBits(int input, int numOfBits);
// Initializes the FFT fast lookup table.
// Shall be called only once.
void InitializeSWIFFTX();
// Calculates the FFT.
//
// Parameters:
// - input: the input to the FFT.
// - output: the resulting output.
void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output);
///////////////////////////////////////////////////////////////////////////////////////////////
// Helper functions implementation portion.
///////////////////////////////////////////////////////////////////////////////////////////////
// Don't vectorize, delete this copy.
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N])
{
swift_int32_t pairs[EIGHTH_N / 2];
int i;
for (i = 0; i < EIGHTH_N; i += 2)
{
// input[i] + 257 * input[i + 1]
pairs[i >> 1] = input[i] + input[i + 1] + (input[i + 1] << 8);
}
for (i = (EIGHTH_N / 2) - 1; i > 0; --i)
{
int j;
for (j = i - 1; j < (EIGHTH_N / 2) - 1; ++j)
{
// pairs[j + 1] * 513, because 257^2 = 513 % 256^2.
register swift_int32_t temp = pairs[j] + pairs[j + 1] + (pairs[j + 1] << 9);
pairs[j] = temp & 0xffff;
pairs[j + 1] += (temp >> 16);
}
}
for (i = 0; i < EIGHTH_N; i += 2)
{
output[i] = (unsigned char) (pairs[i >> 1] & 0xff);
output[i + 1] = (unsigned char) ((pairs[i >> 1] >> 8) & 0xff);
}
return (pairs[EIGHTH_N/2 - 1] >> 16);
}
int Center(int x)
{
int result = x % FIELD_SIZE;
if (result > (FIELD_SIZE / 2))
result -= FIELD_SIZE;
if (result < (FIELD_SIZE / -2))
result += FIELD_SIZE;
return result;
}
int ReverseBits(int input, int numOfBits)
{
register int reversed = 0;
for (input |= numOfBits; input > 1; input >>= 1)
reversed = (reversed << 1) | (input & 1);
return reversed;
}
void InitializeSWIFFTX()
{
int i, j, k, x;
// The powers of OMEGA
int omegaPowers[2 * N];
omegaPowers[0] = 1;
if (wasSetupDone)
return;
for (i = 1; i < (2 * N); ++i)
{
omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA);
}
for (i = 0; i < (N / W); ++i)
{
for (j = 0; j < W; ++j)
{
multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)];
}
}
for (x = 0; x < 256; ++x)
{
for (j = 0; j < 8; ++j)
{
register int temp = 0;
for (k = 0; k < 8; ++k)
{
temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)]
* ((x >> k) & 1);
}
fftTable[(x << 3) + j] = Center(temp);
}
}
wasSetupDone = true;
}
// input should be deinterleaved in contiguos memory
// output and F are 4x32
// multipliers & fftTable are scalar 16
void FFT_4way(const unsigned char input[EIGHTH_N], swift_int32_t *output)
{
swift_int16_t *mult = multipliers;
m128_swift_int32_t F[64];
for (int i = 0; i < 8; i++)
{
int j = i<<3;
// Need to isolate bytes in input, 8 bytes per lane.
// Each iteration of the loop process one input vector
// Each lane reads a different index to ffttable.
// deinterleave the input!
// load table with 4 lanes from different indexes into fftTable
// extract bytes into m128 4x16
// mutiply by vectorized mult
// input[lane][byte]
__m128i table;
table = _mm_set_epi32( fftTable[ input[3][i] ],
fftTable[ input[2][i] ],
fftTable[ input[1][i] ],
fftTable[ input[0][i] ] );
F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table );
table = _mm_set_epi32( fftTable[ input[3][i+1] ]
fftTable[ input[2][i+1] ]
fftTable[ input[1][i+1] ]
fftTable[ input[0][i+1] ] );
F[i+8] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table );
m128_swift_int16_t *table = &( fftTable[input[i] << 3] );
F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ),
mm128_const1_32( table[0] ) );
F[i+ 8] = _mm_mullo_epi32( mm128_const1_32( mult[j+1] ),
mm128_const1_32( table[1] ) );
F[i+16] = _mm_mullo_epi32( mm128_const1_32( mult[j+2] ),
mm128_const1_32( table[2] ) );
F[i+24] = _mm_mullo_epi32( mm128_const1_32( mult[j+3] ),
mm128_const1_32( table[3] ) );
F[i+32] = _mm_mullo_epi32( mm128_const1_32( mult[j+4] ),
mm128_const1_32( table[4] ) );
F[i+40] = _mm_mullo_epi32( mm128_const1_32( mult[j+5] ),
mm128_const1_32( table[5] ) );
F[i+48] = _mm_mullo_epi32( mm128_const1_32( mult[j+6] ),
mm128_const1_32( table[6] ) );
F[i+56] = _mm_mullo_epi32( mm128_const1_32( mult[j+7] ),
mm128_const1_32( table[7] ) );
}
for ( int i = 0; i < 8; i++ )
{
int j = i<<3;
ADD_SUB_4WAY( F[j ], F[j+1] );
ADD_SUB_4WAY( F[j+2], F[j+3] );
ADD_SUB_4WAY( F[j+4], F[j+5] );
ADD_SUB_4WAY( F[j+6], F[j+7] );
F[j+3] = _mm_slli_epi32( F[j+3], 4 );
F[j+7] = _mm_slli_epi32( F[j+7], 4 );
ADD_SUB_4WAY( F[j ], F[j+2] );
ADD_SUB_4WAY( F[j+1], F[j+3] );
ADD_SUB_4WAY( F[j+4], F[j+6] );
ADD_SUB_4WAY( F[j+5], F[j+7] );
F[j+5] = _mm_slli_epi32( F[j+5], 2 );
F[j+6] = _mm_slli_epi32( F[j+6], 4 );
F[j+7] = _mm_slli_epi32( F[j+7], 6 );
ADD_SUB_4WAY( F[j ], F[j+4] );
ADD_SUB_4WAY( F[j+1], F[j+5] );
ADD_SUB_4WAY( F[j+2], F[j+6] );
ADD_SUB_4WAY( F[j+3], F[j+7] );
output[i ] = Q_REDUCE_4WAY( F[j ] );
output[i+ 8] = Q_REDUCE_4WAY( F[j+1] );
output[i+16] = Q_REDUCE_4WAY( F[j+2] );
output[i+24] = Q_REDUCE_4WAY( F[j+3] );
output[i+32] = Q_REDUCE_4WAY( F[j+4] );
output[i+40] = Q_REDUCE_4WAY( F[j+5] );
output[i+48] = Q_REDUCE_4WAY( F[j+6] );
output[i+56] = Q_REDUCE_4WAY( F[j+7] );
}
}
// Calculates the FFT part of SWIFFT.
// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
// is only the A's part.
//
// Parameters:
// - input: the input to FFT.
// - m: the input size divided by 8. The function performs m FFTs.
// - output: will store the result.
void SWIFFTFFT(const unsigned char *input, int m, swift_int32_t *output)
{
int i;
for (i = 0;
i < m;
i++, input += EIGHTH_N, output += N)
{
FFT(input, output);
}
}
// Calculates the 'sum' part of SWIFFT, including the base change at the end.
// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
// is only the A's part.
//
// Parameters:
// - input: the input. Of size 64 * m.
// - m: the input size divided by 64.
// - output: will store the result.
// - a: the coefficients in the sum. Of size 64 * m.
void SWIFFTSum(const swift_int32_t *input, int m, unsigned char *output, const swift_int16_t *a)
{
int i, j;
swift_int32_t result[N];
register swift_int16_t carry = 0;
for (j = 0; j < N; ++j)
{
register swift_int32_t sum = 0;
const register swift_int32_t *f = input + j;
const register swift_int16_t *k = a + j;
for (i = 0; i < m; i++, f += N,k += N)
{
sum += (*f) * (*k);
}
result[j] = sum;
}
for (j = 0; j < N; ++j)
{
result[j] = ((FIELD_SIZE << 22) + result[j]) % FIELD_SIZE;
}
for (j = 0; j < 8; ++j)
{
int register carryBit = TranslateToBase256(result + (j << 3), output + (j << 3));
carry |= carryBit << j;
}
output[N] = carry;
}
// On entry input is interleaved 4x64. SIZE is *4 lanes / 8 bytes,
// multiply by 2.
void ComputeSingleSWIFFTX_4way( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
bool doSmooth)
{
int i;
// Will store the result of the FFT parts:
m128_swift_int32_t fftOut[N * M];
// swift_int32_t fftOut[N * M];
unsigned char intermediate[N * 3 + 8];
unsigned char carry0,carry1,carry2;
// Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets
// overriden by the following SWIFFT):
// 1. Compute the FFT of the input - the common part for the first 3 SWIFFTs:
SWIFFTFFT(input, M, fftOut);
// 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:
// 2a. The first SWIFFT:
SWIFFTSum(fftOut, M, intermediate, As);
// Remember the carry byte:
carry0 = intermediate[N];
// 2b. The second one:
SWIFFTSum(fftOut, M, intermediate + N, As + (M * N));
carry1 = intermediate[2 * N];
// 2c. The third one:
SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N));
carry2 = intermediate[3 * N];
//2d. Put three carry bytes in their place
intermediate[3 * N] = carry0;
intermediate[(3 * N) + 1] = carry1;
intermediate[(3 * N) + 2] = carry2;
// Padding intermediate output with 5 zeroes.
memset(intermediate + (3 * N) + 3, 0, 5);
// Apply the S-Box:
for (i = 0; i < (3 * N) + 8; ++i)
{
intermediate[i] = SBox[intermediate[i]];
}
// 3. The final and last SWIFFT:
SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut);
SWIFFTSum(fftOut, 3 * (N/8) + 1, output, As);
if (doSmooth)
{
unsigned char sum[N];
register int i, j;
memset(sum, 0, N);
for (i = 0; i < (N + 1) * 8; ++i)
{
register const swift_int16_t *AsRow;
register int AShift;
if (!(output[i >> 3] & (1 << (i & 7))))
{
continue;
}
AsRow = As + N * M + (i & ~(N - 1)) ;
AShift = i & 63;
for (j = AShift; j < N; ++j)
{
sum[j] += AsRow[j - AShift];
}
for(j = 0; j < AShift; ++j)
{
sum[j] -= AsRow[N - AShift + j];
}
}
for (i = 0; i < N; ++i)
{
output[i] = sum[i];
}
output[N] = 0;
}
}

1243
algo/swifftx/swifftx.c Normal file

File diff suppressed because it is too large Load Diff

1155
algo/swifftx/swifftx.c.bak Normal file

File diff suppressed because it is too large Load Diff

78
algo/swifftx/swifftx.h Normal file
View File

@@ -0,0 +1,78 @@
///////////////////////////////////////////////////////////////////////////////////////////////
//
// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
//
// SWIFFTX.h
//
// October 2008
//
// This file is the exact copy from the reference implementation.
//
///////////////////////////////////////////////////////////////////////////////////////////////
#ifndef __SWIFFTX__
#define __SWIFFTX__
#ifdef __cplusplus
extern "C"{
#endif
// See the remarks concerning compatibility issues inside stdint.h.
//#include <stdint.h>
#include <stdbool.h>
#include "stdint.h"
//#include "stdbool.h"
//#include "SHA3swift.h"
// The size of SWIFFTX input in bytes.
#define SWIFFTX_INPUT_BLOCK_SIZE 256
// The size of output block in bytes. The compression function of SWIFFT outputs a block of
// this size (i.e., this is the size of the resulting hash value).
#define SWIFFTX_OUTPUT_BLOCK_SIZE 65
// Computes the result of a single SWIFFT operation.
// This is the simple implementation, where our main concern is to show our design principles.
// It is made more efficient in the optimized version, by using FFT instead of DFT, and
// through other speed-up techniques.
//
// Parameters:
// - input: the input string. Consists of 8*m input bytes, where each octet passes the DFT
// processing.
// - m: the length of the input in bytes.
// - output: the resulting hash value of SWIFFT, of size 65 bytes (520 bit). This is the
// result of summing the dot products of the DFTS with the A's after applying the base
// change transformation
// - A: the A's coefficients to work with (since every SWIFFT in SWIFFTX uses different As).
// A single application of SWIFFT uses 64*m A's.
void ComputeSingleSWIFFT(unsigned char *input, unsigned short m,
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
const swift_int16_t *a);
// Computes the result of a single SWIFFTX operation.
// NOTE: for simplicity we use 'ComputeSingleSWIFFT()' as a subroutine. This is only to show
// the design idea. In the optimized versions we don't do this for efficiency concerns, since
// there we compute the first part (which doesn't involve the A coefficients) only once for all
// of the 3 invocations of SWIFFT. This enables us to introduce a significant speedup.
//
// Parameters:
// - input: the input input of 256 bytes (2048 bit).
// - output: the resulting hash value of SWIFFT, of size 64 bytes (512 bit).
// - doSMooth: if true, a final smoothing stage is performed and the output is of size 512 bits.
//
// Returns:
// - Success value.
void ComputeSingleSWIFFTX( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE] );
void ComputeSingleSWIFFTX_smooth( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], bool doSmooth);
// Calculates the powers of OMEGA and generates the bit reversal permutation.
// You must call this function before doing SWIFFT/X, otherwise you will get zeroes everywhere.
void InitializeSWIFFTX();
#ifdef __cplusplus
}
#endif
#endif // __SWIFFTX__

View File

@@ -12,7 +12,6 @@ bool register_c11_algo( algo_gate_t* gate )
gate->hash = (void*)&c11_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -125,7 +125,6 @@ bool register_fresh_algo( algo_gate_t* gate )
algo_not_tested();
gate->scanhash = (void*)&scanhash_fresh;
gate->hash = (void*)&freshhash;
gate->get_max64 = (void*)&get_max64_0x3ffff;
opt_target_factor = 256.0;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_timetravel_algo( algo_gate_t* gate )
gate->hash = (void*)&timetravel_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_timetravel10_algo( algo_gate_t* gate )
gate->hash = (void*)&timetravel10_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};

View File

@@ -3,7 +3,6 @@
bool register_tribus_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x1ffff;
#if defined (TRIBUS_4WAY)
// init_tribus_4way_ctx();
gate->scanhash = (void*)&scanhash_tribus_4way;

View File

@@ -12,7 +12,6 @@ bool register_x11_algo( algo_gate_t* gate )
gate->hash = (void*)&x11_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_x11gost_algo( algo_gate_t* gate )
gate->hash = (void*)&x11gost_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_x12_algo( algo_gate_t* gate )
gate->hash = (void*)&x12hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_phi1612_algo( algo_gate_t* gate )
gate->hash = (void*)&phi1612_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_x13_algo( algo_gate_t* gate )
gate->hash = (void*)&x13hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -12,7 +12,6 @@
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
//#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/simd-hash-2way.h"
@@ -28,7 +27,6 @@ typedef struct {
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
// luffa_2way_context luffa;
cubehashParam cube;
sph_shavite512_context shavite;
simd_2way_context simd;
@@ -49,7 +47,6 @@ void init_x13bcd_4way_ctx()
skein512_4way_init( &x13bcd_4way_ctx.skein );
jh512_4way_init( &x13bcd_4way_ctx.jh );
keccak512_4way_init( &x13bcd_4way_ctx.keccak );
// luffa_2way_init( &x13bcd_4way_ctx.luffa, 512 );
cubehashInit( &x13bcd_4way_ctx.cube, 512, 16, 32 );
sph_shavite512_init( &x13bcd_4way_ctx.shavite );
simd_2way_init( &x13bcd_4way_ctx.simd, 512 );
@@ -72,8 +69,6 @@ void x13bcd_4way_hash( void *state, const void *input )
// Blake
memcpy( &ctx.blake, &x13bcd_ctx_mid, sizeof(x13bcd_ctx_mid) );
blake512_4way( &ctx.blake, input + (64<<2), 16 );
// blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// Bmw
@@ -127,17 +122,6 @@ void x13bcd_4way_hash( void *state, const void *input )
sm3_4way_close( &ctx.sm3, sm3_vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
/*
// Luffa
intrlv_2x128( vhash, hash0, hash1, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, hash2, hash3, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
*/
// Cubehash
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
@@ -185,26 +169,6 @@ void x13bcd_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 );
/*
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
// SM3 parallel 32 bit
uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64)));
memset( sm3_vhash, 0, sizeof sm3_vhash );
uint32_t sm3_hash0[32] __attribute__ ((aligned (32)));
memset( sm3_hash0, 0, sizeof sm3_hash0 );
uint32_t sm3_hash1[32] __attribute__ ((aligned (32)));
memset( sm3_hash1, 0, sizeof sm3_hash1 );
uint32_t sm3_hash2[32] __attribute__ ((aligned (32)));
memset( sm3_hash2, 0, sizeof sm3_hash2 );
uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
memset( sm3_hash3, 0, sizeof sm3_hash3 );
sm3_4way( &ctx.sm3, vhash, 64 );
sm3_4way_close( &ctx.sm3, sm3_vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
*/
// Hamsi parallel 4x32x2
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
hamsi512_4way( &ctx.hamsi, vhash, 64 );

View File

@@ -12,7 +12,6 @@ bool register_x13sm3_algo( algo_gate_t* gate )
gate->hash = (void*)&x13sm3_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};
@@ -28,7 +27,6 @@ bool register_x13bcd_algo( algo_gate_t* gate )
gate->hash = (void*)&x13bcd_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -83,6 +83,5 @@ bool register_axiom_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_axiom;
gate->hash = (void*)&axiomhash;
gate->get_max64 = (void*)&get_max64_0x40LL;
return true;
}

View File

@@ -11,7 +11,6 @@ bool register_polytimos_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_polytimos;
gate->hash = (void*)&polytimos_hash;
#endif
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_veltor_algo( algo_gate_t* gate )
gate->hash = (void*)&veltor_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -12,7 +12,6 @@ bool register_x14_algo( algo_gate_t* gate )
gate->hash = (void*)&x14hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -275,34 +275,31 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
if ( s_ntime != endiandata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
uint64_t *edata = (uint64_t*)endiandata;
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
do
{
@@ -312,14 +309,15 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
x16r_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
for ( int i = 0; i < 4; i++ )
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( ( n < max_nonce ) && !(*restart) );
} while ( likely( ( n < max_nonce ) && !(*restart) ) );
*hashes_done = n - first_nonce + 1;
return 0;

View File

@@ -24,7 +24,6 @@
#include "algo/sha/sha-hash-4way.h"
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread bool s_implemented = false;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rt_4way_context_overlay
@@ -64,26 +63,8 @@ void x16rt_4way_hash( void* output, const void* input )
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
/*
void *in = (void*) input;
uint32_t *in32 = (uint32_t*) hash0;
uint32_t ntime = in32[17];
if ( s_ntime == UINT32_MAX )
{
uint32_t _ALIGN(64) timeHash[8];
x16rt_getTimeHash(ntime, &timeHash);
x16rt_getAlgoString(&timeHash[0], hashOrder);
}
*/
// Input data is both 64 bit interleaved (input)
// and deinterleaved in inp0-3.
// If First function uses 64 bit data it is not required to interleave inp
// first. It may use the inerleaved data dmost convenient, ie 4way 64 bit.
// All other functions assume data is deinterleaved in hash0-3
// All functions must exit with data deinterleaved in hash0-3.
// Alias in0-3 points to either inp0-3 or hash0-3 according to
// its hashOrder position. Size is also set accordingly.
// and deinterleaved in inp0-3. First function has no need re-interleave.
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
@@ -290,44 +271,31 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t _ALIGN(64) timeHash[4*8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
if ( opt_benchmark )
ptarget[7] = 0x0cff;
uint32_t ntime = swab32( pdata[17] );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16rt_getTimeHash( ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], hashOrder );
s_ntime = ntime;
s_implemented = true;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
hashOrder, ntime, timeHash );
}
if ( !s_implemented )
{
applog( LOG_WARNING, "s not implemented");
sleep(1);
return 0;
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
uint64_t *edata = (uint64_t*)endiandata;
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{

View File

@@ -331,35 +331,32 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
if ( opt_benchmark )
ptarget[7] = 0x0fff;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
if ( s_ntime != endiandata[17] )
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
uint64_t *edata = (uint64_t*)endiandata;
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
@@ -368,14 +365,15 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
x16rv2_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
for ( int i = 0; i < 4; i++ )
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( ( n < max_nonce ) && !(*restart) );
} while ( likely( ( n < max_nonce ) && !(*restart) ) );
*hashes_done = n - first_nonce + 1;
return 0;

View File

@@ -368,7 +368,7 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
@@ -378,25 +378,22 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
__m256i *noncev = (__m256i*)vdata + 9; // aligned
volatile uint8_t *restart = &(work_restart[thr_id].restart);
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
if ( opt_benchmark )
ptarget[7] = 0x0cff;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
if ( s_ntime != endiandata[17] )
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
uint64_t *edata = (uint64_t*)endiandata;
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(

View File

@@ -803,52 +803,40 @@ void sonoa_4way_hash( void *state, const void *input )
haval256_5_4way_close( &ctx.haval, state );
}
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hash7 = &( hash[7<<2] );
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id;
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const int thr_id = mythr->id;
// Need big endian data
mm256_bswap32_intrlv80_4x64( vdata, pdata );
for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] )
do
{
uint32_t mask = masks[m];
do
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
sonoa_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash7[ lane ] <= Htarg ) )
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ),
*noncev );
sonoa_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( ( ( hash7[ lane ] & mask ) == 0 ) )
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
n += 4;
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
break;
}
}
n += 4;
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return 0;
}

View File

@@ -11,7 +11,6 @@ bool register_sonoa_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_sonoa;
gate->hash = (void*)&sonoa_hash;
#endif
gate->get_max64 = (void*)&get_max64_0x1ffff;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
return true;
};

View File

@@ -205,50 +205,40 @@ void x17_4way_hash( void *state, const void *input )
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
// Need big endian data
mm256_bswap32_intrlv80_4x64( vdata, pdata );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{
const uint32_t mask = masks[ m ];
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x17_4way_hash( hash, vdata );
mm256_bswap32_intrlv80_4x64( vdata, pdata );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x17_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( ( hash7[ lane ] & mask ) == 0 )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
break;
}
for ( int lane = 0; lane < 4; lane++ )
if unlikely( ( hash7[ lane ] <= Htarg ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
return 0;
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif

View File

@@ -12,7 +12,6 @@ bool register_xevan_algo( algo_gate_t* gate )
gate->hash = (void*)&xevan_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
opt_target_factor = 256.0;
return true;
};

311
algo/x22/x22i-4way.c Normal file
View File

@@ -0,0 +1,311 @@
#include "x22i-gate.h"
#if defined(X22I_4WAY)
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/shavite/shavite-hash-2way.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sha-hash-4way.h"
#include "algo/haval/haval-hash-4way.h"
#include "algo/tiger/sph_tiger.h"
#include "algo/lyra2/lyra2.h"
#include "algo/gost/sph_gost.h"
#include "algo/swifftx/swifftx.h"
union _x22i_4way_ctx_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_groestl groestl;
hashState_echo echo;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
cube_2way_context cube;
shavite512_2way_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
haval256_5_4way_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
sha256_4way_context sha256;
};
typedef union _x22i_4way_ctx_overlay x22i_ctx_overlay;
void x22i_4way_hash( void *output, const void *input )
{
uint64_t hash0[8*4] __attribute__ ((aligned (64)));
uint64_t hash1[8*4] __attribute__ ((aligned (64)));
uint64_t hash2[8*4] __attribute__ ((aligned (64)));
uint64_t hash3[8*4] __attribute__ ((aligned (64)));
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
// unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
unsigned char hashA0[64] __attribute__((aligned(64))) = {0};
unsigned char hashA1[64] __attribute__((aligned(32))) = {0};
unsigned char hashA2[64] __attribute__((aligned(32))) = {0};
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
x22i_ctx_overlay ctx;
blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)hash0, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)hash1, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)hash2, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)hash3, 512 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
dintrlv_2x128_512( hash0, hash1, vhashA );
dintrlv_2x128_512( hash2, hash3, vhashB );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
(const BitSequence*)hash0, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
(const BitSequence*)hash1, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
(const BitSequence*)hash2, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
(const BitSequence*)hash3, 512 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8], vhash );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, &hash0[8], 64 );
sph_whirlpool_close( &ctx.whirlpool, &hash0[16] );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, &hash1[8], 64 );
sph_whirlpool_close( &ctx.whirlpool, &hash1[16] );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, &hash2[8], 64 );
sph_whirlpool_close( &ctx.whirlpool, &hash2[16] );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, &hash3[8], 64 );
sph_whirlpool_close( &ctx.whirlpool, &hash3[16] );
intrlv_4x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash );
// InitializeSWIFFTX();
ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1);
ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2);
ComputeSingleSWIFFTX((unsigned char*)hash3, (unsigned char*)hashA3);
intrlv_4x32_512( vhashA, hashA0, hashA1, hashA2, hashA3 );
memset( vhash, 0, 64*4 );
haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashA, 64 );
haval256_5_4way_close( &ctx.haval, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
memset( hashA0, 0, 64 );
memset( hashA1, 0, 64 );
memset( hashA2, 0, 64 );
memset( hashA3, 0, 64 );
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash0, 64);
sph_tiger_close(&ctx.tiger, (void*) hashA0);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash1, 64);
sph_tiger_close(&ctx.tiger, (void*) hashA1);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash2, 64);
sph_tiger_close(&ctx.tiger, (void*) hashA2);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash3, 64);
sph_tiger_close(&ctx.tiger, (void*) hashA3);
memset( hash0, 0, 64 );
memset( hash1, 0, 64 );
memset( hash2, 0, 64 );
memset( hash3, 0, 64 );
LYRA2RE( (void*) hash0, 32, (const void*) hashA0, 32, (const void*) hashA0,
32, 1, 4, 4 );
LYRA2RE( (void*) hash1, 32, (const void*) hashA1, 32, (const void*) hashA1,
32, 1, 4, 4 );
LYRA2RE( (void*) hash2, 32, (const void*) hashA2, 32, (const void*) hashA2,
32, 1, 4, 4 );
LYRA2RE( (void*) hash3, 32, (const void*) hashA3, 32, (const void*) hashA3,
32, 1, 4, 4 );
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash0, 64);
sph_gost512_close(&ctx.gost, (void*) hash0);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash1, 64);
sph_gost512_close(&ctx.gost, (void*) hash1);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash2, 64);
sph_gost512_close(&ctx.gost, (void*) hash2);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash3, 64);
sph_gost512_close(&ctx.gost, (void*) hash3);
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
sha256_4way_init( &ctx.sha256 );
sha256_4way( &ctx.sha256, vhash, 64 );
sha256_4way_close( &ctx.sha256, output );
// memcpy(output, hash, 32);
}
int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
InitializeSWIFFTX();
mm256_bswap32_intrlv80_4x64( vdata, pdata );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x22i_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if unlikely( ( hash7[ lane ] <= Htarg ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif // X22I_4WAY

28
algo/x22/x22i-gate.c Normal file
View File

@@ -0,0 +1,28 @@
#include "x22i-gate.h"
bool register_x22i_algo( algo_gate_t* gate )
{
#if defined (X22I_4WAY)
gate->scanhash = (void*)&scanhash_x22i_4way;
gate->hash = (void*)&x22i_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x22i;
gate->hash = (void*)&x22i_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
return true;
};
bool register_x25x_algo( algo_gate_t* gate )
{
#if defined (X22I_4WAY)
gate->scanhash = (void*)&scanhash_x25x_4way;
gate->hash = (void*)&x25x_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x25x;
gate->hash = (void*)&x25x_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
return true;
};

35
algo/x22/x22i-gate.h Normal file
View File

@@ -0,0 +1,35 @@
#ifndef X22I_GATE_H__
#define X22I_GATE_H__ 1
#include "algo-gate-api.h"
#include "simd-utils.h"
#include <stdint.h>
#include <unistd.h>
#if defined(__AVX2__) && defined(__AES__)
#define X22I_4WAY
#endif
bool register_x22i__algo( algo_gate_t* gate );
#if defined(X22I_4WAY)
void x22i_4way_hash( void *state, const void *input );
int scanhash_x22i_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void x25x_4way_hash( void *state, const void *input );
int scanhash_x25x_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif
void x22i_hash( void *state, const void *input );
int scanhash_x22i( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void x25x_hash( void *state, const void *input );
int scanhash_x25x( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif // X22I_GATE_H__

202
algo/x22/x22i.c Normal file
View File

@@ -0,0 +1,202 @@
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#else
#include "algo/groestl/sph_groestl.h"
#include "algo/echo/sph_echo.h"
#endif
#include "algo/skein/sph_skein.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/nist.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#include "algo/haval/sph-haval.h"
#include "algo/tiger/sph_tiger.h"
#include "algo/lyra2/lyra2.h"
#include "algo/gost/sph_gost.h"
#include "algo/swifftx/swifftx.h"
#include "x22i-gate.h"
union _x22i_context_overlay
{
sph_blake512_context blake;
sph_bmw512_context bmw;
#if defined(__AES__)
hashState_groestl groestl;
hashState_echo echo;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_jh512_context jh;
sph_keccak512_context keccak;
sph_skein512_context skein;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
sph_haval256_5_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
SHA256_CTX sha256;
};
typedef union _x22i_context_overlay x22i_context_overlay;
void x22i_hash( void *output, const void *input )
{
unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
unsigned char hash2[65] __attribute__((aligned(64))) = {0};
x22i_context_overlay ctx;
sph_blake512_init(&ctx.blake);
sph_blake512(&ctx.blake, input, 80);
sph_blake512_close(&ctx.blake, hash);
sph_bmw512_init(&ctx.bmw);
sph_bmw512(&ctx.bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx.bmw, hash);
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)hash, 512 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, hash, 64 );
sph_groestl512_close( &ctx.groestl, hash );
#endif
sph_skein512_init(&ctx.skein);
sph_skein512(&ctx.skein, (const void*) hash, 64);
sph_skein512_close(&ctx.skein, hash);
sph_jh512_init(&ctx.jh);
sph_jh512(&ctx.jh, (const void*) hash, 64);
sph_jh512_close(&ctx.jh, hash);
sph_keccak512_init(&ctx.keccak);
sph_keccak512(&ctx.keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx.keccak, hash);
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)hash, 64 );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash,
(const BitSequence*)hash, 512 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash,
(const BitSequence*)hash, 512 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, hash, 64 );
sph_echo512_close( &ctx.echo, hash );
#endif
sph_hamsi512_init(&ctx.hamsi);
sph_hamsi512(&ctx.hamsi, (const void*) hash, 64);
sph_hamsi512_close(&ctx.hamsi, hash);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash, 64);
sph_fugue512_close(&ctx.fugue, hash);
sph_shabal512_init(&ctx.shabal);
sph_shabal512(&ctx.shabal, (const void*) hash, 64);
sph_shabal512_close(&ctx.shabal, &hash[64]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[64], 64);
sph_whirlpool_close(&ctx.whirlpool, &hash[128]);
SHA512_Init( &ctx.sha512 );
SHA512_Update( &ctx.sha512, (const void*) &hash[128], 64);
SHA512_Final( (void*) &hash[192], &ctx.sha512 );
ComputeSingleSWIFFTX((unsigned char*)hash, (unsigned char*)hash2);
memset(hash, 0, 64);
sph_haval256_5_init(&ctx.haval);
sph_haval256_5(&ctx.haval,(const void*) hash2, 64);
sph_haval256_5_close(&ctx.haval,hash);
memset(hash2, 0, 64);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash, 64);
sph_tiger_close(&ctx.tiger, (void*) hash2);
memset(hash, 0, 64);
LYRA2RE((void*) hash, 32, (const void*) hash2, 32, (const void*) hash2, 32, 1, 4, 4);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash, 64);
sph_gost512_close(&ctx.gost, (void*) hash);
SHA256_Init( &ctx.sha256 );
SHA256_Update( &ctx.sha256, (const void*) hash, 64 );
SHA256_Final( (unsigned char*) hash, &ctx.sha256 );
memcpy(output, hash, 32);
}
int scanhash_x22i( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = first_nonce;
const int thr_id = mythr->id;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
InitializeSWIFFTX();
do
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
x22i_hash( hash, endiandata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = pdata[19] - first_nonce;
return 0;
}

402
algo/x22/x25x-4way.c Normal file
View File

@@ -0,0 +1,402 @@
#include "x22i-gate.h"
#if defined(X22I_4WAY)
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/sha/sha-hash-4way.h"
#include "algo/haval/haval-hash-4way.h"
#include "algo/blake/blake2s-hash-4way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/nist.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/tiger/sph_tiger.h"
#include "algo/lyra2/lyra2.h"
#include "algo/gost/sph_gost.h"
#include "algo/swifftx/swifftx.h"
#include "algo/panama/sph_panama.h"
#include "algo/lanehash/lane.h"
union _x25x_4way_ctx_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_groestl groestl;
hashState_echo echo;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
haval256_5_4way_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
sha256_4way_context sha256;
sph_panama_context panama;
blake2s_4way_state blake2s;
};
typedef union _x25x_4way_ctx_overlay x25x_4way_ctx_overlay;
void x25x_shuffle( void *hash )
{
// Simple shuffle algorithm, instead of just reversing
#define X25X_SHUFFLE_BLOCKS (24 * 64 / 2)
#define X25X_SHUFFLE_ROUNDS 12
static const uint16_t x25x_round_const[X25X_SHUFFLE_ROUNDS] =
{
0x142c, 0x5830, 0x678c, 0xe08c, 0x3c67, 0xd50d, 0xb1d8, 0xecb2,
0xd7ee, 0x6783, 0xfa6c, 0x4b9c
};
uint16_t* block_pointer = (uint16_t*)hash;
for ( int r = 0; r < X25X_SHUFFLE_ROUNDS; r++ )
{
for ( int i = 0; i < X25X_SHUFFLE_BLOCKS; i++ )
{
uint16_t block_value = block_pointer[ X25X_SHUFFLE_BLOCKS - i - 1 ];
block_pointer[i] ^= block_pointer[ block_value % X25X_SHUFFLE_BLOCKS ]
+ ( x25x_round_const[r] << (i % 16) );
}
}
#undef X25X_SHUFFLE_BLOCKS
#undef X25X_SHUFFLE_ROUNDS
}
void x25x_4way_hash( void *output, const void *input )
{
unsigned char hash0[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash1[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash2[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash3[25][64] __attribute__((aligned(64))) = {0};
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
unsigned char vhashA[24][64*4] __attribute__ ((aligned (64)));
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64_512( &hash0[0], &hash1[0], &hash2[0], &hash3[0], vhash );
bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( &hash0[1], &hash1[1], &hash2[1], &hash3[1], vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)&hash0[2],
(const char*)&hash0[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)&hash1[2],
(const char*)&hash1[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)&hash2[2],
(const char*)&hash2[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)&hash3[2],
(const char*)&hash3[1], 512 );
intrlv_4x64_512( vhash, &hash0[2], &hash1[2], &hash2[2], &hash3[2] );
skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64_512( &hash0[3], &hash1[3], &hash2[3], &hash3[3], vhash );
jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64_512( &hash0[4], &hash1[4], &hash2[4], &hash3[4], vhash );
keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64_512( &hash0[5], &hash1[5], &hash2[5], &hash3[5], vhash );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash0[6],
(const BitSequence*)&hash0[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash1[6],
(const BitSequence*)&hash1[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash2[6],
(const BitSequence*)&hash2[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash3[6],
(const BitSequence*)&hash3[5], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) &hash0[7],
(const byte*)&hash0[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) &hash1[7],
(const byte*)&hash1[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) &hash2[7],
(const byte*)&hash2[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) &hash3[7],
(const byte*)&hash3[6], 64 );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) &hash0[7], 64);
sph_shavite512_close(&ctx.shavite, &hash0[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) &hash1[7], 64);
sph_shavite512_close(&ctx.shavite, &hash1[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) &hash2[7], 64);
sph_shavite512_close(&ctx.shavite, &hash2[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) &hash3[7], 64);
sph_shavite512_close(&ctx.shavite, &hash3[8]);
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)&hash0[9],
(const BitSequence*)&hash0[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)&hash1[9],
(const BitSequence*)&hash1[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)&hash2[9],
(const BitSequence*)&hash2[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)&hash3[9],
(const BitSequence*)&hash3[8], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)&hash0[10],
(const BitSequence*)&hash0[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)&hash1[10],
(const BitSequence*)&hash1[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)&hash2[10],
(const BitSequence*)&hash2[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)&hash3[10],
(const BitSequence*)&hash3[9], 512 );
intrlv_4x64_512( vhash, &hash0[10], &hash1[10], &hash2[10], &hash3[10] );
hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( &hash0[11], &hash1[11], &hash2[11], &hash3[11], vhash );
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) &hash0[11], 64);
sph_fugue512_close(&ctx.fugue, &hash0[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) &hash1[11], 64);
sph_fugue512_close(&ctx.fugue, &hash1[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) &hash2[11], 64);
sph_fugue512_close(&ctx.fugue, &hash2[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) &hash3[11], 64);
sph_fugue512_close(&ctx.fugue, &hash3[12]);
intrlv_4x32_512( vhash, &hash0[12], &hash1[12], &hash2[12], &hash3[12] );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( &hash0[13], &hash1[13], &hash2[13], &hash3[13], vhash );
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) &hash0[13], 64);
sph_whirlpool_close(&ctx.whirlpool, &hash0[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) &hash1[13], 64);
sph_whirlpool_close(&ctx.whirlpool, &hash1[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) &hash2[13], 64);
sph_whirlpool_close(&ctx.whirlpool, &hash2[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) &hash3[13], 64);
sph_whirlpool_close(&ctx.whirlpool, &hash3[14]);
intrlv_4x64_512( vhash, &hash0[14], &hash1[14], &hash2[14], &hash3[14] );
sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64_512( &hash0[15], &hash1[15], &hash2[15], &hash3[15], vhash );
ComputeSingleSWIFFTX((unsigned char*)&hash0[12], (unsigned char*)&hash0[16]);
ComputeSingleSWIFFTX((unsigned char*)&hash1[12], (unsigned char*)&hash1[16]);
ComputeSingleSWIFFTX((unsigned char*)&hash2[12], (unsigned char*)&hash2[16]);
ComputeSingleSWIFFTX((unsigned char*)&hash3[12], (unsigned char*)&hash3[16]);
intrlv_4x32_512( &vhashA, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
memset( vhash, 0, 64*4 );
haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashA, 64 );
haval256_5_4way_close( &ctx.haval, vhash );
dintrlv_4x32_512( &hash0[17], &hash1[17], &hash2[17], &hash3[17], vhash );
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) &hash0[17], 64);
sph_tiger_close(&ctx.tiger, (void*) &hash0[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) &hash1[17], 64);
sph_tiger_close(&ctx.tiger, (void*) &hash1[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) &hash2[17], 64);
sph_tiger_close(&ctx.tiger, (void*) &hash2[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) &hash3[17], 64);
sph_tiger_close(&ctx.tiger, (void*) &hash3[18]);
LYRA2RE( (void*)&hash0[19], 32, (const void*)&hash0[18], 32,
(const void*)&hash0[18], 32, 1, 4, 4 );
LYRA2RE( (void*)&hash1[19], 32, (const void*)&hash1[18], 32,
(const void*)&hash1[18], 32, 1, 4, 4 );
LYRA2RE( (void*)&hash2[19], 32, (const void*)&hash2[18], 32,
(const void*)&hash2[18], 32, 1, 4, 4 );
LYRA2RE( (void*)&hash3[19], 32, (const void*)&hash3[18], 32,
(const void*)&hash3[18], 32, 1, 4, 4 );
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) &hash0[19], 64);
sph_gost512_close(&ctx.gost, (void*) &hash0[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) &hash1[19], 64);
sph_gost512_close(&ctx.gost, (void*) &hash1[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) &hash2[19], 64);
sph_gost512_close(&ctx.gost, (void*) &hash2[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) &hash3[19], 64);
sph_gost512_close(&ctx.gost, (void*) &hash3[20]);
intrlv_4x32_512( vhashA, &hash0[20], &hash1[20], &hash2[20], &hash3[20] );
memset( vhash, 0, 64*4 );
sha256_4way_init( &ctx.sha256 );
sha256_4way( &ctx.sha256, vhashA, 64 );
sha256_4way_close( &ctx.sha256, vhash );
dintrlv_4x32_512( &hash0[21], &hash1[21], &hash2[21], &hash3[21], vhash );
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) &hash0[21], 64 );
sph_panama_close(&ctx.panama, (void*) &hash0[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) &hash1[21], 64 );
sph_panama_close(&ctx.panama, (void*) &hash1[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) &hash2[21], 64 );
sph_panama_close(&ctx.panama, (void*) &hash2[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) &hash3[21], 64 );
sph_panama_close(&ctx.panama, (void*) &hash3[22]);
laneHash(512, (const BitSequence*)&hash0[22], 512, (BitSequence*)&hash0[23]);
laneHash(512, (const BitSequence*)&hash1[22], 512, (BitSequence*)&hash1[23]);
laneHash(512, (const BitSequence*)&hash2[22], 512, (BitSequence*)&hash2[23]);
laneHash(512, (const BitSequence*)&hash3[22], 512, (BitSequence*)&hash3[23]);
x25x_shuffle( hash0 );
x25x_shuffle( hash1 );
x25x_shuffle( hash2 );
x25x_shuffle( hash3 );
intrlv_4x32_512( &vhashA[ 0], &hash0[ 0], &hash1[ 0], &hash2[ 0], &hash3[ 0] );
intrlv_4x32_512( &vhashA[ 1], &hash0[ 1], &hash1[ 1], &hash2[ 1], &hash3[ 1] );
intrlv_4x32_512( &vhashA[ 2], &hash0[ 2], &hash1[ 2], &hash2[ 2], &hash3[ 2] );
intrlv_4x32_512( &vhashA[ 3], &hash0[ 3], &hash1[ 3], &hash2[ 3], &hash3[ 3] );
intrlv_4x32_512( &vhashA[ 4], &hash0[ 4], &hash1[ 4], &hash2[ 4], &hash3[ 4] );
intrlv_4x32_512( &vhashA[ 5], &hash0[ 5], &hash1[ 5], &hash2[ 5], &hash3[ 5] );
intrlv_4x32_512( &vhashA[ 6], &hash0[ 6], &hash1[ 6], &hash2[ 6], &hash3[ 6] );
intrlv_4x32_512( &vhashA[ 7], &hash0[ 7], &hash1[ 7], &hash2[ 7], &hash3[ 7] );
intrlv_4x32_512( &vhashA[ 8], &hash0[ 8], &hash1[ 8], &hash2[ 8], &hash3[ 8] );
intrlv_4x32_512( &vhashA[ 9], &hash0[ 9], &hash1[ 9], &hash2[ 9], &hash3[ 9] );
intrlv_4x32_512( &vhashA[10], &hash0[10], &hash1[10], &hash2[10], &hash3[10] );
intrlv_4x32_512( &vhashA[11], &hash0[11], &hash1[11], &hash2[11], &hash3[11] );
intrlv_4x32_512( &vhashA[12], &hash0[12], &hash1[12], &hash2[12], &hash3[12] );
intrlv_4x32_512( &vhashA[13], &hash0[13], &hash1[13], &hash2[13], &hash3[13] );
intrlv_4x32_512( &vhashA[14], &hash0[14], &hash1[14], &hash2[14], &hash3[14] );
intrlv_4x32_512( &vhashA[15], &hash0[15], &hash1[15], &hash2[15], &hash3[15] );
intrlv_4x32_512( &vhashA[16], &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
intrlv_4x32_512( &vhashA[17], &hash0[17], &hash1[17], &hash2[17], &hash3[17] );
intrlv_4x32_512( &vhashA[18], &hash0[18], &hash1[18], &hash2[18], &hash3[18] );
intrlv_4x32_512( &vhashA[19], &hash0[19], &hash1[19], &hash2[19], &hash3[19] );
intrlv_4x32_512( &vhashA[20], &hash0[20], &hash1[20], &hash2[20], &hash3[20] );
intrlv_4x32_512( &vhashA[21], &hash0[21], &hash1[21], &hash2[21], &hash3[21] );
intrlv_4x32_512( &vhashA[22], &hash0[22], &hash1[22], &hash2[22], &hash3[22] );
intrlv_4x32_512( &vhashA[23], &hash0[23], &hash1[23], &hash2[23], &hash3[23] );
blake2s_4way_init( &ctx.blake2s, 32 );
blake2s_4way_full_blocks( &ctx.blake2s, vhash, vhashA, 64*24 );
dintrlv_4x32( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash, 256 );
memcpy(output, &hash0[24], 32);
memcpy(output+32, &hash1[24], 32);
memcpy(output+64, &hash2[24], 32);
memcpy(output+96, &hash3[24], 32);
}
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
InitializeSWIFFTX();
mm256_bswap32_intrlv80_4x64( vdata, pdata );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x25x_4way_hash( hash, vdata );
for ( int i = 0; i < 4; i++ )
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif

236
algo/x22/x25x.c Normal file
View File

@@ -0,0 +1,236 @@
#include "x22i-gate.h"
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#else
#include "algo/groestl/sph_groestl.h"
#include "algo/echo/sph_echo.h"
#endif
#include "algo/skein/sph_skein.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/nist.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#include "algo/haval/sph-haval.h"
#include "algo/tiger/sph_tiger.h"
#include "algo/lyra2/lyra2.h"
#include "algo/gost/sph_gost.h"
#include "algo/swifftx/swifftx.h"
#include "algo/blake/sph-blake2s.h"
#include "algo/panama/sph_panama.h"
#include "algo/lanehash/lane.h"
union _x25x_context_overlay
{
sph_blake512_context blake;
sph_bmw512_context bmw;
#if defined(__AES__)
hashState_groestl groestl;
hashState_echo echo;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_jh512_context jh;
sph_keccak512_context keccak;
sph_skein512_context skein;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
sph_haval256_5_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
SHA256_CTX sha256;
sph_panama_context panama;
blake2s_state blake2s;
};
typedef union _x25x_context_overlay x25x_context_overlay;
void x25x_hash( void *output, const void *input )
{
unsigned char hash[25][64] __attribute__((aligned(64))) = {0};
x25x_context_overlay ctx;
sph_blake512_init(&ctx.blake);
sph_blake512(&ctx.blake, input, 80);
sph_blake512_close(&ctx.blake, &hash[0] );
sph_bmw512_init(&ctx.bmw);
sph_bmw512(&ctx.bmw, (const void*) &hash[0], 64);
sph_bmw512_close(&ctx.bmw, &hash[1]);
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)&hash[2],
(const char*)&hash[1], 512 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, &hash[1], 64 );
sph_groestl512_close( &ctx.groestl, &hash[2] );
#endif
sph_skein512_init(&ctx.skein);
sph_skein512(&ctx.skein, (const void*) &hash[2], 64);
sph_skein512_close(&ctx.skein, &hash[3]);
sph_jh512_init(&ctx.jh);
sph_jh512(&ctx.jh, (const void*) &hash[3], 64);
sph_jh512_close(&ctx.jh, &hash[4]);
sph_keccak512_init(&ctx.keccak);
sph_keccak512(&ctx.keccak, (const void*) &hash[4], 64);
sph_keccak512_close(&ctx.keccak, &hash[5]);
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)&hash[6],
(const BitSequence*)&hash[5], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) &hash[7],
(const byte*)&hash[6], 64 );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) &hash[7], 64);
sph_shavite512_close(&ctx.shavite, &hash[8]);
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)&hash[9],
(const BitSequence*)&hash[8], 512 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)&hash[10],
(const BitSequence*)&hash[9], 512 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, &hash[9], 64 );
sph_echo512_close( &ctx.echo, &hash[10] );
#endif
sph_hamsi512_init(&ctx.hamsi);
sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64);
sph_hamsi512_close(&ctx.hamsi, &hash[11]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) &hash[11], 64);
sph_fugue512_close(&ctx.fugue, &hash[12]);
sph_shabal512_init(&ctx.shabal);
sph_shabal512(&ctx.shabal, (const void*) &hash[12], 64);
sph_shabal512_close(&ctx.shabal, &hash[13]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[13], 64);
sph_whirlpool_close(&ctx.whirlpool, &hash[14]);
SHA512_Init( &ctx.sha512 );
SHA512_Update( &ctx.sha512, (const void*) &hash[14], 64);
SHA512_Final( (void*) &hash[15], &ctx.sha512 );
ComputeSingleSWIFFTX((unsigned char*)&hash[12], (unsigned char*)&hash[16]);
sph_haval256_5_init(&ctx.haval);
sph_haval256_5(&ctx.haval,(const void*) &hash[16], 64);
sph_haval256_5_close(&ctx.haval,&hash[17]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) &hash[17], 64);
sph_tiger_close(&ctx.tiger, (void*) &hash[18]);
LYRA2RE( (void*)&hash[19], 32, (const void*)&hash[18], 32,
(const void*)&hash[18], 32, 1, 4, 4 );
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) &hash[19], 64);
sph_gost512_close(&ctx.gost, (void*) &hash[20]);
SHA256_Init( &ctx.sha256 );
SHA256_Update( &ctx.sha256, (const void*) &hash[20], 64 );
SHA256_Final( (unsigned char*) &hash[21], &ctx.sha256 );
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) &hash[21], 64 );
sph_panama_close(&ctx.panama, (void*) &hash[22]);
laneHash(512, (const BitSequence*) &hash[22], 512, (BitSequence*) &hash[23]);
// Simple shuffle algorithm, instead of just reversing
#define X25X_SHUFFLE_BLOCKS (24 * 64 / 2)
#define X25X_SHUFFLE_ROUNDS 12
static const uint16_t x25x_round_const[X25X_SHUFFLE_ROUNDS] =
{
0x142c, 0x5830, 0x678c, 0xe08c, 0x3c67, 0xd50d, 0xb1d8, 0xecb2,
0xd7ee, 0x6783, 0xfa6c, 0x4b9c
};
uint16_t* block_pointer = (uint16_t*)hash;
for ( int r = 0; r < X25X_SHUFFLE_ROUNDS; r++ )
{
for ( int i = 0; i < X25X_SHUFFLE_BLOCKS; i++ )
{
uint16_t block_value = block_pointer[ X25X_SHUFFLE_BLOCKS - i - 1 ];
block_pointer[i] ^= block_pointer[ block_value % X25X_SHUFFLE_BLOCKS ]
+ ( x25x_round_const[r] << (i % 16) );
}
}
#undef X25X_SHUFFLE_BLOCKS
#undef X25X_SHUFFLE_ROUNDS
blake2s_simple( (uint8_t*)&hash[24], (const void*)(&hash[0]), 64 * 24 );
memcpy(output, &hash[24], 32);
}
int scanhash_x25x( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = first_nonce;
const int thr_id = mythr->id;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
InitializeSWIFFTX();
do
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
x25x_hash( hash, endiandata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -416,16 +416,6 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
return 0;
}
int64_t yescrypt_get_max64()
{
return 0x1ffLL;
}
int64_t yescryptr16_get_max64()
{
return 0xfffLL;
}
void yescrypt_gate_base(algo_gate_t *gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
@@ -437,7 +427,6 @@ void yescrypt_gate_base(algo_gate_t *gate )
bool register_yescrypt_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
if ( opt_param_n ) YESCRYPT_N = opt_param_n;
else YESCRYPT_N = 2048;
@@ -469,7 +458,6 @@ bool register_yescrypt_algo( algo_gate_t* gate )
bool register_yescryptr8_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 2048;
@@ -481,7 +469,6 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
bool register_yescryptr16_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
@@ -493,7 +480,6 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
bool register_yescryptr32_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
yescrypt_client_key = "WaviBanana";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;

View File

@@ -0,0 +1,322 @@
/*
* Copyright 2009 Colin Percival, 2014 savale
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <algo/yespower/crypto/sph_types.h>
#include <algo/yespower/utils/sysendian.h>
#include "blake2b-yp.h"
// Cyclic right rotation.
#ifndef ROTR64
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
#endif
// Little-endian byte access.
#define B2B_GET64(p) \
(((uint64_t) ((uint8_t *) (p))[0]) ^ \
(((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
(((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
(((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
(((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
(((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
(((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
(((uint64_t) ((uint8_t *) (p))[7]) << 56))
// G Mixing function.
#define B2B_G(a, b, c, d, x, y) { \
v[a] = v[a] + v[b] + x; \
v[d] = ROTR64(v[d] ^ v[a], 32); \
v[c] = v[c] + v[d]; \
v[b] = ROTR64(v[b] ^ v[c], 24); \
v[a] = v[a] + v[b] + y; \
v[d] = ROTR64(v[d] ^ v[a], 16); \
v[c] = v[c] + v[d]; \
v[b] = ROTR64(v[b] ^ v[c], 63); }
// Initialization Vector.
static const uint64_t blake2b_iv[8] = {
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};
// Compression function. "last" flag indicates last block.
static void blake2b_compress(blake2b_yp_ctx *ctx, int last)
{
const uint8_t sigma[12][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
int i;
uint64_t v[16], m[16];
// init work variables
for (i = 0; i < 8; i++) {
v[i] = ctx->h[i];
v[i + 8] = blake2b_iv[i];
}
v[12] ^= ctx->t[0]; // low 64 bits of offset
v[13] ^= ctx->t[1]; // high 64 bits
// last block flag set ?
if (last) {
v[14] = ~v[14];
}
// get little-endian words
for (i = 0; i < 16; i++) {
m[i] = B2B_GET64(&ctx->b[8 * i]);
}
// twelve rounds
for (i = 0; i < 12; i++) {
B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
}
for(i = 0; i < 8; ++i) {
ctx->h[i] ^= v[i] ^ v[i + 8];
}
}
// Initialize the hashing context "ctx" with optional key "key".
// 1 <= outlen <= 64 gives the digest size in bytes.
// Secret key (also <= 64 bytes) is optional (keylen = 0).
int blake2b_yp_init(blake2b_yp_ctx *ctx, size_t outlen,
const void *key, size_t keylen) // (keylen=0: no key)
{
size_t i;
// illegal parameters
if (outlen == 0 || outlen > 64 || keylen > 64) {
return -1;
}
// state, "param block"
for (i = 0; i < 8; i++) {
ctx->h[i] = blake2b_iv[i];
}
ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;
ctx->t[0] = 0; // input count low word
ctx->t[1] = 0; // input count high word
ctx->c = 0; // pointer within buffer
ctx->outlen = outlen;
// zero input block
for (i = keylen; i < 128; i++) {
ctx->b[i] = 0;
}
if (keylen > 0) {
blake2b_yp_update(ctx, key, keylen);
ctx->c = 128; // at the end
}
return 0;
}
// Add "inlen" bytes from "in" into the hash.
void blake2b_yp_update(blake2b_yp_ctx *ctx,
const void *in, size_t inlen) // data bytes
{
size_t i;
for (i = 0; i < inlen; i++) {
if (ctx->c == 128) { // buffer full ?
ctx->t[0] += ctx->c; // add counters
if (ctx->t[0] < ctx->c) // carry overflow ?
ctx->t[1]++; // high word
blake2b_compress(ctx, 0); // compress (not last)
ctx->c = 0; // counter to zero
}
ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
}
}
// Generate the message digest (size given in init).
// Result placed in "out".
void blake2b_yp_final(blake2b_yp_ctx *ctx, void *out)
{
size_t i;
ctx->t[0] += ctx->c; // mark last block offset
// carry overflow
if (ctx->t[0] < ctx->c) {
ctx->t[1]++; // high word
}
// fill up with zeros
while (ctx->c < 128) {
ctx->b[ctx->c++] = 0;
}
blake2b_compress(ctx, 1); // final block flag = 1
// little endian convert and store
for (i = 0; i < ctx->outlen; i++) {
((uint8_t *) out)[i] =
(ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
}
}
// inlen = number of bytes
void blake2b_yp_hash(void *out, const void *in, size_t inlen) {
blake2b_yp_ctx ctx;
blake2b_yp_init(&ctx, 32, NULL, 0);
blake2b_yp_update(&ctx, in, inlen);
blake2b_yp_final(&ctx, out);
}
// // keylen = number of bytes
void hmac_blake2b_yp_init(hmac_yp_ctx *hctx, const void *_key, size_t keylen) {
const uint8_t *key = _key;
uint8_t keyhash[32];
uint8_t pad[64];
uint64_t i;
if (keylen > 64) {
blake2b_yp_hash(keyhash, key, keylen);
key = keyhash;
keylen = 32;
}
blake2b_yp_init(&hctx->inner, 32, NULL, 0);
memset(pad, 0x36, 64);
for (i = 0; i < keylen; ++i) {
pad[i] ^= key[i];
}
blake2b_yp_update(&hctx->inner, pad, 64);
blake2b_yp_init(&hctx->outer, 32, NULL, 0);
memset(pad, 0x5c, 64);
for (i = 0; i < keylen; ++i) {
pad[i] ^= key[i];
}
blake2b_yp_update(&hctx->outer, pad, 64);
memset(keyhash, 0, 32);
}
// datalen = number of bits
void hmac_blake2b_yp_update(hmac_yp_ctx *hctx, const void *data, size_t datalen) {
// update the inner state
blake2b_yp_update(&hctx->inner, data, datalen);
}
void hmac_blake2b_yp_final(hmac_yp_ctx *hctx, uint8_t *digest) {
uint8_t ihash[32];
blake2b_yp_final(&hctx->inner, ihash);
blake2b_yp_update(&hctx->outer, ihash, 32);
blake2b_yp_final(&hctx->outer, digest);
memset(ihash, 0, 32);
}
// // keylen = number of bytes; inlen = number of bytes
void hmac_blake2b_yp_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen) {
hmac_yp_ctx hctx;
hmac_blake2b_yp_init(&hctx, key, keylen);
hmac_blake2b_yp_update(&hctx, in, inlen);
hmac_blake2b_yp_final(&hctx, out);
}
void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
{
hmac_yp_ctx PShctx, hctx;
size_t i;
uint8_t ivec[4];
uint8_t U[32];
uint8_t T[32];
uint64_t j;
int k;
size_t clen;
/* Compute HMAC state after processing P and S. */
hmac_blake2b_yp_init(&PShctx, passwd, passwdlen);
hmac_blake2b_yp_update(&PShctx, salt, saltlen);
/* Iterate through the blocks. */
for (i = 0; i * 32 < dkLen; i++) {
/* Generate INT(i + 1). */
be32enc(ivec, (uint32_t)(i + 1));
/* Compute U_1 = PRF(P, S || INT(i)). */
memcpy(&hctx, &PShctx, sizeof(hmac_yp_ctx));
hmac_blake2b_yp_update(&hctx, ivec, 4);
hmac_blake2b_yp_final(&hctx, U);
/* T_i = U_1 ... */
memcpy(T, U, 32);
for (j = 2; j <= c; j++) {
/* Compute U_j. */
hmac_blake2b_yp_init(&hctx, passwd, passwdlen);
hmac_blake2b_yp_update(&hctx, U, 32);
hmac_blake2b_yp_final(&hctx, U);
/* ... xor U_j ... */
for (k = 0; k < 32; k++) {
T[k] ^= U[k];
}
}
/* Copy as many bytes as necessary into buf. */
clen = dkLen - i * 32;
if (clen > 32) {
clen = 32;
}
memcpy(&buf[i * 32], T, clen);
}
/* Clean PShctx, since we never called _Final on it. */
memset(&PShctx, 0, sizeof(hmac_yp_ctx));
}

View File

@@ -0,0 +1,42 @@
#pragma once
#ifndef __BLAKE2B_H__
#define __BLAKE2B_H__
#include <stddef.h>
#include <stdint.h>
#if defined(_MSC_VER) || defined(__x86_64__) || defined(__x86__)
#define NATIVE_LITTLE_ENDIAN
#endif
// state context
typedef struct {
uint8_t b[128]; // input buffer
uint64_t h[8]; // chained state
uint64_t t[2]; // total number of bytes
size_t c; // pointer for b[]
size_t outlen; // digest size
} blake2b_yp_ctx;
typedef struct {
blake2b_yp_ctx inner;
blake2b_yp_ctx outer;
} hmac_yp_ctx;
#if defined(__cplusplus)
extern "C" {
#endif
int blake2b_yp_init(blake2b_yp_ctx *ctx, size_t outlen, const void *key, size_t keylen);
void blake2b_yp_update(blake2b_yp_ctx *ctx, const void *in, size_t inlen);
void blake2b_yp_final(blake2b_yp_ctx *ctx, void *out);
void blake2b_yp_hash(void *out, const void *in, size_t inlen);
void hmac_blake2b_yp_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen);
void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen);
#if defined(__cplusplus)
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
#define insecure_memzero(buf, len) /* empty */

View File

@@ -0,0 +1,94 @@
/*-
* Copyright 2007-2014 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _SYSENDIAN_H_
#define _SYSENDIAN_H_
#include <stdint.h>
/* Avoid namespace collisions with BSD <sys/endian.h>. */
#define be32dec libcperciva_be32dec
#define be32enc libcperciva_be32enc
#define be64enc libcperciva_be64enc
#define le32dec libcperciva_le32dec
#define le32enc libcperciva_le32enc
static inline uint32_t
be32dec(const void * pp)
{
const uint8_t * p = (uint8_t const *)pp;
return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
}
static inline void
be32enc(void * pp, uint32_t x)
{
uint8_t * p = (uint8_t *)pp;
p[3] = x & 0xff;
p[2] = (x >> 8) & 0xff;
p[1] = (x >> 16) & 0xff;
p[0] = (x >> 24) & 0xff;
}
static inline void
be64enc(void * pp, uint64_t x)
{
uint8_t * p = (uint8_t *)pp;
p[7] = x & 0xff;
p[6] = (x >> 8) & 0xff;
p[5] = (x >> 16) & 0xff;
p[4] = (x >> 24) & 0xff;
p[3] = (x >> 32) & 0xff;
p[2] = (x >> 40) & 0xff;
p[1] = (x >> 48) & 0xff;
p[0] = (x >> 56) & 0xff;
}
static inline uint32_t
le32dec(const void * pp)
{
const uint8_t * p = (uint8_t const *)pp;
return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
}
static inline void
le32enc(void * pp, uint32_t x)
{
uint8_t * p = (uint8_t *)pp;
p[0] = x & 0xff;
p[1] = (x >> 8) & 0xff;
p[2] = (x >> 16) & 0xff;
p[3] = (x >> 24) & 0xff;
}
#endif /* !_SYSENDIAN_H_ */

Some files were not shown because too many files have changed in this diff Show More