mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
72330eb5a7 | ||
![]() |
789c8b70bc | ||
![]() |
01550d94a2 | ||
![]() |
a042fb7612 | ||
![]() |
9d49e0be7a | ||
![]() |
a51f59086b | ||
![]() |
6f49ba09b7 |
25
Makefile.am
25
Makefile.am
@@ -18,7 +18,6 @@ dist_man_MANS = cpuminer.1
|
||||
cpuminer_SOURCES = \
|
||||
cpu-miner.c \
|
||||
util.c \
|
||||
uint256.cpp \
|
||||
api.c \
|
||||
sysinfos.c \
|
||||
algo-gate-api.c\
|
||||
@@ -51,12 +50,15 @@ cpuminer_SOURCES = \
|
||||
algo/blake/blake.c \
|
||||
algo/blake/blake-4way.c \
|
||||
algo/blake/sph_blake2b.c \
|
||||
algo/blake/blake2b.c \
|
||||
algo/blake/sph-blake2s.c \
|
||||
algo/blake/blake2s-hash-4way.c \
|
||||
algo/blake/blake2s.c \
|
||||
algo/blake/blake2s-gate.c \
|
||||
algo/blake/blake2s-4way.c \
|
||||
algo/blake/blake2b-hash-4way.c \
|
||||
algo/blake/blake2b.c \
|
||||
algo/blake/blake2b-gate.c \
|
||||
algo/blake/blake2b-4way.c \
|
||||
algo/blake/blakecoin-gate.c \
|
||||
algo/blake/mod_blakecoin.c \
|
||||
algo/blake/blakecoin.c \
|
||||
@@ -71,6 +73,9 @@ cpuminer_SOURCES = \
|
||||
algo/bmw/bmw256-hash-4way.c \
|
||||
algo/bmw/bmw512-hash-4way.c \
|
||||
algo/bmw/bmw256.c \
|
||||
algo/bmw/bmw512-gate.c \
|
||||
algo/bmw/bmw512.c \
|
||||
algo/bmw/bmw512-4way.c \
|
||||
algo/cryptonight/cryptolight.c \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
algo/cryptonight/cryptonight-aesni.c\
|
||||
@@ -166,7 +171,8 @@ cpuminer_SOURCES = \
|
||||
algo/scryptjane/scrypt-jane.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha2-hash-4way.c \
|
||||
algo/sha/sha256-hash-4way.c \
|
||||
algo/sha/sha512-hash-4way.c \
|
||||
algo/sha/sha256_hash_11way.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t-gate.c \
|
||||
@@ -238,6 +244,8 @@ cpuminer_SOURCES = \
|
||||
algo/x13/skunk-4way.c \
|
||||
algo/x13/skunk.c \
|
||||
algo/x13/drop.c \
|
||||
algo/x13/x13bcd-4way.c \
|
||||
algo/x13/x13bcd.c \
|
||||
algo/x14/x14-gate.c \
|
||||
algo/x14/x14.c \
|
||||
algo/x14/x14-4way.c \
|
||||
@@ -254,6 +262,13 @@ cpuminer_SOURCES = \
|
||||
algo/x16/x16r-gate.c \
|
||||
algo/x16/x16r.c \
|
||||
algo/x16/x16r-4way.c \
|
||||
algo/x16/x16rv2.c \
|
||||
algo/x16/x16rv2-4way.c \
|
||||
algo/x16/x16rt.c \
|
||||
algo/x16/x16rt-4way.c \
|
||||
algo/x16/hex.c \
|
||||
algo/x16/x21s-4way.c \
|
||||
algo/x16/x21s.c \
|
||||
algo/x17/x17-gate.c \
|
||||
algo/x17/x17.c \
|
||||
algo/x17/x17-4way.c \
|
||||
@@ -267,7 +282,9 @@ cpuminer_SOURCES = \
|
||||
algo/yescrypt/yescrypt.c \
|
||||
algo/yescrypt/sha256_Y.c \
|
||||
algo/yescrypt/yescrypt-best.c \
|
||||
algo/yespower/yespower.c \
|
||||
algo/yespower/yespower-gate.c \
|
||||
algo/yespower/yespower-blake2b.c \
|
||||
algo/yespower/crypto/blake2b-yp.c \
|
||||
algo/yespower/sha256_p.c \
|
||||
algo/yespower/yespower-opt.c
|
||||
|
||||
|
32
README.md
32
README.md
@@ -24,7 +24,7 @@ Requirements
|
||||
|
||||
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
|
||||
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
|
||||
optimizations a CPU with AES_NI is required. This includes Intel Westbridge
|
||||
optimizations a CPU with AES_NI is required. This includes Intel Westmere
|
||||
and newer and AMD equivalents. Further optimizations are available on some
|
||||
algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
|
||||
|
||||
@@ -55,9 +55,11 @@ Supported Algorithms
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
blakecoin blake256r8
|
||||
blake2b Blake2b 256
|
||||
blake2s Blake-2 S
|
||||
blakecoin blake256r8
|
||||
bmw BMW 256
|
||||
bmw512 BMW 512
|
||||
c11 Chaincoin
|
||||
decred
|
||||
deep Deepcoin (DCN)
|
||||
@@ -66,6 +68,7 @@ Supported Algorithms
|
||||
fresh Fresh
|
||||
groestl Groestl coin
|
||||
heavy Heavy
|
||||
hex x16r-hex
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
jha Jackpotcoin
|
||||
@@ -84,10 +87,12 @@ Supported Algorithms
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
pentablake Pentablake
|
||||
phi1612 phi, LUX coin (original algo)
|
||||
phi2 LUX coin (new algo)
|
||||
phi1612 phi
|
||||
phi2 Luxcoin (LUX)
|
||||
phi2-lux identical to phi2
|
||||
pluck Pluck:128 (Supcoin)
|
||||
polytimos Ninja
|
||||
power2b MicroBitcoin (MBC)
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
@@ -113,12 +118,17 @@ Supported Algorithms
|
||||
x11gost sib (SibCoin)
|
||||
x12 Galaxie Cash (GCH)
|
||||
x13 X13
|
||||
x13bcd bcd
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r Ravencoin (RVN)
|
||||
x16s pigeoncoin (PGN)
|
||||
x16r Ravencoin (RVN) (original algo)
|
||||
x16rv2 Ravencoin (RVN) (new algo)
|
||||
x16rt Gincoin (GIN)
|
||||
x16rt_veil Veil (VEIL)
|
||||
x16s Pigeoncoin (PGN)
|
||||
x17
|
||||
x21s
|
||||
xevan Bitsend (BSD)
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
@@ -126,6 +136,7 @@ Supported Algorithms
|
||||
yescryptr32 WAVI
|
||||
yespower Cryply
|
||||
yespowerr16 Yenten (YTN)
|
||||
yespoer-b2b generic yespower + blake2b
|
||||
zr5 Ziftr
|
||||
|
||||
Errata
|
||||
@@ -148,14 +159,15 @@ Benchmark testing does not work for x11evo.
|
||||
Bugs
|
||||
----
|
||||
|
||||
Users are encouraged to post their bug reports on the Bitcoin Talk
|
||||
forum at:
|
||||
Users are encouraged to post their bug reports using git issues or on the
|
||||
Bitcoin Talk forum at:
|
||||
|
||||
https://bitcointalk.org/index.php?topic=1326803.0
|
||||
|
||||
All problem reports must be accompanied by a proper definition.
|
||||
All problem reports must be accompanied by a proper problem definition.
|
||||
This should include how the problem occurred, the command line and
|
||||
output from the miner showing the startup and any errors.
|
||||
output from the miner showing the startup messages and any errors.
|
||||
A history is also useful, ie did it work before.
|
||||
|
||||
Donations
|
||||
---------
|
||||
|
@@ -6,9 +6,6 @@ This feature requires recent SW including GCC version 5 or higher and
|
||||
openssl version 1.1 or higher. It may also require using "-march=znver1"
|
||||
compile flag.
|
||||
|
||||
cpuminer-opt is a console program, if you're using a mouse you're doing it
|
||||
wrong.
|
||||
|
||||
Security warning
|
||||
----------------
|
||||
|
||||
@@ -34,10 +31,79 @@ Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
||||
supported.
|
||||
|
||||
64 bit Linux or Windows operating system. Apple and Android are not supported.
|
||||
FreeBSD YMMV.
|
||||
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.9.9
|
||||
|
||||
Added power2b algo for MicroBitcoin.
|
||||
|
||||
Added generic yespower-b2b (yespower + blake2b) algo to be used with
|
||||
the parameters introduced in v3.9.7 for yespower & yescrypt.
|
||||
|
||||
Display additional info when a share is rejected.
|
||||
|
||||
Some low level enhancements and minor tweaking of log output.
|
||||
|
||||
RELEASE_NOTES (this file) and README.md added to Windows release package.
|
||||
|
||||
v3.9.8.1
|
||||
|
||||
Summary log report will be generated on stratum diff change or after 5 minutes,
|
||||
whichever comes first, to prevent incorrect data in the report.
|
||||
|
||||
Removed phi2-lux alias (introduced in v3.9.8) due to Luxcoin's planned fork
|
||||
to a new algo. The new Luxcoin algo is not supported by cpuminer-opt.
|
||||
Until the fork Luxcoin can be mined using phi2 algo.
|
||||
|
||||
--hide-diff option is deprecated and has no effect. It will be removed in a
|
||||
future release.
|
||||
|
||||
v3.9.8
|
||||
|
||||
Changes to log output to provide data more relevant to actual mining
|
||||
performance.
|
||||
phi2 can now handle pools with a mix of coins that use and don't use roots.
|
||||
phi2-lux added as an alias for phi2 as they are identical except for roots.
|
||||
Add x16rv2 algo for Ravencoin fork.
|
||||
|
||||
v3.9.7
|
||||
|
||||
Command line option changes:
|
||||
|
||||
"-R" is no longer used as a shortcut for "--retry-pause", users must
|
||||
use the long option.
|
||||
|
||||
New options:
|
||||
|
||||
-N, --param-n: set the N parameter for yescrypt, yespower or scrypt algos
|
||||
-R, --param-r: set the R parameter for yescrypt or yespower algos, scrypt is
|
||||
hardcoded with R=1
|
||||
-K, --param-key: set the client key/pers parameter for yescrypt/yespower algos.
|
||||
|
||||
These options can be used to mine yescrypt or yespower variations using
|
||||
the generic yescrypt or yespower algo name and specifying the parameters
|
||||
manually. They can even be used to mine variations that aren't formally
|
||||
supported by a unique algo name. Existing algos can continue to to be mined
|
||||
using their original name without parameters.
|
||||
|
||||
v3.9.6.2
|
||||
|
||||
New algo blake2b.
|
||||
Faster myr-gr on Ryzen using SHA.
|
||||
Faster blake2s SSE2.
|
||||
Small speedup of around 1% for several other algos.
|
||||
|
||||
v3.9.6.1
|
||||
|
||||
New algos: x21s, hex (alias x16r-hex).
|
||||
|
||||
v3.9.6
|
||||
|
||||
New algos: bmw512, x16rt, x16rt-veil (alias veil), x13bcd (alias bcd).
|
||||
|
||||
v3.9.5.4
|
||||
|
||||
Fixed sha256q AVX2 poor performance.
|
||||
|
@@ -122,7 +122,6 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
|
||||
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
|
||||
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
|
||||
gate->set_target = (void*)&std_set_target;
|
||||
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
|
||||
gate->build_block_header = (void*)&std_build_block_header;
|
||||
gate->build_extraheader = (void*)&std_build_extraheader;
|
||||
@@ -167,9 +166,10 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
case ALGO_BMW512: register_bmw512_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
@@ -181,6 +181,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HEX: register_hex_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
@@ -203,6 +204,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_PHI2: register_phi2_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_POWER2B: register_power2b_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
@@ -227,12 +229,17 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13BCD: register_x13bcd_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16RV2: register_x16rv2_algo ( gate ); break;
|
||||
case ALGO_X16RT: register_x16rt_algo ( gate ); break;
|
||||
case ALGO_X16RT_VEIL: register_x16rt_veil_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_X21S: register_x21s_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
/* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
|
||||
@@ -245,6 +252,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
|
||||
case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break;
|
||||
case ALGO_YESPOWER_B2B: register_yespower_b2b_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
@@ -327,19 +335,16 @@ const char* const algo_alias_map[][2] =
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2v3", "lyra2rev3" },
|
||||
{ "lyra2zoin", "lyra2z330" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "phi", "phi1612" },
|
||||
// { "sia", "blake2b" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "timetravel8", "timetravel" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ "veil", "x16rt-veil" },
|
||||
{ "x16r-hex", "hex" },
|
||||
{ "yenten", "yescryptr16" },
|
||||
{ "yescryptr8k", "yescrypt" },
|
||||
{ "zcoin", "lyra2z" },
|
||||
{ "zoin", "lyra2z330" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
@@ -361,40 +366,3 @@ void get_algo_alias( char** algo_or_alias )
|
||||
#undef ALIAS
|
||||
#undef PROPER
|
||||
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d, job %s.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr->id, work->job_id );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
return false;
|
||||
}
|
||||
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
if ( !opt_quiet )
|
||||
// applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d.",
|
||||
// accepted_share_count + rejected_share_count + 1,
|
||||
// thr->id, lane );
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d, job %s.",
|
||||
accepted_share_count + rejected_share_count + 1, thr->id,
|
||||
lane, work->job_id );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -85,14 +85,16 @@
|
||||
|
||||
typedef uint32_t set_t;
|
||||
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define SSE42_OPT 4
|
||||
#define AVX_OPT 8
|
||||
#define AVX2_OPT 0x10
|
||||
#define SHA_OPT 0x20
|
||||
#define AVX512_OPT 0x40
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define SSE42_OPT 4
|
||||
#define AVX_OPT 8 // Sandybridge
|
||||
#define AVX2_OPT 0x10 // Haswell
|
||||
#define SHA_OPT 0x20 // sha256 (Ryzen, Ice Lake)
|
||||
#define AVX512_OPT 0x40 // AVX512- F, VL, DQ, BW (Skylake-X)
|
||||
#define VAES_OPT 0x80 // VAES (Ice Lake)
|
||||
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
|
||||
@@ -132,7 +134,6 @@ void ( *decode_extra_data ) ( struct work*, uint64_t* );
|
||||
void ( *wait_for_diff ) ( struct stratum_ctx* );
|
||||
int64_t ( *get_max64 ) ();
|
||||
bool ( *work_decode ) ( const json_t*, struct work* );
|
||||
void ( *set_target) ( struct work*, double );
|
||||
bool ( *submit_getwork_result ) ( CURL*, struct work* );
|
||||
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
|
||||
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
|
||||
@@ -193,15 +194,6 @@ void four_way_not_tested();
|
||||
// allways returns failure
|
||||
int null_scanhash();
|
||||
|
||||
// Allow algos to submit from scanhash loop.
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr );
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane );
|
||||
|
||||
|
||||
bool submit_work( struct thr_info *thr, const struct work *work_in );
|
||||
|
||||
// displays warning
|
||||
void null_hash ();
|
||||
void null_hash_suw();
|
||||
@@ -232,10 +224,6 @@ int64_t get_max64_0x3fffffLL();
|
||||
int64_t get_max64_0x1ffff();
|
||||
int64_t get_max64_0xffffLL();
|
||||
|
||||
void std_set_target( struct work *work, double job_diff );
|
||||
void alt_set_target( struct work* work, double job_diff );
|
||||
void scrypt_set_target( struct work *work, double job_diff );
|
||||
|
||||
bool std_le_work_decode( const json_t *val, struct work *work );
|
||||
bool std_be_work_decode( const json_t *val, struct work *work );
|
||||
bool jr2_work_decode( const json_t *val, struct work *work );
|
||||
|
@@ -85,8 +85,9 @@ bool register_argon2_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_argon2;
|
||||
gate->hash = (void*)&argon2hash;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&argon2_get_max64;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -36,43 +36,39 @@ void argon2d_crds_hash( void *output, const void *input )
|
||||
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_crds_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_crds_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d_crds;
|
||||
gate->hash = (void*)&argon2d_crds_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -107,43 +103,40 @@ void argon2d_dyn_hash( void *output, const void *input )
|
||||
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
do
|
||||
{
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_dyn_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_dyn_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d_dyn;
|
||||
gate->hash = (void*)&argon2d_dyn_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -171,11 +164,10 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
be32enc( &endiandata[19], n );
|
||||
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80,
|
||||
(char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
|
||||
if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) )
|
||||
if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return true;
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
n++;
|
||||
|
||||
@@ -192,9 +184,9 @@ int64_t get_max64_0x1ff() { return 0x1ff; }
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d4096;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ff;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -28,6 +28,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#include "core.h"
|
||||
#include "argon2d_thread.h"
|
||||
@@ -99,7 +100,8 @@ int allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
if (context->allocate_cbk) {
|
||||
(context->allocate_cbk)(memory, memory_size);
|
||||
} else {
|
||||
*memory = malloc(memory_size);
|
||||
*memory = _mm_malloc( memory_size, 64 );
|
||||
// *memory = malloc(memory_size);
|
||||
}
|
||||
|
||||
if (*memory == NULL) {
|
||||
@@ -116,7 +118,8 @@ void free_memory(const argon2_context *context, uint8_t *memory,
|
||||
if (context->free_cbk) {
|
||||
(context->free_cbk)(memory, memory_size);
|
||||
} else {
|
||||
free(memory);
|
||||
// free(memory);
|
||||
_mm_free( memory );
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -96,14 +96,14 @@ static void fill_block(__m256i *state, const block *ref_block,
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
|
||||
state[i], _mm256_load_si256((const __m256i *)ref_block->v + i));
|
||||
block_XY[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
|
||||
state[i], _mm256_load_si256((const __m256i *)next_block->v + i));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
|
||||
state[i], _mm256_load_si256((const __m256i *)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,7 +139,7 @@ static void fill_block(__m256i *state, const block *ref_block,
|
||||
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
|
||||
_mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
|
||||
_mm256_store_si256((__m256i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -29,6 +29,8 @@
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if !defined(__AVX512F__)
|
||||
#if !defined(__AVX2__)
|
||||
#if !defined(__XOP__)
|
||||
@@ -182,64 +184,63 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
||||
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
|
||||
#define rotr32 mm256_swap32_64
|
||||
#define rotr24 mm256_ror3x8_64
|
||||
#define rotr16 mm256_ror1x16_64
|
||||
#define rotr63( x ) mm256_rol_64( x, 1 )
|
||||
|
||||
//#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
||||
//#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
//#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
//#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
|
||||
|
||||
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
__m256i ml0, ml1; \
|
||||
ml0 = _mm256_mul_epu32(A0, B0); \
|
||||
ml1 = _mm256_mul_epu32(A1, B1); \
|
||||
ml0 = _mm256_add_epi64(ml0, ml0); \
|
||||
ml1 = _mm256_add_epi64(ml1, ml1); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml0)); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml1)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr32(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
\
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr24(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D0 = rotr32(D0); \
|
||||
D1 = rotr32(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
\
|
||||
ml0 = _mm256_mul_epu32(C0, D0); \
|
||||
ml1 = _mm256_mul_epu32(C1, D1); \
|
||||
ml0 = _mm256_add_epi64(ml0, ml0); \
|
||||
ml1 = _mm256_add_epi64(ml1, ml1); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml0)); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml1)); \
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B0 = rotr24(B0); \
|
||||
B1 = rotr24(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
__m256i ml0, ml1; \
|
||||
ml0 = _mm256_mul_epu32(A0, B0); \
|
||||
ml1 = _mm256_mul_epu32(A1, B1); \
|
||||
ml0 = _mm256_add_epi64(ml0, ml0); \
|
||||
ml1 = _mm256_add_epi64(ml1, ml1); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml0)); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml1)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr16(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr63(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D0 = rotr16(D0); \
|
||||
D1 = rotr16(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
ml0 = _mm256_mul_epu32(C0, D0); \
|
||||
ml1 = _mm256_mul_epu32(C1, D1); \
|
||||
ml0 = _mm256_add_epi64(ml0, ml0); \
|
||||
ml1 = _mm256_add_epi64(ml1, ml1); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml0)); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml1)); \
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B0 = rotr63(B0); \
|
||||
B1 = rotr63(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
@@ -259,16 +260,14 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
C1 = tmp1; \
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while(0);
|
||||
|
||||
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
@@ -287,16 +286,14 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
C1 = tmp1; \
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
|
@@ -308,12 +308,12 @@ static const sph_u32 CS[16] = {
|
||||
#define GS_4WAY( m0, m1, c0, c1, a, b, c, d ) \
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( _mm_xor_si128( \
|
||||
_mm_set_epi32( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
_mm_set1_epi32( c1 ), m0 ), b ), a ); \
|
||||
d = mm128_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm128_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( _mm_xor_si128( \
|
||||
_mm_set_epi32( c0, c0, c0, c0 ), m1 ), b ), a ); \
|
||||
_mm_set1_epi32( c0 ), m1 ), b ), a ); \
|
||||
d = mm128_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm128_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
@@ -508,14 +508,18 @@ do { \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm_xor_si128( S0, _mm_set1_epi32( CS0 ) ); \
|
||||
V9 = _mm_xor_si128( S1, _mm_set1_epi32( CS1 ) ); \
|
||||
VA = _mm_xor_si128( S2, _mm_set1_epi32( CS2 ) ); \
|
||||
VB = _mm_xor_si128( S3, _mm_set1_epi32( CS3 ) ); \
|
||||
VC = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS4 ) ); \
|
||||
VD = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS5 ) ); \
|
||||
VE = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS7 ) ); \
|
||||
V8 = _mm_xor_si128( S0, m128_const1_64( 0x243F6A88243F6A88 ) ); \
|
||||
V9 = _mm_xor_si128( S1, m128_const1_64( 0x85A308D385A308D3 ) ); \
|
||||
VA = _mm_xor_si128( S2, m128_const1_64( 0x13198A2E13198A2E ) ); \
|
||||
VB = _mm_xor_si128( S3, m128_const1_64( 0x0370734403707344 ) ); \
|
||||
VC = _mm_xor_si128( _mm_set1_epi32( T0 ), \
|
||||
m128_const1_64( 0xA4093822A4093822 ) ); \
|
||||
VD = _mm_xor_si128( _mm_set1_epi32( T0 ), \
|
||||
m128_const1_64( 0x299F31D0299F31D0 ) ); \
|
||||
VE = _mm_xor_si128( _mm_set1_epi32( T1 ), \
|
||||
m128_const1_64( 0x082EFA98082EFA98 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set1_epi32( T1 ), \
|
||||
m128_const1_64( 0xEC4E6C89EC4E6C89 ) ); \
|
||||
BLAKE256_4WAY_BLOCK_BSWAP32; \
|
||||
ROUND_S_4WAY(0); \
|
||||
ROUND_S_4WAY(1); \
|
||||
@@ -631,16 +635,20 @@ do { \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set1_epi32( CS0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set1_epi32( CS1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set1_epi32( CS2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set1_epi32( CS3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi32( T0 ), _mm256_set1_epi32( CS4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi32( T0 ), _mm256_set1_epi32( CS5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS7 ) ); \
|
||||
shuf_bswap32 = _mm256_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203, \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
V8 = _mm256_xor_si256( S0, m256_const1_64( 0x243F6A88243F6A88 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, m256_const1_64( 0x85A308D385A308D3 ) ); \
|
||||
VA = _mm256_xor_si256( S2, m256_const1_64( 0x13198A2E13198A2E ) ); \
|
||||
VB = _mm256_xor_si256( S3, m256_const1_64( 0x0370734403707344 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi32( T0 ),\
|
||||
m256_const1_64( 0xA4093822A4093822 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi32( T0 ),\
|
||||
m256_const1_64( 0x299F31D0299F31D0 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi32( T1 ), \
|
||||
m256_const1_64( 0x082EFA98082EFA98 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi32( T1 ), \
|
||||
m256_const1_64( 0xEC4E6C89EC4E6C89 ) ); \
|
||||
shuf_bswap32 = m256_const_64( 0x0c0d0e0f08090a0b, 0x0405060700010203, \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
M0 = _mm256_shuffle_epi8( * buf , shuf_bswap32 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap32 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap32 ); \
|
||||
@@ -696,14 +704,14 @@ blake32_4way_init( blake_4way_small_context *ctx, const uint32_t *iv,
|
||||
const uint32_t *salt, int rounds )
|
||||
{
|
||||
__m128i zero = m128_zero;
|
||||
casti_m128i( ctx->H, 0 ) = _mm_set1_epi32( iv[0] );
|
||||
casti_m128i( ctx->H, 1 ) = _mm_set1_epi32( iv[1] );
|
||||
casti_m128i( ctx->H, 2 ) = _mm_set1_epi32( iv[2] );
|
||||
casti_m128i( ctx->H, 3 ) = _mm_set1_epi32( iv[3] );
|
||||
casti_m128i( ctx->H, 4 ) = _mm_set1_epi32( iv[4] );
|
||||
casti_m128i( ctx->H, 5 ) = _mm_set1_epi32( iv[5] );
|
||||
casti_m128i( ctx->H, 6 ) = _mm_set1_epi32( iv[6] );
|
||||
casti_m128i( ctx->H, 7 ) = _mm_set1_epi32( iv[7] );
|
||||
casti_m128i( ctx->H, 0 ) = m128_const1_64( 0x6A09E6676A09E667 );
|
||||
casti_m128i( ctx->H, 1 ) = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||
casti_m128i( ctx->H, 2 ) = m128_const1_64( 0x3C6EF3723C6EF372 );
|
||||
casti_m128i( ctx->H, 3 ) = m128_const1_64( 0xA54FF53AA54FF53A );
|
||||
casti_m128i( ctx->H, 4 ) = m128_const1_64( 0x510E527F510E527F );
|
||||
casti_m128i( ctx->H, 5 ) = m128_const1_64( 0x9B05688C9B05688C );
|
||||
casti_m128i( ctx->H, 6 ) = m128_const1_64( 0x1F83D9AB1F83D9AB );
|
||||
casti_m128i( ctx->H, 7 ) = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
||||
|
||||
casti_m128i( ctx->S, 0 ) = zero;
|
||||
casti_m128i( ctx->S, 1 ) = zero;
|
||||
@@ -778,12 +786,13 @@ blake32_4way_close( blake_4way_small_context *ctx, unsigned ub, unsigned n,
|
||||
else
|
||||
ctx->T0 -= 512 - bit_len;
|
||||
|
||||
buf[vptr] = _mm_set1_epi32( 0x80 );
|
||||
buf[vptr] = m128_const1_64( 0x0000008000000080 );
|
||||
|
||||
if ( vptr < 12 )
|
||||
{
|
||||
memset_zero_128( buf + vptr + 1, 13 - vptr );
|
||||
buf[ 13 ] = _mm_or_si128( buf[ 13 ], _mm_set1_epi32( 0x01000000UL ) );
|
||||
buf[ 13 ] = _mm_or_si128( buf[ 13 ],
|
||||
m128_const1_64( 0x0100000001000000ULL ) );
|
||||
buf[ 14 ] = mm128_bswap_32( _mm_set1_epi32( th ) );
|
||||
buf[ 15 ] = mm128_bswap_32( _mm_set1_epi32( tl ) );
|
||||
blake32_4way( ctx, buf + vptr, 64 - ptr );
|
||||
@@ -795,7 +804,8 @@ blake32_4way_close( blake_4way_small_context *ctx, unsigned ub, unsigned n,
|
||||
ctx->T0 = 0xFFFFFE00UL;
|
||||
ctx->T1 = 0xFFFFFFFFUL;
|
||||
memset_zero_128( buf, 56>>2 );
|
||||
buf[ 13 ] = _mm_or_si128( buf[ 13 ], _mm_set1_epi32( 0x01000000UL ) );
|
||||
buf[ 13 ] = _mm_or_si128( buf[ 13 ],
|
||||
m128_const1_64( 0x0100000001000000ULL ) );
|
||||
buf[ 14 ] = mm128_bswap_32( _mm_set1_epi32( th ) );
|
||||
buf[ 15 ] = mm128_bswap_32( _mm_set1_epi32( tl ) );
|
||||
blake32_4way( ctx, buf, 64 );
|
||||
@@ -815,20 +825,18 @@ blake32_8way_init( blake_8way_small_context *sc, const sph_u32 *iv,
|
||||
const sph_u32 *salt, int rounds )
|
||||
{
|
||||
__m256i zero = m256_zero;
|
||||
casti_m256i( sc->H, 0 ) = _mm256_set1_epi32( iv[0] );
|
||||
casti_m256i( sc->H, 1 ) = _mm256_set1_epi32( iv[1] );
|
||||
casti_m256i( sc->H, 2 ) = _mm256_set1_epi32( iv[2] );
|
||||
casti_m256i( sc->H, 3 ) = _mm256_set1_epi32( iv[3] );
|
||||
casti_m256i( sc->H, 4 ) = _mm256_set1_epi32( iv[4] );
|
||||
casti_m256i( sc->H, 5 ) = _mm256_set1_epi32( iv[5] );
|
||||
casti_m256i( sc->H, 6 ) = _mm256_set1_epi32( iv[6] );
|
||||
casti_m256i( sc->H, 7 ) = _mm256_set1_epi32( iv[7] );
|
||||
|
||||
casti_m256i( sc->H, 0 ) = m256_const1_64( 0x6A09E6676A09E667 );
|
||||
casti_m256i( sc->H, 1 ) = m256_const1_64( 0xBB67AE85BB67AE85 );
|
||||
casti_m256i( sc->H, 2 ) = m256_const1_64( 0x3C6EF3723C6EF372 );
|
||||
casti_m256i( sc->H, 3 ) = m256_const1_64( 0xA54FF53AA54FF53A );
|
||||
casti_m256i( sc->H, 4 ) = m256_const1_64( 0x510E527F510E527F );
|
||||
casti_m256i( sc->H, 5 ) = m256_const1_64( 0x9B05688C9B05688C );
|
||||
casti_m256i( sc->H, 6 ) = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
||||
casti_m256i( sc->H, 7 ) = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||
casti_m256i( sc->S, 0 ) = zero;
|
||||
casti_m256i( sc->S, 1 ) = zero;
|
||||
casti_m256i( sc->S, 2 ) = zero;
|
||||
casti_m256i( sc->S, 3 ) = zero;
|
||||
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
sc->rounds = rounds;
|
||||
@@ -887,7 +895,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
buf[ptr>>2] = _mm256_set1_epi32( 0x80 );
|
||||
buf[ptr>>2] = m256_const1_64( 0x0000008000000080ULL );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
|
||||
@@ -909,7 +917,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
memset_zero_256( buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
|
||||
if ( out_size_w32 == 8 )
|
||||
buf[52>>2] = _mm256_or_si256( buf[52>>2],
|
||||
_mm256_set1_epi32( 0x01000000UL ) );
|
||||
m256_const1_64( 0x0100000001000000ULL ) );
|
||||
*(buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
|
||||
*(buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, buf + (ptr>>2), 64 - ptr );
|
||||
@@ -922,7 +930,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
sc->T1 = SPH_C32(0xFFFFFFFFUL);
|
||||
memset_zero_256( buf, 56>>2 );
|
||||
if ( out_size_w32 == 8 )
|
||||
buf[52>>2] = _mm256_set1_epi32( 0x01000000UL );
|
||||
buf[52>>2] = m256_const1_64( 0x0100000001000000ULL );
|
||||
*(buf+(56>>2)) = mm256_bswap_32( _mm256_set1_epi32( th ) );
|
||||
*(buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, buf, 64 );
|
||||
|
@@ -1,322 +0,0 @@
|
||||
// convert blake256 32 bit to use 64 bit with serial vectoring
|
||||
//
|
||||
// cut calls to GS in half
|
||||
//
|
||||
// combine V
|
||||
// v0 = {V0,V1}
|
||||
// v1 = {V2,V3}
|
||||
// v2 = {V4,V5}
|
||||
// v3 = {V6,V7}
|
||||
// v4 = {V8,V9}
|
||||
// v5 = {VA,VB}
|
||||
// v6 = {VC,VD}
|
||||
// v7 = {CE,VF}
|
||||
//
|
||||
// v6x = {VD,VC} swap(VC,VD) swap(v6)
|
||||
// v7x = {VF,VE} swap(VE,VF) swap(v7)
|
||||
//
|
||||
// V0 = v1v0
|
||||
// V1 = v3v2
|
||||
// V2 = v5v4
|
||||
// V3 = v7v6
|
||||
// V4 = v9v8
|
||||
// V5 = vbva
|
||||
// V6 = vdvc
|
||||
// V7 = vfve
|
||||
//
|
||||
// The rotate in ROUND is to effect straddle and unstraddle for the third
|
||||
// and 4th iteration of GS.
|
||||
// It concatenates 2 contiguous 256 bit vectors and extracts the middle
|
||||
// 256 bits. After the transform they must be restored with only the
|
||||
// chosen bits modified in the original 2 vectors.
|
||||
// ror1x128 achieves this by putting the chosen bits in arg1, the "low"
|
||||
// 256 bit vector and saves the untouched bits temporailly in arg0, the
|
||||
// "high" 256 bit vector. Simply reverse the process to restore data back
|
||||
// to original positions.
|
||||
|
||||
// Use standard 4way when AVX2 is not available use x2 mode with AVX2.
|
||||
//
|
||||
// Data is organised the same as 32 bit 4 way, in effect serial vectoring
|
||||
// on top of parallel vectoring. Same data in the same place just taking
|
||||
// two chunks at a time.
|
||||
//
|
||||
// Transparent to user, x2 mode used when AVX2 detected.
|
||||
// Use existing 4way context but revert to scalar types.
|
||||
// Same interleave function (128 bit) or x2 with 256 bit?
|
||||
// User trsnaparency would have to apply to interleave as well.
|
||||
//
|
||||
// Use common 4way update and close
|
||||
|
||||
/*
|
||||
typedef struct {
|
||||
unsigned char buf[64<<2];
|
||||
uint32_t H[8<<2];
|
||||
uint32_t S[4<<2];
|
||||
size_t ptr;
|
||||
uint32_t T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blakex2_4way_small_context __attribute__ ((aligned (64)));
|
||||
*/
|
||||
|
||||
static void
|
||||
blake32x2_4way_init( blake_4way_small_context *ctx, const uint32_t *iv,
|
||||
const uint32_t *salt, int rounds )
|
||||
{
|
||||
casti_m128i( ctx->H, 0 ) = _mm_set1_epi32( iv[0] );
|
||||
casti_m128i( ctx->H, 1 ) = _mm_set1_epi32( iv[1] );
|
||||
casti_m128i( ctx->H, 2 ) = _mm_set1_epi32( iv[2] );
|
||||
casti_m128i( ctx->H, 3 ) = _mm_set1_epi32( iv[3] );
|
||||
casti_m128i( ctx->H, 4 ) = _mm_set1_epi32( iv[4] );
|
||||
casti_m128i( ctx->H, 5 ) = _mm_set1_epi32( iv[5] );
|
||||
casti_m128i( ctx->H, 6 ) = _mm_set1_epi32( iv[6] );
|
||||
casti_m128i( ctx->H, 7 ) = _mm_set1_epi32( iv[7] );
|
||||
|
||||
casti_m128i( ctx->S, 0 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 1 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 2 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 3 ) = m128_zero;
|
||||
/*
|
||||
sc->S[0] = _mm_set1_epi32( salt[0] );
|
||||
sc->S[1] = _mm_set1_epi32( salt[1] );
|
||||
sc->S[2] = _mm_set1_epi32( salt[2] );
|
||||
sc->S[3] = _mm_set1_epi32( salt[3] );
|
||||
*/
|
||||
ctx->T0 = ctx->T1 = 0;
|
||||
ctx->ptr = 0;
|
||||
ctx->rounds = rounds;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32x2( blake_4way_small_context *ctx, const void *data, size_t len )
|
||||
{
|
||||
__m128i *buf = (__m256i*)ctx->buf;
|
||||
size_t bptr = ctx->ptr << 2;
|
||||
size_t vptr = ctx->ptr >> 3;
|
||||
size_t blen = len << 2;
|
||||
// unsigned char *buf = ctx->buf;
|
||||
// size_t ptr = ctx->ptr<<4; // repurposed
|
||||
DECL_STATE32x2
|
||||
|
||||
// buf = sc->buf;
|
||||
// ptr = sc->ptr;
|
||||
|
||||
// adjust len for use with ptr, clen, all absolute bytes.
|
||||
// int blen = len<<2;
|
||||
|
||||
if ( blen < (sizeof ctx->buf) - bptr )
|
||||
{
|
||||
memcpy( buf + vptr, data, blen );
|
||||
ptr += blen;
|
||||
ctx->ptr = bptr >> 2;;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE32( ctx );
|
||||
while ( blen > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = ( sizeof sc->buf ) - ptr;
|
||||
if ( clen > blen )
|
||||
clen = blen;
|
||||
memcpy( buf + vptr, data, clen );
|
||||
bptr += clen;
|
||||
vptr = bptr >> 5;
|
||||
data = (const unsigned char *)data + clen;
|
||||
blen -= clen;
|
||||
if ( bptr == sizeof ctx->buf )
|
||||
{
|
||||
if ( ( T0 = T0 + 512 ) < 512 ) // not needed, will never rollover
|
||||
T1 += 1;
|
||||
COMPRESS32x2_4WAY( ctx->rounds );
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE32x2( ctx );
|
||||
ctx->ptr = bptr >> 2;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32x2_4way_close( blake_4way_small_context *ctx, void *dst )
|
||||
{
|
||||
__m256i buf[8] __attribute__ ((aligned (64)));
|
||||
size_t ptr = ctx->ptr;
|
||||
size_t vptr = ctx->ptr>>2;
|
||||
unsigned bit_len = ( (unsigned)ptr << 3 ); // one lane
|
||||
uint32_t th = ctx->T1;
|
||||
uint32_t tl = ctx->T0 + bit_len;
|
||||
|
||||
if ( ptr == 0 )
|
||||
{
|
||||
ctx->T0 = 0xFFFFFE00UL;
|
||||
ctx->T1 = 0xFFFFFFFFUL;
|
||||
}
|
||||
else if ( ctx->T0 == 0 )
|
||||
{
|
||||
ctx->T0 = 0xFFFFFE00UL + bit_len;
|
||||
ctx->T1 -= 1;
|
||||
}
|
||||
else
|
||||
ctx->T0 -= 512 - bit_len;
|
||||
|
||||
// memset doesn't do ints
|
||||
buf[ vptr ] = _mm256_set_epi32( 0,0,0,0, 0x80, 0x80, 0x80, 0x80 );
|
||||
|
||||
if ( vptr < 5 )
|
||||
{
|
||||
memset_zero_256( buf + vptr + 1, 6 - vptr );
|
||||
buf[ 6 ] = _mm256_or_si256( vbuf[ 6 ], _mm256_set_epi32(
|
||||
0x01000000UL,0x01000000UL,0x01000000UL,0x01000000UL, 0,0,0,0 ) );
|
||||
buf[ 7 ] = mm256_bswap_32( _mm256_set_epi32( tl,tl,tl,tl,
|
||||
th,th,th,th ) );
|
||||
blake32x2_4way( ctx, buf + vptr, 64 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_256( vbuf + vptr + 1, 7 - vptr );
|
||||
blake32x2_4way( ctx, vbuf + ptr, 64 - ptr );
|
||||
ctx->T0 = 0xFFFFFE00UL;
|
||||
ctx->T1 = 0xFFFFFFFFUL;
|
||||
buf[ 6 ] = mm256_zero;
|
||||
buf[ 6 ] = _mm256_set_epi32( 0,0,0,0,
|
||||
0x01000000UL,0x01000000UL,0x01000000UL,0x01000000UL );
|
||||
buf[ 7 ] = mm256_bswap_32( _mm256_set_epi32( tl, tl, tl, tl,
|
||||
th, th, th, th );
|
||||
blake32x2_4way( ctx, buf, 64 );
|
||||
}
|
||||
|
||||
casti_m256i( dst, 0 ) = mm256_bswap_32( casti_m256i( ctx->H, 0 ) );
|
||||
casti_m256i( dst, 1 ) = mm256_bswap_32( casti_m256i( ctx->H, 1 ) );
|
||||
casti_m256i( dst, 2 ) = mm256_bswap_32( casti_m256i( ctx->H, 2 ) );
|
||||
casti_m256i( dst, 3 ) = mm256_bswap_32( casti_m256i( ctx->H, 3 ) );
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#define DECL_STATE32x2_4WAY \
|
||||
__m256i H0, H1, H2, H3; \
|
||||
__m256i S0, S1; \
|
||||
uint32_t T0, T1;
|
||||
|
||||
#define READ_STATE32x2_4WAY(state) do \
|
||||
{ \
|
||||
H0 = casti_m256i( state->H, 0 ); \
|
||||
H1 = casti_m256i( state->H, 1 ); \
|
||||
H2 = casti_m256i( state->H, 2 ); \
|
||||
H3 = casti_m256i( state->H, 3 ); \
|
||||
S0 = casti_m256i( state->S, 0 ); \
|
||||
S1 = casti_m256i( state->S, 1 ); \
|
||||
T0 = state->T0; \
|
||||
T1 = state->T1; \
|
||||
|
||||
#define WRITE_STATE32x2_4WAY(state) do { \
|
||||
casti_m256i( state->H, 0 ) = H0; \
|
||||
casti_m256i( state->H, 1 ) = H1; \
|
||||
casti_m256i( state->H, 2 ) = H2; \
|
||||
casti_m256i( state->H, 3 ) = H3; \
|
||||
casti_m256i( state->S, 0 ) = S0; \
|
||||
casti_m256i( state->S, 1 ) = S1; \
|
||||
state->T0 = T0; \
|
||||
state->T1 = T1; \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define GSx2_4WAY( m0m2, m1m3, c0c2, c1c3, a, b, c, d ) do \
|
||||
{ \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set_epi32( c1,c3, c1,c3, c1,c3, c1,c3 ), \
|
||||
_mm256_set_epi32( m0,m2, m0,m2, m0,m2, m0,m2 ) ), b ), a ); \
|
||||
d = mm256_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set_epi32( c0,c2, c0,c2, c0,c2, c0,c2 ), \
|
||||
_mm256_set_epi32( m1,m3, m1,m3, m1,m3, m1,m3 ) ), b ), a ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_Sx2_4WAY(r) do \
|
||||
{ \
|
||||
GS2_4WAY( Mx(r, 0), Mx(r, 1), Mx(r, 2), Mx(r, 3), \
|
||||
CSx(r, 0), CSx(r, 1), CSx(r, 2), CSx(r, 3), V0, V2, V4, V6 ); \
|
||||
GS2_4WAY( Mx(r, 4), Mx(r, 5), Mx(r, 6), Mx(r, 7), \
|
||||
CSx(r, 4), CSx(r, 5), CSx(r, 6), CSx(r, 7), V1, V3, V5, V7 ); \
|
||||
mm256_ror1x128_512( V3, V2 ); \
|
||||
mm256_ror1x128_512( V6, V7 ); \
|
||||
GS2_4WAY( Mx(r, 8), Mx(r, 9), Mx(r, A), Mx(r, B), \
|
||||
CSx(r, 8), CSx(r, 9), CSx(r, A), CSx(r, B), V0, V2, V5, V7 ); \
|
||||
GS2_4WAY( Mx(r, C), Mx(r, D), Mx(r, C), Mx(r, D), \
|
||||
CSx(r, C), CSx(r, D), CSx(r, C), CSx(r, D), V1, V3, V4, V6 ); \
|
||||
mm256_rol1x128_512( V2, V3 ); \
|
||||
mm256_rol1x128_512( V7, V6 );
|
||||
|
||||
#define COMPRESS32x2_4WAY( rounds ) do \
|
||||
{ \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
unsigned r; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = _mm256_xor_si256( S0, _mm256_set_epi32( CS1, CS1, CS1, CS1, \
|
||||
CS0, CS0, CS0, CS0 ) ); \
|
||||
V5 = _mm256_xor_si256( S1, _mm256_set_epi32( CS3, CS3, CS3, CS3, \
|
||||
CS2, CS2, CS2, CS2 ) ); \
|
||||
V6 = _mm256_xor_si256( _mm256_set1_epi32( T0 ), \
|
||||
_mm256_set_epi32( CS5, CS5, CS5, CS5, \
|
||||
CS4, CS4, CS4, CS4 ) ); \
|
||||
V7 = _mm256_xor_si256( _mm256_set1_epi32( T1 ), \
|
||||
_mm256_set_epi32( CS7, CS7, CS7, CS7, \
|
||||
CS6, CS6, CS6, CS6 ) ); \
|
||||
M0 = mm256_bswap_32( buf[ 0] ); \
|
||||
M1 = mm256_bswap_32( buf[ 1] ); \
|
||||
M2 = mm256_bswap_32( buf[ 2] ); \
|
||||
M3 = mm256_bswap_32( buf[ 3] ); \
|
||||
M4 = mm256_bswap_32( buf[ 4] ); \
|
||||
M5 = mm256_bswap_32( buf[ 5] ); \
|
||||
M6 = mm256_bswap_32( buf[ 6] ); \
|
||||
M7 = mm256_bswap_32( buf[ 7] ); \
|
||||
ROUND_Sx2_4WAY(0); \
|
||||
ROUND_Sx2_4WAY(1); \
|
||||
ROUND_Sx2_4WAY(2); \
|
||||
ROUND_Sx2_4WAY(3); \
|
||||
ROUND_Sx2_4WAY(4); \
|
||||
ROUND_Sx2_4WAY(5); \
|
||||
ROUND_Sx2_4WAY(6); \
|
||||
ROUND_Sx2_4WAY(7); \
|
||||
if (rounds == 14) \
|
||||
{ \
|
||||
ROUND_Sx2_4WAY(8); \
|
||||
ROUND_Sx2_4WAY(9); \
|
||||
ROUND_Sx2_4WAY(0); \
|
||||
ROUND_Sx2_4WAY(1); \
|
||||
ROUND_Sx2_4WAY(2); \
|
||||
ROUND_Sx2_4WAY(3); \
|
||||
} \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( V8, V0 ), S0 ), H0 ); \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( V9, V1 ), S1 ), H1 ); \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( VA, V2 ), S2 ), H2 ); \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( VB, V3 ), S3 ), H3 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
67
algo/blake/blake2b-4way.c
Normal file
67
algo/blake/blake2b-4way.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Blake2-B Implementation
|
||||
* tpruvot@github 2015-2016
|
||||
*/
|
||||
|
||||
#include "blake2b-gate.h"
|
||||
|
||||
#if defined(BLAKE2B_4WAY)
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "blake2b-hash-4way.h"
|
||||
|
||||
// Function not used, code inlined.
|
||||
void blake2b_4way_hash(void *output, const void *input)
|
||||
{
|
||||
blake2b_4way_ctx ctx;
|
||||
blake2b_4way_init( &ctx );
|
||||
blake2b_4way_update( &ctx, input, 80 );
|
||||
blake2b_4way_final( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));;
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (32)));;
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
blake2b_4way_ctx ctx __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
blake2b_4way_init( &ctx );
|
||||
blake2b_4way_update( &ctx, vdata, 80 );
|
||||
blake2b_4way_final( &ctx, hash );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane<<1 ] < Htarg )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
25
algo/blake/blake2b-gate.c
Normal file
25
algo/blake/blake2b-gate.c
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "blake2b-gate.h"
|
||||
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake2s_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
*/
|
||||
|
||||
bool register_blake2b_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKE2B_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_blake2b_4way;
|
||||
gate->hash = (void*)&blake2b_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_blake2b;
|
||||
gate->hash = (void*)&blake2b_hash;
|
||||
#endif
|
||||
// gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
gate->optimizations = AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
26
algo/blake/blake2b-gate.h
Normal file
26
algo/blake/blake2b-gate.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#ifndef __BLAKE2B_GATE_H__
|
||||
#define __BLAKE2B_GATE_H__ 1
|
||||
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define BLAKE2B_4WAY
|
||||
#endif
|
||||
|
||||
bool register_blake2b_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(BLAKE2B_4WAY)
|
||||
|
||||
void blake2b_4way_hash( void *state, const void *input );
|
||||
int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#else
|
||||
|
||||
void blake2b_hash( void *state, const void *input );
|
||||
int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
215
algo/blake/blake2b-hash-4way.c
Normal file
215
algo/blake/blake2b-hash-4way.c
Normal file
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright 2009 Colin Percival, 2014 savale
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "blake2b-hash-4way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// G Mixing function.
|
||||
|
||||
#define B2B_G(a, b, c, d, x, y) \
|
||||
{ \
|
||||
v[a] = _mm256_add_epi64( _mm256_add_epi64( v[a], v[b] ), x ); \
|
||||
v[d] = mm256_ror_64( _mm256_xor_si256( v[d], v[a] ), 32 ); \
|
||||
v[c] = _mm256_add_epi64( v[c], v[d] ); \
|
||||
v[b] = mm256_ror_64( _mm256_xor_si256( v[b], v[c] ), 24 ); \
|
||||
v[a] = _mm256_add_epi64( _mm256_add_epi64( v[a], v[b] ), y ); \
|
||||
v[d] = mm256_ror_64( _mm256_xor_si256( v[d], v[a] ), 16 ); \
|
||||
v[c] = _mm256_add_epi64( v[c], v[d] ); \
|
||||
v[b] = mm256_ror_64( _mm256_xor_si256( v[b], v[c] ), 63 ); \
|
||||
}
|
||||
|
||||
// Initialization Vector.
|
||||
/*
|
||||
static const uint64_t blake2b_iv[8] = {
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
|
||||
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
|
||||
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
|
||||
};
|
||||
*/
|
||||
|
||||
static void blake2b_4way_compress( blake2b_4way_ctx *ctx, int last )
|
||||
{
|
||||
const uint8_t sigma[12][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
int i;
|
||||
__m256i v[16], m[16];
|
||||
|
||||
v[ 0] = ctx->h[0];
|
||||
v[ 1] = ctx->h[1];
|
||||
v[ 2] = ctx->h[2];
|
||||
v[ 3] = ctx->h[3];
|
||||
v[ 4] = ctx->h[4];
|
||||
v[ 5] = ctx->h[5];
|
||||
v[ 6] = ctx->h[6];
|
||||
v[ 7] = ctx->h[7];
|
||||
v[ 8] = m256_const1_64( 0x6A09E667F3BCC908 );
|
||||
v[ 9] = m256_const1_64( 0xBB67AE8584CAA73B );
|
||||
v[10] = m256_const1_64( 0x3C6EF372FE94F82B );
|
||||
v[11] = m256_const1_64( 0xA54FF53A5F1D36F1 );
|
||||
v[12] = m256_const1_64( 0x510E527FADE682D1 );
|
||||
v[13] = m256_const1_64( 0x9B05688C2B3E6C1F );
|
||||
v[14] = m256_const1_64( 0x1F83D9ABFB41BD6B );
|
||||
v[15] = m256_const1_64( 0x5BE0CD19137E2179 );
|
||||
|
||||
v[12] = _mm256_xor_si256( v[12], _mm256_set1_epi64x( ctx->t[0] ) );
|
||||
v[13] = _mm256_xor_si256( v[13], _mm256_set1_epi64x( ctx->t[1] ) );
|
||||
|
||||
if ( last )
|
||||
v[14] = mm256_not( v[14] );
|
||||
|
||||
m[ 0] = ctx->b[ 0];
|
||||
m[ 1] = ctx->b[ 1];
|
||||
m[ 2] = ctx->b[ 2];
|
||||
m[ 3] = ctx->b[ 3];
|
||||
m[ 4] = ctx->b[ 4];
|
||||
m[ 5] = ctx->b[ 5];
|
||||
m[ 6] = ctx->b[ 6];
|
||||
m[ 7] = ctx->b[ 7];
|
||||
m[ 8] = ctx->b[ 8];
|
||||
m[ 9] = ctx->b[ 9];
|
||||
m[10] = ctx->b[10];
|
||||
m[11] = ctx->b[11];
|
||||
m[12] = ctx->b[12];
|
||||
m[13] = ctx->b[13];
|
||||
m[14] = ctx->b[14];
|
||||
m[15] = ctx->b[15];
|
||||
|
||||
for ( i = 0; i < 12; i++ )
|
||||
{
|
||||
B2B_G( 0, 4, 8, 12, m[ sigma[i][ 0] ], m[ sigma[i][ 1] ] );
|
||||
B2B_G( 1, 5, 9, 13, m[ sigma[i][ 2] ], m[ sigma[i][ 3] ] );
|
||||
B2B_G( 2, 6, 10, 14, m[ sigma[i][ 4] ], m[ sigma[i][ 5] ] );
|
||||
B2B_G( 3, 7, 11, 15, m[ sigma[i][ 6] ], m[ sigma[i][ 7] ] );
|
||||
B2B_G( 0, 5, 10, 15, m[ sigma[i][ 8] ], m[ sigma[i][ 9] ] );
|
||||
B2B_G( 1, 6, 11, 12, m[ sigma[i][10] ], m[ sigma[i][11] ] );
|
||||
B2B_G( 2, 7, 8, 13, m[ sigma[i][12] ], m[ sigma[i][13] ] );
|
||||
B2B_G( 3, 4, 9, 14, m[ sigma[i][14] ], m[ sigma[i][15] ] );
|
||||
}
|
||||
|
||||
ctx->h[0] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[0], v[0] ), v[ 8] );
|
||||
ctx->h[1] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[1], v[1] ), v[ 9] );
|
||||
ctx->h[2] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[2], v[2] ), v[10] );
|
||||
ctx->h[3] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[3], v[3] ), v[11] );
|
||||
ctx->h[4] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[4], v[4] ), v[12] );
|
||||
ctx->h[5] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[5], v[5] ), v[13] );
|
||||
ctx->h[6] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[6], v[6] ), v[14] );
|
||||
ctx->h[7] = _mm256_xor_si256( _mm256_xor_si256( ctx->h[7], v[7] ), v[15] );
|
||||
}
|
||||
|
||||
int blake2b_4way_init( blake2b_4way_ctx *ctx )
|
||||
{
|
||||
size_t i;
|
||||
|
||||
ctx->h[0] = m256_const1_64( 0x6A09E667F3BCC908 );
|
||||
ctx->h[1] = m256_const1_64( 0xBB67AE8584CAA73B );
|
||||
ctx->h[2] = m256_const1_64( 0x3C6EF372FE94F82B );
|
||||
ctx->h[3] = m256_const1_64( 0xA54FF53A5F1D36F1 );
|
||||
ctx->h[4] = m256_const1_64( 0x510E527FADE682D1 );
|
||||
ctx->h[5] = m256_const1_64( 0x9B05688C2B3E6C1F );
|
||||
ctx->h[6] = m256_const1_64( 0x1F83D9ABFB41BD6B );
|
||||
ctx->h[7] = m256_const1_64( 0x5BE0CD19137E2179 );
|
||||
|
||||
ctx->h[0] = _mm256_xor_si256( ctx->h[0], m256_const1_64( 0x01010020 ) );
|
||||
|
||||
ctx->t[0] = 0;
|
||||
ctx->t[1] = 0;
|
||||
ctx->c = 0;
|
||||
ctx->outlen = 32;
|
||||
|
||||
for ( i = 0; i < 16; i++ )
|
||||
ctx->b[i] = m256_zero;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blake2b_4way_update( blake2b_4way_ctx *ctx, const void *input,
|
||||
size_t inlen )
|
||||
{
|
||||
__m256i* in =(__m256i*)input;
|
||||
|
||||
size_t i, c;
|
||||
c = ctx->c >> 3;
|
||||
|
||||
for ( i = 0; i < (inlen >> 3); i++ )
|
||||
{
|
||||
if ( ctx->c == 128 )
|
||||
{
|
||||
ctx->t[0] += ctx->c;
|
||||
if ( ctx->t[0] < ctx->c )
|
||||
ctx->t[1]++;
|
||||
blake2b_4way_compress( ctx, 0 );
|
||||
ctx->c = 0;
|
||||
}
|
||||
ctx->b[ c++ ] = in[i];
|
||||
ctx->c += 8;
|
||||
}
|
||||
}
|
||||
|
||||
void blake2b_4way_final( blake2b_4way_ctx *ctx, void *out )
|
||||
{
|
||||
size_t c;
|
||||
c = ctx->c >> 3;
|
||||
|
||||
ctx->t[0] += ctx->c;
|
||||
if ( ctx->t[0] < ctx->c )
|
||||
ctx->t[1]++;
|
||||
|
||||
while ( ctx->c < 128 )
|
||||
{
|
||||
ctx->b[c++] = m256_zero;
|
||||
ctx->c += 8;
|
||||
}
|
||||
|
||||
blake2b_4way_compress( ctx, 1 ); // final block flag = 1
|
||||
|
||||
casti_m256i( out, 0 ) = ctx->h[0];
|
||||
casti_m256i( out, 1 ) = ctx->h[1];
|
||||
casti_m256i( out, 2 ) = ctx->h[2];
|
||||
casti_m256i( out, 3 ) = ctx->h[3];
|
||||
}
|
||||
|
||||
#endif
|
35
algo/blake/blake2b-hash-4way.h
Normal file
35
algo/blake/blake2b-hash-4way.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
#ifndef __BLAKE2B_HASH_4WAY_H__
|
||||
#define __BLAKE2B_HASH_4WAY_H__
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <inttypes.h>
|
||||
#define inline __inline
|
||||
#define ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
|
||||
// state context
|
||||
ALIGN(64) typedef struct {
|
||||
__m256i b[16]; // input buffer
|
||||
__m256i h[8]; // chained state
|
||||
uint64_t t[2]; // total number of bytes
|
||||
size_t c; // pointer for b[]
|
||||
size_t outlen; // digest size
|
||||
} blake2b_4way_ctx __attribute__((aligned(64)));
|
||||
|
||||
int blake2b_4way_init( blake2b_4way_ctx *ctx );
|
||||
void blake2b_4way_update( blake2b_4way_ctx *ctx, const void *input,
|
||||
size_t inlen );
|
||||
void blake2b_4way_final( blake2b_4way_ctx *ctx, void *out );
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -3,13 +3,11 @@
|
||||
* tpruvot@github 2015-2016
|
||||
*/
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include "blake2b-gate.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "algo/blake/sph_blake2b.h"
|
||||
|
||||
//static __thread sph_blake2b_ctx s_midstate;
|
||||
//static __thread sph_blake2b_ctx s_ctx;
|
||||
#define MIDLEN 76
|
||||
#define A 64
|
||||
|
||||
@@ -25,16 +23,6 @@ void blake2b_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
/*
|
||||
static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
|
||||
{
|
||||
s_ctx.outlen = MIDLEN;
|
||||
memcpy(&s_ctx, &s_midstate, 32 + 16 + MIDLEN);
|
||||
sph_blake2b_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
|
||||
sph_blake2b_final(&s_ctx, (uint8_t*) output);
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
@@ -45,7 +33,7 @@ int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[8];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
@@ -53,179 +41,23 @@ int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
|
||||
// midstate (untested yet)
|
||||
//blake2b_init(&s_midstate, 32, NULL, 0);
|
||||
//blake2b_update(&s_midstate, (uint8_t*) endiandata, MIDLEN);
|
||||
//memcpy(&s_ctx, &s_midstate, sizeof(blake2b_ctx));
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[8], n);
|
||||
be32enc(&endiandata[19], n);
|
||||
//blake2b_hash_end(vhashcpu, endiandata);
|
||||
blake2b_hash(vhashcpu, endiandata);
|
||||
|
||||
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget)) {
|
||||
work_set_target_ratio(work, vhashcpu);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[8] = n;
|
||||
pdata[19] = n;
|
||||
return 1;
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[8] = n;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void swab256(void *dest_p, const void *src_p)
|
||||
{
|
||||
uint32_t *dest = (uint32_t *)dest_p;
|
||||
const uint32_t *src = (uint32_t *)src_p;
|
||||
|
||||
dest[0] = swab32(src[7]);
|
||||
dest[1] = swab32(src[6]);
|
||||
dest[2] = swab32(src[5]);
|
||||
dest[3] = swab32(src[4]);
|
||||
dest[4] = swab32(src[3]);
|
||||
dest[5] = swab32(src[2]);
|
||||
dest[6] = swab32(src[1]);
|
||||
dest[7] = swab32(src[0]);
|
||||
}
|
||||
|
||||
/* compute nbits to get the network diff */
|
||||
void blake2b_calc_network_diff(struct work *work)
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
uint32_t nbits = work->data[11]; // unsure if correct
|
||||
uint32_t bits = (nbits & 0xffffff);
|
||||
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
||||
|
||||
double d = (double)0x0000ffff / (double)bits;
|
||||
for (int m=shift; m < 29; m++) d *= 256.0;
|
||||
for (int m=29; m < shift; m++) d /= 256.0;
|
||||
if (opt_debug_diff)
|
||||
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
|
||||
net_diff = d;
|
||||
}
|
||||
|
||||
void blake2b_be_build_stratum_request( char *req, struct work *work )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
be32enc( &ntime, work->data[ algo_gate.ntime_index ] );
|
||||
be32enc( &nonce, work->data[ algo_gate.nonce_index ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
uint16_t high_nonce = swab32(work->data[9]) >> 16;
|
||||
xnonce2str = abin2hex((unsigned char*)(&high_nonce), 2);
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free( xnonce2str );
|
||||
}
|
||||
|
||||
#define min(a,b) (a>b ? (b) :(a))
|
||||
|
||||
// merkle root handled here, no need for gen_merkle_root gate target
|
||||
void blake2b_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
uchar merkle_root[64] = { 0 };
|
||||
uint32_t extraheader[32] = { 0 };
|
||||
int headersize = 0;
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
// merkle root
|
||||
memcpy( merkle_root, sctx->job.coinbase, 32 );
|
||||
headersize = min( (int)sctx->job.coinbase_size - 32, sizeof(extraheader) );
|
||||
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
// g_work->data[0] = le32dec( sctx->job.version );
|
||||
// for ( i = 0; i < 8; i++ )
|
||||
// g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[i] = ((uint32_t*)sctx->job.prevhash)[7-i];
|
||||
// for ( i = 0; i < 8; i++ )
|
||||
// g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
|
||||
g_work->data[8] = 0; // nonce
|
||||
g_work->data[9] = swab32( extraheader[0] ) | ( rand() & 0xf0 );
|
||||
g_work->data[10] = be32dec( sctx->job.ntime );
|
||||
g_work->data[11] = be32dec( sctx->job.nbits );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[12+i] = ( (uint32_t*)merkle_root )[i];
|
||||
}
|
||||
|
||||
#undef min
|
||||
|
||||
void blake2b_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
const int wkcmp_sz = 32; // bytes
|
||||
const int wkcmp_off = 32 + 16;
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
|
||||
if ( memcmp( &work->data[ wkcmp_off ], &g_work->data[ wkcmp_off ], wkcmp_sz )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|
||||
|| strcmp( work->job_id, g_work->job_id ) ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
*nonceptr = ( 0xffffffffU / opt_n_threads ) * thr_id;
|
||||
if ( opt_randomize )
|
||||
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
|
||||
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
|
||||
}
|
||||
else
|
||||
++(*nonceptr);
|
||||
|
||||
// suprnova job_id check without data/target/height change...
|
||||
// we just may have copied new g_wwork to work so why this test here?
|
||||
// if ( have_stratum && strcmp( work->job_id, g_work->job_id ) )
|
||||
// exit thread loop
|
||||
// continue;
|
||||
// else
|
||||
// {
|
||||
// nonceptr[1] += 0x10;
|
||||
// nonceptr[1] |= thr_id;
|
||||
// }
|
||||
}
|
||||
|
||||
bool blake2b_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
if ( have_stratum && strcmp( stratum->job.job_id, work->job_id ) )
|
||||
// need to regen g_work..
|
||||
return false;
|
||||
// extradata: prevent duplicates
|
||||
work->data[ 8 ] += 0x10;
|
||||
work->data[ 8 + 1 ] |= thr_id;
|
||||
return true;
|
||||
}
|
||||
|
||||
double blake2b_get_max64() { return 0x1fffffLL; }
|
||||
|
||||
bool register_blake2b_algo( algo_gate_t* gate )
|
||||
{
|
||||
algo_not_tested();
|
||||
gate->ntime_index = 10;
|
||||
gate->nbits_index = 11;
|
||||
gate->nonce_index = 8;
|
||||
gate->work_cmp_size = 32;
|
||||
gate->scanhash = (void*)&scanhash_blake2b;
|
||||
gate->hash = (void*)&blake2b_hash;
|
||||
gate->calc_network_diff = (void*)&blake2b_calc_network_diff;
|
||||
gate->build_stratum_request = (void*)&blake2b_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&blake2b_build_extraheader;
|
||||
gate->get_new_work = (void*)&blake2b_get_new_work;
|
||||
gate->get_max64 = (void*)&blake2b_get_max64;
|
||||
gate->ready_to_mine = (void*)&blake2b_ready_to_mine;
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
@@ -20,7 +20,7 @@ bool register_blake2s_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&blake2s_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -4,7 +4,8 @@
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
//#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
#define BLAKE2S_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
@@ -17,7 +17,9 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
//#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
|
||||
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
||||
@@ -57,8 +59,18 @@ int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen )
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
|
||||
memset( S, 0, sizeof( blake2s_4way_state ) );
|
||||
for( int i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm_set1_epi32( blake2s_IV[i] );
|
||||
|
||||
S->h[0] = m128_const1_64( 0x6A09E6676A09E667ULL );
|
||||
S->h[1] = m128_const1_64( 0xBB67AE85BB67AE85ULL );
|
||||
S->h[2] = m128_const1_64( 0x3C6EF3723C6EF372ULL );
|
||||
S->h[3] = m128_const1_64( 0xA54FF53AA54FF53AULL );
|
||||
S->h[4] = m128_const1_64( 0x510E527F510E527FULL );
|
||||
S->h[5] = m128_const1_64( 0x9B05688C9B05688CULL );
|
||||
S->h[6] = m128_const1_64( 0x1F83D9AB1F83D9ABULL );
|
||||
S->h[7] = m128_const1_64( 0x5BE0CD195BE0CD19ULL );
|
||||
|
||||
// for( int i = 0; i < 8; ++i )
|
||||
// S->h[i] = _mm_set1_epi32( blake2s_IV[i] );
|
||||
|
||||
uint32_t *p = ( uint32_t * )( P );
|
||||
|
||||
@@ -267,8 +279,18 @@ int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen )
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
|
||||
memset( S, 0, sizeof( blake2s_8way_state ) );
|
||||
for( int i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm256_set1_epi32( blake2s_IV[i] );
|
||||
S->h[0] = m256_const1_64( 0x6A09E6676A09E667ULL );
|
||||
S->h[1] = m256_const1_64( 0xBB67AE85BB67AE85ULL );
|
||||
S->h[2] = m256_const1_64( 0x3C6EF3723C6EF372ULL );
|
||||
S->h[3] = m256_const1_64( 0xA54FF53AA54FF53AULL );
|
||||
S->h[4] = m256_const1_64( 0x510E527F510E527FULL );
|
||||
S->h[5] = m256_const1_64( 0x9B05688C9B05688CULL );
|
||||
S->h[6] = m256_const1_64( 0x1F83D9AB1F83D9ABULL );
|
||||
S->h[7] = m256_const1_64( 0x5BE0CD195BE0CD19ULL );
|
||||
|
||||
|
||||
// for( int i = 0; i < 8; ++i )
|
||||
// S->h[i] = _mm256_set1_epi32( blake2s_IV[i] );
|
||||
|
||||
uint32_t *p = ( uint32_t * )( P );
|
||||
|
||||
|
@@ -14,7 +14,8 @@
|
||||
#ifndef __BLAKE2S_HASH_4WAY_H__
|
||||
#define __BLAKE2S_HASH_4WAY_H__ 1
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
//#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
|
||||
|
@@ -307,12 +307,12 @@ static const sph_u64 CB[16] = {
|
||||
|
||||
#define GB_4WAY(m0, m1, c0, c1, a, b, c, d) do { \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
_mm256_set1_epi64x( c1 ), m0 ), b ), a ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 25 ); \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c0, c0, c0, c0 ), m1 ), b ), a ); \
|
||||
_mm256_set1_epi64x( c0 ), m1 ), b ), a ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 11 ); \
|
||||
@@ -479,20 +479,20 @@ static const sph_u64 CB[16] = {
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set1_epi64x( CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set1_epi64x( CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set1_epi64x( CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set1_epi64x( CB3 ) ); \
|
||||
V8 = _mm256_xor_si256( S0, m256_const1_64( CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, m256_const1_64( CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, m256_const1_64( CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, m256_const1_64( CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB4 ) ); \
|
||||
m256_const1_64( CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB5 ) ); \
|
||||
m256_const1_64( CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB6 ) ); \
|
||||
m256_const1_64( CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB7 ) ); \
|
||||
shuf_bswap64 = _mm256_set_epi64x( 0x08090a0b0c0d0e0f, 0x0001020304050607, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
m256_const1_64( CB7 ) ); \
|
||||
shuf_bswap64 = m256_const_64( 0x08090a0b0c0d0e0f, 0x0001020304050607, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
M0 = _mm256_shuffle_epi8( *(buf+ 0), shuf_bswap64 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap64 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap64 ); \
|
||||
@@ -544,14 +544,14 @@ blake64_4way_init( blake_4way_big_context *sc, const sph_u64 *iv,
|
||||
const sph_u64 *salt )
|
||||
{
|
||||
__m256i zero = m256_zero;
|
||||
casti_m256i( sc->H, 0 ) = _mm256_set1_epi64x( iv[0] );
|
||||
casti_m256i( sc->H, 1 ) = _mm256_set1_epi64x( iv[1] );
|
||||
casti_m256i( sc->H, 2 ) = _mm256_set1_epi64x( iv[2] );
|
||||
casti_m256i( sc->H, 3 ) = _mm256_set1_epi64x( iv[3] );
|
||||
casti_m256i( sc->H, 4 ) = _mm256_set1_epi64x( iv[4] );
|
||||
casti_m256i( sc->H, 5 ) = _mm256_set1_epi64x( iv[5] );
|
||||
casti_m256i( sc->H, 6 ) = _mm256_set1_epi64x( iv[6] );
|
||||
casti_m256i( sc->H, 7 ) = _mm256_set1_epi64x( iv[7] );
|
||||
casti_m256i( sc->H, 0 ) = m256_const1_64( 0x6A09E667F3BCC908 );
|
||||
casti_m256i( sc->H, 1 ) = m256_const1_64( 0xBB67AE8584CAA73B );
|
||||
casti_m256i( sc->H, 2 ) = m256_const1_64( 0x3C6EF372FE94F82B );
|
||||
casti_m256i( sc->H, 3 ) = m256_const1_64( 0xA54FF53A5F1D36F1 );
|
||||
casti_m256i( sc->H, 4 ) = m256_const1_64( 0x510E527FADE682D1 );
|
||||
casti_m256i( sc->H, 5 ) = m256_const1_64( 0x9B05688C2B3E6C1F );
|
||||
casti_m256i( sc->H, 6 ) = m256_const1_64( 0x1F83D9ABFB41BD6B );
|
||||
casti_m256i( sc->H, 7 ) = m256_const1_64( 0x5BE0CD19137E2179 );
|
||||
|
||||
casti_m256i( sc->S, 0 ) = zero;
|
||||
casti_m256i( sc->S, 1 ) = zero;
|
||||
@@ -642,11 +642,9 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
memset_zero_256( buf + (ptr>>3) + 1, (104-ptr) >> 3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
buf[(104>>3)] = _mm256_or_si256( buf[(104>>3)],
|
||||
_mm256_set1_epi64x( 0x0100000000000000ULL ) );
|
||||
*(buf+(112>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(buf+(120>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
m256_const1_64( 0x0100000000000000ULL ) );
|
||||
*(buf+(112>>3)) = _mm256_set1_epi64x( bswap_64( th ) );
|
||||
*(buf+(120>>3)) = _mm256_set1_epi64x( bswap_64( tl ) );
|
||||
|
||||
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
|
||||
}
|
||||
@@ -659,11 +657,9 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
|
||||
memset_zero_256( buf, 112>>3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
|
||||
*(buf+(112>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(buf+(120>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
buf[104>>3] = m256_const1_64( 0x0100000000000000ULL );
|
||||
*(buf+(112>>3)) = _mm256_set1_epi64x( bswap_64( th ) );
|
||||
*(buf+(120>>3)) = _mm256_set1_epi64x( bswap_64( tl ) );
|
||||
|
||||
blake64_4way( sc, buf, 128 );
|
||||
}
|
||||
|
@@ -116,7 +116,7 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
// block header suffix from coinb2 (stake version)
|
||||
memcpy( &g_work->data[44],
|
||||
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
|
||||
sctx->bloc_height = g_work->data[32];
|
||||
sctx->block_height = g_work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
}
|
||||
|
@@ -103,7 +103,6 @@ static void blake2b_compress( sph_blake2b_ctx *ctx, int last )
|
||||
v[13] ^= ctx->t[1]; // high 64 bits
|
||||
if (last) // last block flag set ?
|
||||
v[14] = ~v[14];
|
||||
|
||||
for (i = 0; i < 16; i++) // get little-endian words
|
||||
m[i] = B2B_GET64(&ctx->b[8 * i]);
|
||||
|
||||
@@ -184,7 +183,8 @@ void sph_blake2b_final( sph_blake2b_ctx *ctx, void *out )
|
||||
|
||||
while (ctx->c < 128) // fill up with zeros
|
||||
ctx->b[ctx->c++] = 0;
|
||||
blake2b_compress(ctx, 1); // final block flag = 1
|
||||
|
||||
blake2b_compress(ctx, 1); // final block flag = 1
|
||||
|
||||
// little endian convert and store
|
||||
for (i = 0; i < ctx->outlen; i++) {
|
||||
|
@@ -62,7 +62,7 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_small_context bmw256_4way_context;
|
||||
|
||||
void bmw256_4way_init(void *cc);
|
||||
void bmw256_4way_init( bmw256_4way_context *ctx );
|
||||
|
||||
void bmw256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
|
@@ -48,7 +48,7 @@ extern "C"{
|
||||
#if defined(__SSE2__)
|
||||
|
||||
// BMW-256 4 way 32
|
||||
|
||||
/*
|
||||
static const uint32_t IV256[] = {
|
||||
0x40414243, 0x44454647,
|
||||
0x48494A4B, 0x4C4D4E4F,
|
||||
@@ -59,6 +59,7 @@ static const uint32_t IV256[] = {
|
||||
0x70717273, 0x74757677,
|
||||
0x78797A7B, 0x7C7D7E7F
|
||||
};
|
||||
*/
|
||||
|
||||
#define ss0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 1), \
|
||||
@@ -462,13 +463,30 @@ static const __m128i final_s[16] =
|
||||
{ 0xaaaaaaafaaaaaaaf, 0xaaaaaaafaaaaaaaf }
|
||||
};
|
||||
*/
|
||||
static void
|
||||
bmw32_4way_init(bmw_4way_small_context *sc, const sph_u32 *iv)
|
||||
void bmw256_4way_init( bmw256_4way_context *ctx )
|
||||
{
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
sc->H[i] = _mm_set1_epi32( iv[i] );
|
||||
sc->ptr = 0;
|
||||
sc->bit_count = 0;
|
||||
ctx->H[ 0] = m128_const1_64( 0x4041424340414243 );
|
||||
ctx->H[ 1] = m128_const1_64( 0x4445464744454647 );
|
||||
ctx->H[ 2] = m128_const1_64( 0x48494A4B48494A4B );
|
||||
ctx->H[ 3] = m128_const1_64( 0x4C4D4E4F4C4D4E4F );
|
||||
ctx->H[ 4] = m128_const1_64( 0x5051525350515253 );
|
||||
ctx->H[ 5] = m128_const1_64( 0x5455565754555657 );
|
||||
ctx->H[ 6] = m128_const1_64( 0x58595A5B58595A5B );
|
||||
ctx->H[ 7] = m128_const1_64( 0x5C5D5E5F5C5D5E5F );
|
||||
ctx->H[ 8] = m128_const1_64( 0x6061626360616263 );
|
||||
ctx->H[ 9] = m128_const1_64( 0x6465666764656667 );
|
||||
ctx->H[10] = m128_const1_64( 0x68696A6B68696A6B );
|
||||
ctx->H[11] = m128_const1_64( 0x6C6D6E6F6C6D6E6F );
|
||||
ctx->H[12] = m128_const1_64( 0x7071727370717273 );
|
||||
ctx->H[13] = m128_const1_64( 0x7475767774757677 );
|
||||
ctx->H[14] = m128_const1_64( 0x78797A7B78797A7B );
|
||||
ctx->H[15] = m128_const1_64( 0x7C7D7E7F7C7D7E7F );
|
||||
|
||||
|
||||
// for ( int i = 0; i < 16; i++ )
|
||||
// sc->H[i] = _mm_set1_epi32( iv[i] );
|
||||
ctx->ptr = 0;
|
||||
ctx->bit_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -525,7 +543,7 @@ bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
buf[ ptr>>2 ] = _mm_set1_epi32( 0x80 );
|
||||
buf[ ptr>>2 ] = m128_const1_64( 0x0000008000000080 );
|
||||
ptr += 4;
|
||||
h = sc->H;
|
||||
|
||||
@@ -551,11 +569,13 @@ bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
casti_m128i( dst, u ) = h1[v];
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
bmw256_4way_init(void *cc)
|
||||
{
|
||||
bmw32_4way_init(cc, IV256);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
bmw256_4way(void *cc, const void *data, size_t len)
|
||||
@@ -1003,25 +1023,24 @@ static const __m256i final_s8[16] =
|
||||
|
||||
void bmw256_8way_init( bmw256_8way_context *ctx )
|
||||
{
|
||||
ctx->H[ 0] = _mm256_set1_epi32( IV256[ 0] );
|
||||
ctx->H[ 1] = _mm256_set1_epi32( IV256[ 1] );
|
||||
ctx->H[ 2] = _mm256_set1_epi32( IV256[ 2] );
|
||||
ctx->H[ 3] = _mm256_set1_epi32( IV256[ 3] );
|
||||
ctx->H[ 4] = _mm256_set1_epi32( IV256[ 4] );
|
||||
ctx->H[ 5] = _mm256_set1_epi32( IV256[ 5] );
|
||||
ctx->H[ 6] = _mm256_set1_epi32( IV256[ 6] );
|
||||
ctx->H[ 7] = _mm256_set1_epi32( IV256[ 7] );
|
||||
ctx->H[ 8] = _mm256_set1_epi32( IV256[ 8] );
|
||||
ctx->H[ 9] = _mm256_set1_epi32( IV256[ 9] );
|
||||
ctx->H[10] = _mm256_set1_epi32( IV256[10] );
|
||||
ctx->H[11] = _mm256_set1_epi32( IV256[11] );
|
||||
ctx->H[12] = _mm256_set1_epi32( IV256[12] );
|
||||
ctx->H[13] = _mm256_set1_epi32( IV256[13] );
|
||||
ctx->H[14] = _mm256_set1_epi32( IV256[14] );
|
||||
ctx->H[15] = _mm256_set1_epi32( IV256[15] );
|
||||
ctx->H[ 0] = m256_const1_64( 0x4041424340414243 );
|
||||
ctx->H[ 1] = m256_const1_64( 0x4445464744454647 );
|
||||
ctx->H[ 2] = m256_const1_64( 0x48494A4B48494A4B );
|
||||
ctx->H[ 3] = m256_const1_64( 0x4C4D4E4F4C4D4E4F );
|
||||
ctx->H[ 4] = m256_const1_64( 0x5051525350515253 );
|
||||
ctx->H[ 5] = m256_const1_64( 0x5455565754555657 );
|
||||
ctx->H[ 6] = m256_const1_64( 0x58595A5B58595A5B );
|
||||
ctx->H[ 7] = m256_const1_64( 0x5C5D5E5F5C5D5E5F );
|
||||
ctx->H[ 8] = m256_const1_64( 0x6061626360616263 );
|
||||
ctx->H[ 9] = m256_const1_64( 0x6465666764656667 );
|
||||
ctx->H[10] = m256_const1_64( 0x68696A6B68696A6B );
|
||||
ctx->H[11] = m256_const1_64( 0x6C6D6E6F6C6D6E6F );
|
||||
ctx->H[12] = m256_const1_64( 0x7071727370717273 );
|
||||
ctx->H[13] = m256_const1_64( 0x7475767774757677 );
|
||||
ctx->H[14] = m256_const1_64( 0x78797A7B78797A7B );
|
||||
ctx->H[15] = m256_const1_64( 0x7C7D7E7F7C7D7E7F );
|
||||
ctx->ptr = 0;
|
||||
ctx->bit_count = 0;
|
||||
|
||||
}
|
||||
|
||||
void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len )
|
||||
@@ -1074,7 +1093,7 @@ void bmw256_8way_close( bmw256_8way_context *ctx, void *dst )
|
||||
|
||||
buf = ctx->buf;
|
||||
ptr = ctx->ptr;
|
||||
buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
|
||||
buf[ ptr>>2 ] = m256_const1_64( 0x0000008000000080 );
|
||||
ptr += 4;
|
||||
h = ctx->H;
|
||||
|
||||
@@ -1089,7 +1108,6 @@ void bmw256_8way_close( bmw256_8way_context *ctx, void *dst )
|
||||
buf[ (buf_size - 8) >> 2 ] = _mm256_set1_epi32( ctx->bit_count );
|
||||
buf[ (buf_size - 4) >> 2 ] = m256_zero;
|
||||
|
||||
|
||||
compress_small_8way( buf, h, h2 );
|
||||
|
||||
for ( u = 0; u < 16; u ++ )
|
||||
|
59
algo/bmw/bmw512-4way.c
Normal file
59
algo/bmw/bmw512-4way.c
Normal file
@@ -0,0 +1,59 @@
|
||||
#include "bmw512-gate.h"
|
||||
|
||||
#ifdef BMW512_4WAY
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
//#include "sph_keccak.h"
|
||||
#include "bmw-hash-4way.h"
|
||||
|
||||
void bmw512hash_4way(void *state, const void *input)
|
||||
{
|
||||
bmw512_4way_context ctx;
|
||||
bmw512_4way_init( &ctx );
|
||||
bmw512_4way( &ctx, input, 80 );
|
||||
bmw512_4way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
// const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
bmw512hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
20
algo/bmw/bmw512-gate.c
Normal file
20
algo/bmw/bmw512-gate.c
Normal file
@@ -0,0 +1,20 @@
|
||||
#include "bmw512-gate.h"
|
||||
|
||||
int64_t bmw512_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_bmw512_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&bmw512_get_max64;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined (BMW512_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_bmw512_4way;
|
||||
gate->hash = (void*)&bmw512hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_bmw512;
|
||||
gate->hash = (void*)&bmw512hash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
||||
|
23
algo/bmw/bmw512-gate.h
Normal file
23
algo/bmw/bmw512-gate.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef BMW512_GATE_H__
|
||||
#define BMW512_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define BMW512_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(BMW512_4WAY)
|
||||
|
||||
void bmw512hash_4way( void *state, const void *input );
|
||||
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
void bmw512hash( void *state, const void *input );
|
||||
int scanhash_bmw512( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
@@ -961,8 +961,22 @@ static const __m256i final_b[16] =
|
||||
static void
|
||||
bmw64_4way_init( bmw_4way_big_context *sc, const sph_u64 *iv )
|
||||
{
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
sc->H[i] = _mm256_set1_epi64x( iv[i] );
|
||||
sc->H[ 0] = m256_const1_64( 0x8081828384858687 );
|
||||
sc->H[ 1] = m256_const1_64( 0x88898A8B8C8D8E8F );
|
||||
sc->H[ 2] = m256_const1_64( 0x9091929394959697 );
|
||||
sc->H[ 3] = m256_const1_64( 0x98999A9B9C9D9E9F );
|
||||
sc->H[ 4] = m256_const1_64( 0xA0A1A2A3A4A5A6A7 );
|
||||
sc->H[ 5] = m256_const1_64( 0xA8A9AAABACADAEAF );
|
||||
sc->H[ 6] = m256_const1_64( 0xB0B1B2B3B4B5B6B7 );
|
||||
sc->H[ 7] = m256_const1_64( 0xB8B9BABBBCBDBEBF );
|
||||
sc->H[ 8] = m256_const1_64( 0xC0C1C2C3C4C5C6C7 );
|
||||
sc->H[ 9] = m256_const1_64( 0xC8C9CACBCCCDCECF );
|
||||
sc->H[10] = m256_const1_64( 0xD0D1D2D3D4D5D6D7 );
|
||||
sc->H[11] = m256_const1_64( 0xD8D9DADBDCDDDEDF );
|
||||
sc->H[12] = m256_const1_64( 0xE0E1E2E3E4E5E6E7 );
|
||||
sc->H[13] = m256_const1_64( 0xE8E9EAEBECEDEEEF );
|
||||
sc->H[14] = m256_const1_64( 0xF0F1F2F3F4F5F6F7 );
|
||||
sc->H[15] = m256_const1_64( 0xF8F9FAFBFCFDFEFF );
|
||||
sc->ptr = 0;
|
||||
sc->bit_count = 0;
|
||||
}
|
||||
@@ -1014,13 +1028,11 @@ bmw64_4way_close(bmw_4way_big_context *sc, unsigned ub, unsigned n,
|
||||
__m256i *buf;
|
||||
__m256i h1[16], h2[16], *h;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
const int buf_size = 128; // bytes of one lane, compatible with len
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ ptr>>3 ] = _mm256_set1_epi64x( z );
|
||||
buf[ ptr>>3 ] = m256_const1_64( 0x80 );
|
||||
ptr += 8;
|
||||
h = sc->H;
|
||||
|
||||
|
53
algo/bmw/bmw512.c
Normal file
53
algo/bmw/bmw512.c
Normal file
@@ -0,0 +1,53 @@
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sph_bmw.h"
|
||||
|
||||
void bmw512hash(void *state, const void *input)
|
||||
{
|
||||
sph_bmw512_context ctx;
|
||||
uint32_t hash[32];
|
||||
|
||||
sph_bmw512_init( &ctx );
|
||||
sph_bmw512( &ctx,input, 80 );
|
||||
sph_bmw512_close( &ctx, hash );
|
||||
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_bmw512( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t endiandata[32];
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
do {
|
||||
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
bmw512hash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFF00)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@
|
||||
|
||||
// 2x128
|
||||
|
||||
/*
|
||||
// The result of hashing 10 rounds of initial data which consists of params
|
||||
// zero padded.
|
||||
static const uint64_t IV256[] =
|
||||
@@ -24,13 +25,14 @@ static const uint64_t IV512[] =
|
||||
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
|
||||
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
|
||||
};
|
||||
*/
|
||||
|
||||
static void transform_2way( cube_2way_context *sp )
|
||||
{
|
||||
int r;
|
||||
const int rounds = sp->rounds;
|
||||
|
||||
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
|
||||
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1;
|
||||
|
||||
x0 = _mm256_load_si256( (__m256i*)sp->h );
|
||||
x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
|
||||
@@ -47,18 +49,12 @@ static void transform_2way( cube_2way_context *sp )
|
||||
x5 = _mm256_add_epi32( x1, x5 );
|
||||
x6 = _mm256_add_epi32( x2, x6 );
|
||||
x7 = _mm256_add_epi32( x3, x7 );
|
||||
y0 = x2;
|
||||
y1 = x3;
|
||||
y2 = x0;
|
||||
y3 = x1;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
|
||||
_mm256_srli_epi32( y1, 25 ) );
|
||||
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
|
||||
_mm256_srli_epi32( y2, 25 ) );
|
||||
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
|
||||
_mm256_srli_epi32( y3, 25 ) );
|
||||
y0 = x0;
|
||||
y1 = x1;
|
||||
x0 = mm256_rol_32( x2, 7 );
|
||||
x1 = mm256_rol_32( x3, 7 );
|
||||
x2 = mm256_rol_32( y0, 7 );
|
||||
x3 = mm256_rol_32( y1, 7 );
|
||||
x0 = _mm256_xor_si256( x0, x4 );
|
||||
x1 = _mm256_xor_si256( x1, x5 );
|
||||
x2 = _mm256_xor_si256( x2, x6 );
|
||||
@@ -71,18 +67,12 @@ static void transform_2way( cube_2way_context *sp )
|
||||
x5 = _mm256_add_epi32( x1, x5 );
|
||||
x6 = _mm256_add_epi32( x2, x6 );
|
||||
x7 = _mm256_add_epi32( x3, x7 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
y2 = x3;
|
||||
y3 = x2;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
_mm256_srli_epi32( y1, 21 ) );
|
||||
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
|
||||
_mm256_srli_epi32( y2, 21 ) );
|
||||
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
|
||||
_mm256_srli_epi32( y3, 21 ) );
|
||||
y0 = x0;
|
||||
y1 = x2;
|
||||
x0 = mm256_rol_32( x1, 11 );
|
||||
x1 = mm256_rol_32( y0, 11 );
|
||||
x2 = mm256_rol_32( x3, 11 );
|
||||
x3 = mm256_rol_32( y1, 11 );
|
||||
x0 = _mm256_xor_si256( x0, x4 );
|
||||
x1 = _mm256_xor_si256( x1, x5 );
|
||||
x2 = _mm256_xor_si256( x2, x6 );
|
||||
@@ -107,23 +97,40 @@ static void transform_2way( cube_2way_context *sp )
|
||||
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
|
||||
int blockbytes )
|
||||
{
|
||||
const uint64_t* iv = hashbitlen == 512 ? IV512 : IV256;
|
||||
__m128i* h = (__m128i*)sp->h;
|
||||
sp->hashlen = hashbitlen/128;
|
||||
sp->blocksize = blockbytes/16;
|
||||
sp->rounds = rounds;
|
||||
sp->pos = 0;
|
||||
|
||||
__m256i* h = (__m256i*)sp->h;
|
||||
|
||||
h[0] = _mm256_set_epi64x( iv[ 1], iv[ 0], iv[ 1], iv[ 0] );
|
||||
h[1] = _mm256_set_epi64x( iv[ 3], iv[ 2], iv[ 3], iv[ 2] );
|
||||
h[2] = _mm256_set_epi64x( iv[ 5], iv[ 4], iv[ 5], iv[ 4] );
|
||||
h[3] = _mm256_set_epi64x( iv[ 7], iv[ 6], iv[ 7], iv[ 6] );
|
||||
h[4] = _mm256_set_epi64x( iv[ 9], iv[ 8], iv[ 9], iv[ 8] );
|
||||
h[5] = _mm256_set_epi64x( iv[11], iv[10], iv[11], iv[10] );
|
||||
h[6] = _mm256_set_epi64x( iv[13], iv[12], iv[13], iv[12] );
|
||||
h[7] = _mm256_set_epi64x( iv[15], iv[14], iv[15], iv[14] );
|
||||
if ( hashbitlen == 512 )
|
||||
{
|
||||
|
||||
h[ 0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
|
||||
h[ 2] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
|
||||
h[ 4] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
|
||||
h[ 6] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
|
||||
h[ 8] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
|
||||
h[10] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
|
||||
h[12] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
|
||||
h[14] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
|
||||
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
|
||||
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
|
||||
}
|
||||
else
|
||||
{
|
||||
h[ 0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
|
||||
h[ 2] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
|
||||
h[ 4] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
|
||||
h[ 6] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
|
||||
h[ 8] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
|
||||
h[10] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
|
||||
h[12] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
|
||||
h[14] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
|
||||
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
|
||||
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -165,7 +172,7 @@ int cube_2way_close( cube_2way_context *sp, void *output )
|
||||
|
||||
for ( i = 0; i < 10; ++i ) transform_2way( sp );
|
||||
|
||||
for ( i = 0; i < sp->hashlen; i++ ) hash[i] = sp->h[i];
|
||||
memcpy( hash, sp->h, sp->hashlen<<5 );
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -198,7 +205,7 @@ int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
|
||||
for ( i = 0; i < 10; ++i ) transform_2way( sp );
|
||||
|
||||
for ( i = 0; i < sp->hashlen; i++ ) hash[i] = sp->h[i];
|
||||
memcpy( hash, sp->h, sp->hashlen<<5 );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -16,24 +16,6 @@
|
||||
#include "simd-utils.h"
|
||||
#include <stdio.h>
|
||||
|
||||
// The result of hashing 10 rounds of initial data which is params and
|
||||
// mostly zeros.
|
||||
static const uint64_t IV256[] =
|
||||
{
|
||||
0xCCD6F29FEA2BD4B4, 0x35481EAE63117E71, 0xE5D94E6322512D5B, 0xF4CC12BE7E624131,
|
||||
0x42AF2070C2D0B696, 0x3361DA8CD0720C35, 0x8EF8AD8328CCECA4, 0x40E5FBAB4680AC00,
|
||||
0x6107FBD5D89041C3, 0xF0B266796C859D41, 0x5FA2560309392549, 0x93CB628565C892FD,
|
||||
0x9E4B4E602AF2B5AE, 0x85254725774ABFDD, 0x4AB6AAD615815AEB, 0xD6032C0A9CDAF8AF
|
||||
};
|
||||
|
||||
static const uint64_t IV512[] =
|
||||
{
|
||||
0x50F494D42AEA2A61, 0x4167D83E2D538B8B, 0xC701CF8C3FEE2313, 0x50AC5695CC39968E,
|
||||
0xA647A8B34D42C787, 0x825B453797CF0BEF, 0xF22090C4EEF864D2, 0xA23911AED0E5CD33,
|
||||
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
|
||||
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
|
||||
};
|
||||
|
||||
static void transform( cubehashParam *sp )
|
||||
{
|
||||
int r;
|
||||
@@ -53,26 +35,22 @@ static void transform( cubehashParam *sp )
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( x1, 7 ),
|
||||
_mm256_srli_epi32( x1, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x0 = mm256_rol_32( x1, 7 );
|
||||
x1 = mm256_rol_32( y0, 7 );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0x4e );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0x4e );
|
||||
x2 = mm256_swap64_128( x2 );
|
||||
x3 = mm256_swap64_128( x3 );
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = _mm256_permute4x64_epi64( x0, 0x4e );
|
||||
y1 = _mm256_permute4x64_epi64( x1, 0x4e );
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
_mm256_srli_epi32( y1, 21 ) );
|
||||
y0 = mm256_swap_128( x0 );
|
||||
y1 = mm256_swap_128( x1 );
|
||||
x0 = mm256_rol_32( y0, 11 );
|
||||
x1 = mm256_rol_32( y1, 11 );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0xb1 );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0xb1 );
|
||||
x2 = mm256_swap32_64( x2 );
|
||||
x3 = mm256_swap32_64( x3 );
|
||||
}
|
||||
|
||||
_mm256_store_si256( (__m256i*)sp->x, x0 );
|
||||
@@ -147,37 +125,58 @@ static void transform( cubehashParam *sp )
|
||||
#endif
|
||||
} // transform
|
||||
|
||||
/*
|
||||
// The result of hashing 10 rounds of initial data which is params and
|
||||
// mostly zeros.
|
||||
static const uint64_t IV256[] =
|
||||
{
|
||||
0xCCD6F29FEA2BD4B4, 0x35481EAE63117E71, 0xE5D94E6322512D5B, 0xF4CC12BE7E624131,
|
||||
0x42AF2070C2D0B696, 0x3361DA8CD0720C35, 0x8EF8AD8328CCECA4, 0x40E5FBAB4680AC00,
|
||||
0x6107FBD5D89041C3, 0xF0B266796C859D41, 0x5FA2560309392549, 0x93CB628565C892FD,
|
||||
0x9E4B4E602AF2B5AE, 0x85254725774ABFDD, 0x4AB6AAD615815AEB, 0xD6032C0A9CDAF8AF
|
||||
};
|
||||
|
||||
static const uint64_t IV512[] =
|
||||
{
|
||||
0x50F494D42AEA2A61, 0x4167D83E2D538B8B, 0xC701CF8C3FEE2313, 0x50AC5695CC39968E,
|
||||
0xA647A8B34D42C787, 0x825B453797CF0BEF, 0xF22090C4EEF864D2, 0xA23911AED0E5CD33,
|
||||
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
|
||||
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
|
||||
};
|
||||
*/
|
||||
|
||||
int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
|
||||
{
|
||||
const uint64_t* iv = hashbitlen == 512 ? IV512 : IV256;
|
||||
__m128i *x = (__m128i*)sp->x;
|
||||
sp->hashlen = hashbitlen/128;
|
||||
sp->blocksize = blockbytes/16;
|
||||
sp->rounds = rounds;
|
||||
sp->pos = 0;
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
__m256i* x = (__m256i*)sp->x;
|
||||
if ( hashbitlen == 512 )
|
||||
{
|
||||
|
||||
x[0] = _mm256_set_epi64x( iv[ 3], iv[ 2], iv[ 1], iv[ 0] );
|
||||
x[1] = _mm256_set_epi64x( iv[ 7], iv[ 6], iv[ 5], iv[ 4] );
|
||||
x[2] = _mm256_set_epi64x( iv[11], iv[10], iv[ 9], iv[ 8] );
|
||||
x[3] = _mm256_set_epi64x( iv[15], iv[14], iv[13], iv[12] );
|
||||
x[0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
|
||||
x[1] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
|
||||
x[2] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
|
||||
x[3] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
|
||||
x[4] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
|
||||
x[5] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
|
||||
x[6] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
|
||||
x[7] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
|
||||
}
|
||||
else
|
||||
{
|
||||
x[0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
|
||||
x[1] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
|
||||
x[2] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
|
||||
x[3] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
|
||||
x[4] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
|
||||
x[5] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
|
||||
x[6] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
|
||||
x[7] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
__m128i* x = (__m128i*)sp->x;
|
||||
|
||||
x[0] = _mm_set_epi64x( iv[ 1], iv[ 0] );
|
||||
x[1] = _mm_set_epi64x( iv[ 3], iv[ 2] );
|
||||
x[2] = _mm_set_epi64x( iv[ 5], iv[ 4] );
|
||||
x[3] = _mm_set_epi64x( iv[ 7], iv[ 6] );
|
||||
x[4] = _mm_set_epi64x( iv[ 9], iv[ 8] );
|
||||
x[5] = _mm_set_epi64x( iv[11], iv[10] );
|
||||
x[6] = _mm_set_epi64x( iv[13], iv[12] );
|
||||
x[7] = _mm_set_epi64x( iv[15], iv[14] );
|
||||
|
||||
#endif
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
@@ -94,19 +94,14 @@ int scanhash_groestl( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void groestl_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_dmd_gr_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_groestl_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_groestl;
|
||||
gate->hash = (void*)&groestlhash;
|
||||
gate->set_target = (void*)&groestl_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -10,7 +10,7 @@
|
||||
#else
|
||||
#include "aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
typedef struct {
|
||||
#ifdef NO_AES_NI
|
||||
@@ -18,7 +18,7 @@ typedef struct {
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
sph_sha256_context sha;
|
||||
SHA256_CTX sha;
|
||||
} myrgr_ctx_holder;
|
||||
|
||||
myrgr_ctx_holder myrgr_ctx;
|
||||
@@ -28,15 +28,15 @@ void init_myrgr_ctx()
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init( &myrgr_ctx.groestl );
|
||||
#else
|
||||
init_groestl (&myrgr_ctx.groestl, 64 );
|
||||
init_groestl ( &myrgr_ctx.groestl, 64 );
|
||||
#endif
|
||||
sph_sha256_init(&myrgr_ctx.sha);
|
||||
SHA256_Init( &myrgr_ctx.sha );
|
||||
}
|
||||
|
||||
void myriad_hash(void *output, const void *input)
|
||||
{
|
||||
myrgr_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
|
||||
myrgr_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
|
||||
|
||||
uint32_t _ALIGN(32) hash[16];
|
||||
|
||||
@@ -44,23 +44,22 @@ void myriad_hash(void *output, const void *input)
|
||||
sph_groestl512(&ctx.groestl, input, 80);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)input, 640 );
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
update_groestl( &ctx.groestl, (char*)input, 640 );
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
#endif
|
||||
|
||||
sph_sha256(&ctx.sha, hash, 64);
|
||||
sph_sha256_close(&ctx.sha, hash);
|
||||
SHA256_Update( &ctx.sha, (unsigned char*)hash, 64 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx.sha );
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_myriad( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
int scanhash_myriad( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
@@ -8,7 +8,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "aes_ni/hash-groestl.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
|
||||
typedef struct {
|
||||
hashState_groestl groestl;
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#if defined(__AVX2__) && defined(__AES__) && !defined(__SHA__)
|
||||
#define MYRGR_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -15,11 +15,6 @@ pthread_barrier_t hodl_barrier;
|
||||
// need to be passed.
|
||||
unsigned char *hodl_scratchbuf = NULL;
|
||||
|
||||
void hodl_set_target( struct work* work, double diff )
|
||||
{
|
||||
diff_to_target(work->target, diff / 8388608.0 );
|
||||
}
|
||||
|
||||
void hodl_le_build_stratum_request( char* req, struct work* work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
@@ -170,7 +165,6 @@ bool register_hodl_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&hodl_scanhash;
|
||||
gate->get_new_work = (void*)&hodl_get_new_work;
|
||||
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
|
||||
gate->set_target = (void*)&hodl_set_target;
|
||||
gate->build_stratum_request = (void*)&hodl_le_build_stratum_request;
|
||||
gate->malloc_txs_request = (void*)&hodl_malloc_txs_request;
|
||||
gate->build_block_header = (void*)&hodl_build_block_header;
|
||||
@@ -179,6 +173,7 @@ bool register_hodl_algo( algo_gate_t* gate )
|
||||
gate->work_cmp_size = 76;
|
||||
hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
|
||||
allow_getwork = false;
|
||||
opt_target_factor = 8388608.0;
|
||||
return ( hodl_scratchbuf != NULL );
|
||||
}
|
||||
|
||||
|
@@ -246,18 +246,12 @@ do { \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
#define W0(x) Wz(x, _mm256_set_epi64x( 0x5555555555555555, \
|
||||
0x5555555555555555, 0x5555555555555555, 0x5555555555555555 ), 1 )
|
||||
#define W1(x) Wz(x, _mm256_set_epi64x( 0x3333333333333333, \
|
||||
0x3333333333333333, 0x3333333333333333, 0x3333333333333333 ), 2 )
|
||||
#define W2(x) Wz(x, _mm256_set_epi64x( 0x0F0F0F0F0F0F0F0F, \
|
||||
0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F ), 4 )
|
||||
#define W3(x) Wz(x, _mm256_set_epi64x( 0x00FF00FF00FF00FF, \
|
||||
0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF ), 8 )
|
||||
#define W4(x) Wz(x, _mm256_set_epi64x( 0x0000FFFF0000FFFF, \
|
||||
0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF ), 16 )
|
||||
#define W5(x) Wz(x, _mm256_set_epi64x( 0x00000000FFFFFFFF, \
|
||||
0x00000000FFFFFFFF, 0x00000000FFFFFFFF, 0x00000000FFFFFFFF ), 32 )
|
||||
#define W0(x) Wz(x, m256_const1_64( 0x5555555555555555 ), 1 )
|
||||
#define W1(x) Wz(x, m256_const1_64( 0x3333333333333333 ), 2 )
|
||||
#define W2(x) Wz(x, m256_const1_64( 0x0F0F0F0F0F0F0F0F ), 4 )
|
||||
#define W3(x) Wz(x, m256_const1_64( 0x00FF00FF00FF00FF ), 8 )
|
||||
#define W4(x) Wz(x, m256_const1_64( 0x0000FFFF0000FFFF ), 16 )
|
||||
#define W5(x) Wz(x, m256_const1_64( 0x00000000FFFFFFFF ), 32 )
|
||||
#define W6(x) \
|
||||
do { \
|
||||
__m256i t = x ## h; \
|
||||
@@ -331,14 +325,14 @@ do { \
|
||||
__m256i m2l = buf[5]; \
|
||||
__m256i m3h = buf[6]; \
|
||||
__m256i m3l = buf[7]; \
|
||||
h0h = _mm256_xor_si256( h0h, m0h ); \
|
||||
h0l = _mm256_xor_si256( h0l, m0l ); \
|
||||
h1h = _mm256_xor_si256( h1h, m1h ); \
|
||||
h1l = _mm256_xor_si256( h1l, m1l ); \
|
||||
h2h = _mm256_xor_si256( h2h, m2h ); \
|
||||
h2l = _mm256_xor_si256( h2l, m2l ); \
|
||||
h3h = _mm256_xor_si256( h3h, m3h ); \
|
||||
h3l = _mm256_xor_si256( h3l, m3l ); \
|
||||
h0h = _mm256_xor_si256( h0h, m0h ); \
|
||||
h0l = _mm256_xor_si256( h0l, m0l ); \
|
||||
h1h = _mm256_xor_si256( h1h, m1h ); \
|
||||
h1l = _mm256_xor_si256( h1l, m1l ); \
|
||||
h2h = _mm256_xor_si256( h2h, m2h ); \
|
||||
h2l = _mm256_xor_si256( h2l, m2l ); \
|
||||
h3h = _mm256_xor_si256( h3h, m3h ); \
|
||||
h3l = _mm256_xor_si256( h3l, m3l ); \
|
||||
|
||||
#define INPUT_BUF2 \
|
||||
h4h = _mm256_xor_si256( h4h, m0h ); \
|
||||
@@ -477,13 +471,48 @@ static const sph_u64 IV512[] = {
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
jh_4way_init( jh_4way_context *sc, const void *iv )
|
||||
void jh256_4way_init( jh_4way_context *sc )
|
||||
{
|
||||
uint64_t *v = (uint64_t*)iv;
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
sc->H[i] = _mm256_set_epi64x( v[i], v[i], v[i], v[i] );
|
||||
// bswapped IV256
|
||||
sc->H[ 0] = m256_const1_64( 0xebd3202c41a398eb );
|
||||
sc->H[ 1] = m256_const1_64( 0xc145b29c7bbecd92 );
|
||||
sc->H[ 2] = m256_const1_64( 0xfac7d4609151931c );
|
||||
sc->H[ 3] = m256_const1_64( 0x038a507ed6820026 );
|
||||
sc->H[ 4] = m256_const1_64( 0x45b92677269e23a4 );
|
||||
sc->H[ 5] = m256_const1_64( 0x77941ad4481afbe0 );
|
||||
sc->H[ 6] = m256_const1_64( 0x7a176b0226abb5cd );
|
||||
sc->H[ 7] = m256_const1_64( 0xa82fff0f4224f056 );
|
||||
sc->H[ 8] = m256_const1_64( 0x754d2e7f8996a371 );
|
||||
sc->H[ 9] = m256_const1_64( 0x62e27df70849141d );
|
||||
sc->H[10] = m256_const1_64( 0x948f2476f7957627 );
|
||||
sc->H[11] = m256_const1_64( 0x6c29804757b6d587 );
|
||||
sc->H[12] = m256_const1_64( 0x6c0d8eac2d275e5c );
|
||||
sc->H[13] = m256_const1_64( 0x0f7a0557c6508451 );
|
||||
sc->H[14] = m256_const1_64( 0xea12247067d3e47b );
|
||||
sc->H[15] = m256_const1_64( 0x69d71cd313abe389 );
|
||||
sc->ptr = 0;
|
||||
sc->block_count = 0;
|
||||
}
|
||||
|
||||
void jh512_4way_init( jh_4way_context *sc )
|
||||
{
|
||||
// bswapped IV512
|
||||
sc->H[ 0] = m256_const1_64( 0x17aa003e964bd16f );
|
||||
sc->H[ 1] = m256_const1_64( 0x43d5157a052e6a63 );
|
||||
sc->H[ 2] = m256_const1_64( 0x0bef970c8d5e228a );
|
||||
sc->H[ 3] = m256_const1_64( 0x61c3b3f2591234e9 );
|
||||
sc->H[ 4] = m256_const1_64( 0x1e806f53c1a01d89 );
|
||||
sc->H[ 5] = m256_const1_64( 0x806d2bea6b05a92a );
|
||||
sc->H[ 6] = m256_const1_64( 0xa6ba7520dbcc8e58 );
|
||||
sc->H[ 7] = m256_const1_64( 0xf73bf8ba763a0fa9 );
|
||||
sc->H[ 8] = m256_const1_64( 0x694ae34105e66901 );
|
||||
sc->H[ 9] = m256_const1_64( 0x5ae66f2e8e8ab546 );
|
||||
sc->H[10] = m256_const1_64( 0x243c84c1d0a74710 );
|
||||
sc->H[11] = m256_const1_64( 0x99c15a2db1716e3b );
|
||||
sc->H[12] = m256_const1_64( 0x56f8b19decf657cf );
|
||||
sc->H[13] = m256_const1_64( 0x56b116577c8806a7 );
|
||||
sc->H[14] = m256_const1_64( 0xfb1785e6dffcc2e3 );
|
||||
sc->H[15] = m256_const1_64( 0x4bdd8ccc78465a54 );
|
||||
sc->ptr = 0;
|
||||
sc->block_count = 0;
|
||||
}
|
||||
@@ -542,7 +571,7 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
size_t numz, u;
|
||||
sph_u64 l0, l1, l0e, l1e;
|
||||
|
||||
buf[0] = _mm256_set_epi64x( 0x80, 0x80, 0x80, 0x80 );
|
||||
buf[0] = m256_const1_64( 0x80ULL );
|
||||
|
||||
if ( sc->ptr == 0 )
|
||||
numz = 48;
|
||||
@@ -555,8 +584,8 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
l1 = SPH_T64(sc->block_count >> 55);
|
||||
sph_enc64be( &l0e, l0 );
|
||||
sph_enc64be( &l1e, l1 );
|
||||
*(buf + (numz>>3) ) = _mm256_set_epi64x( l1e, l1e, l1e, l1e );
|
||||
*(buf + (numz>>3) + 1) = _mm256_set_epi64x( l0e, l0e, l0e, l0e );
|
||||
*(buf + (numz>>3) ) = _mm256_set1_epi64x( l1e );
|
||||
*(buf + (numz>>3) + 1) = _mm256_set1_epi64x( l0e );
|
||||
|
||||
jh_4way_core( sc, buf, numz + 16 );
|
||||
|
||||
@@ -566,11 +595,13 @@ jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
memcpy_256( dst256, buf, 8 );
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
jh256_4way_init(void *cc)
|
||||
{
|
||||
jh_4way_init(cc, IV256);
|
||||
jhs_4way_init(cc, IV256);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
jh256_4way(void *cc, const void *data, size_t len)
|
||||
@@ -584,11 +615,13 @@ jh256_4way_close(void *cc, void *dst)
|
||||
jh_4way_close(cc, 0, 0, dst, 8, IV256);
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
jh512_4way_init(void *cc)
|
||||
{
|
||||
jh_4way_init(cc, IV512);
|
||||
jhb_4way_init(cc, IV512);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
jh512_4way(void *cc, const void *data, size_t len)
|
||||
|
@@ -79,13 +79,13 @@ typedef jh_4way_context jh256_4way_context;
|
||||
|
||||
typedef jh_4way_context jh512_4way_context;
|
||||
|
||||
void jh256_4way_init(void *cc);
|
||||
void jh256_4way_init( jh_4way_context *sc);
|
||||
|
||||
void jh256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void jh256_4way_close(void *cc, void *dst);
|
||||
|
||||
void jh512_4way_init(void *cc);
|
||||
void jh512_4way_init( jh_4way_context *sc );
|
||||
|
||||
void jh512_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
|
@@ -12,7 +12,7 @@ bool register_jha_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&jha_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -39,10 +39,10 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
keccakhash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
|
||||
if ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
|
@@ -1,18 +1,13 @@
|
||||
#include "keccak-gate.h"
|
||||
|
||||
void keccak_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (128.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
int64_t keccak_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_keccak_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccak_set_target;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
opt_target_factor = 128.0;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
gate->hash = (void*)&keccakhash_4way;
|
||||
@@ -23,17 +18,12 @@ bool register_keccak_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
void keccakc_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_keccakc_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccakc_set_target;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
gate->hash = (void*)&keccakhash_4way;
|
||||
|
@@ -370,18 +370,23 @@ static const sph_u64 RC[] = {
|
||||
|
||||
static void keccak64_init( keccak64_ctx_m256i *kc, unsigned out_size )
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 25; i ++)
|
||||
kc->w[i] = _mm256_setzero_si256();
|
||||
__m256i zero = m256_zero;
|
||||
__m256i neg1 = m256_neg1;
|
||||
|
||||
// Initialization for the "lane complement".
|
||||
kc->w[ 1] = m256_neg1;
|
||||
kc->w[ 2] = m256_neg1;
|
||||
kc->w[ 8] = m256_neg1;
|
||||
kc->w[12] = m256_neg1;
|
||||
kc->w[17] = m256_neg1;
|
||||
kc->w[20] = m256_neg1;
|
||||
kc->ptr = 0;
|
||||
kc->w[ 0] = zero; kc->w[ 1] = neg1;
|
||||
kc->w[ 2] = neg1; kc->w[ 3] = zero;
|
||||
kc->w[ 4] = zero; kc->w[ 5] = zero;
|
||||
kc->w[ 6] = zero; kc->w[ 7] = zero;
|
||||
kc->w[ 8] = neg1; kc->w[ 9] = zero;
|
||||
kc->w[10] = zero; kc->w[11] = zero;
|
||||
kc->w[12] = neg1; kc->w[13] = zero;
|
||||
kc->w[14] = zero; kc->w[15] = zero;
|
||||
kc->w[16] = zero; kc->w[17] = neg1;
|
||||
kc->w[18] = zero; kc->w[19] = zero;
|
||||
kc->w[20] = neg1; kc->w[21] = zero;
|
||||
kc->w[22] = zero; kc->w[23] = zero;
|
||||
kc->w[24] = zero; kc->ptr = 0;
|
||||
kc->lim = 200 - (out_size >> 2);
|
||||
}
|
||||
|
||||
@@ -441,8 +446,8 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
eb = 0x100 >> 8;
|
||||
if ( kc->ptr == (lim - 8) )
|
||||
{
|
||||
uint64_t t = eb | 0x8000000000000000;
|
||||
u.tmp[0] = _mm256_set_epi64x( t, t, t, t );
|
||||
const uint64_t t = eb | 0x8000000000000000;
|
||||
u.tmp[0] = m256_const1_64( t );
|
||||
j = 8;
|
||||
}
|
||||
else
|
||||
@@ -450,8 +455,7 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
j = lim - kc->ptr;
|
||||
u.tmp[0] = _mm256_set_epi64x( eb, eb, eb, eb );
|
||||
memset_zero_256( u.tmp + 1, (j>>3) - 2 );
|
||||
u.tmp[ (j>>3) - 1] = _mm256_set_epi64x( 0x8000000000000000,
|
||||
0x8000000000000000, 0x8000000000000000, 0x8000000000000000);
|
||||
u.tmp[ (j>>3) - 1] = m256_const1_64( 0x8000000000000000 );
|
||||
}
|
||||
keccak64_core( kc, u.tmp, j, lim );
|
||||
/* Finalize the "lane complement" */
|
||||
@@ -461,9 +465,7 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
NOT64( kc->w[12], kc->w[12] );
|
||||
NOT64( kc->w[17], kc->w[17] );
|
||||
NOT64( kc->w[20], kc->w[20] );
|
||||
for ( j = 0; j < m256_len; j++ )
|
||||
u.tmp[j] = kc->w[j];
|
||||
memcpy_256( dst, u.tmp, m256_len );
|
||||
memcpy_256( dst, kc->w, m256_len );
|
||||
}
|
||||
|
||||
void keccak256_4way_init( void *kc )
|
||||
|
@@ -71,7 +71,7 @@ bool register_lyra2rev3_algo( algo_gate_t* gate )
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev3_thread_init;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -105,7 +105,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -128,7 +128,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -148,7 +148,7 @@ bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -168,8 +168,8 @@ bool register_allium_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
gate->get_max64 = (void*)&allium_get_max64_0xFFFFLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -182,6 +182,7 @@ int phi2_get_work_data_size() { return phi2_use_roots ? 144 : 128; }
|
||||
|
||||
void phi2_decode_extra_data( struct work *work )
|
||||
{
|
||||
phi2_use_roots = false;
|
||||
if ( work->data[0] & ( 1<<30 ) ) phi2_use_roots = true;
|
||||
else for ( int i = 20; i < 36; i++ )
|
||||
{
|
||||
@@ -213,8 +214,8 @@ bool register_phi2_algo( algo_gate_t* gate )
|
||||
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
|
||||
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
|
||||
gate->build_extraheader = (void*)&phi2_build_extraheader;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined(PHI2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_4way;
|
||||
#else
|
||||
|
@@ -60,7 +60,7 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||
int64_t i; //auxiliary iteration counter
|
||||
// int64_t i; //auxiliary iteration counter
|
||||
int64_t v64; // 64bit var for memcpy
|
||||
//====================================================================/
|
||||
|
||||
@@ -128,17 +128,22 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
//================= Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
|
||||
initState( state );
|
||||
// initState( state );
|
||||
|
||||
//========================= Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
|
||||
ptrWord = wholeMatrix;
|
||||
|
||||
absorbBlockBlake2Safe( state, ptrWord, nBlocksInput, BLOCK_LEN );
|
||||
/*
|
||||
for (i = 0; i < nBlocksInput; i++)
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
*/
|
||||
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0( state, &wholeMatrix[0], nCols ); //The locally copied password is most likely overwritten here
|
||||
|
||||
@@ -227,7 +232,7 @@ int LYRA2REV3( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||
int64_t i; //auxiliary iteration counter
|
||||
// int64_t i; //auxiliary iteration counter
|
||||
int64_t v64; // 64bit var for memcpy
|
||||
uint64_t instance = 0;
|
||||
//====================================================================/
|
||||
@@ -302,17 +307,21 @@ int LYRA2REV3( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
//================= Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
|
||||
initState( state );
|
||||
// initState( state );
|
||||
|
||||
//========================= Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
|
||||
ptrWord = wholeMatrix;
|
||||
|
||||
absorbBlockBlake2Safe( state, ptrWord, nBlocksInput, BLOCK_LEN );
|
||||
/*
|
||||
for (i = 0; i < nBlocksInput; i++)
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
*/
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0( state, &wholeMatrix[0], nCols ); //The locally copied password is most likely overwritten here
|
||||
|
||||
@@ -405,7 +414,7 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||
int64_t i; //auxiliary iteration counter
|
||||
// int64_t i; //auxiliary iteration counter
|
||||
//=======================================================================/
|
||||
|
||||
//======= Initializing the Memory Matrix and pointers to it =============//
|
||||
@@ -459,17 +468,21 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
// if (state == NULL) {
|
||||
// return -1;
|
||||
// }
|
||||
initState( state );
|
||||
// initState( state );
|
||||
|
||||
//============================== Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
absorbBlockBlake2Safe( state, ptrWord, nBlocksInput,
|
||||
BLOCK_LEN_BLAKE2_SAFE_INT64 );
|
||||
/*
|
||||
for ( i = 0; i < nBlocksInput; i++ )
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
|
||||
*/
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0(state, &wholeMatrix[0], nCols); //The locally copied password is most likely overwritten here
|
||||
reducedDuplexRow1(state, &wholeMatrix[0], &wholeMatrix[ROW_LEN_INT64], nCols);
|
||||
@@ -623,17 +636,21 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
||||
//================= Initializing the Sponge State ====================//
|
||||
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||
|
||||
initState( state );
|
||||
// initState( state );
|
||||
|
||||
//========================= Setup Phase =============================//
|
||||
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||
|
||||
ptrWord = wholeMatrix;
|
||||
|
||||
absorbBlockBlake2Safe( state, ptrWord, nBlocksInput, BLOCK_LEN );
|
||||
/*
|
||||
for (i = 0; i < nBlocksInput; i++)
|
||||
{
|
||||
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
|
||||
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
|
||||
}
|
||||
*/
|
||||
//Initializes M[0] and M[1]
|
||||
reducedSqueezeRow0( state, &wholeMatrix[0], nCols ); //The locally copied password is most likely overwritten here
|
||||
|
||||
|
@@ -118,11 +118,6 @@ int64_t lyra2re_get_max64 ()
|
||||
return 0xffffLL;
|
||||
}
|
||||
|
||||
void lyra2re_set_target ( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target(work, job_diff / (128.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_lyra2re_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_lyra2re_ctx();
|
||||
@@ -130,7 +125,7 @@ bool register_lyra2re_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2re;
|
||||
gate->hash = (void*)&lyra2re_hash;
|
||||
gate->get_max64 = (void*)&lyra2re_get_max64;
|
||||
gate->set_target = (void*)&lyra2re_set_target;
|
||||
opt_target_factor = 128.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -86,7 +86,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
@@ -94,12 +94,12 @@ int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -186,7 +186,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
|
||||
bmw256_4way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
@@ -194,12 +194,12 @@ int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
@@ -32,27 +32,27 @@ void l2v3_blake256_midstate( const void* input )
|
||||
|
||||
void lyra2rev3_hash( void *state, const void *input )
|
||||
{
|
||||
lyra2v3_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &lyra2v3_ctx, sizeof(lyra2v3_ctx) );
|
||||
uint8_t hash[128] __attribute__ ((aligned (64)));
|
||||
#define hashA hash
|
||||
#define hashB hash+64
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
lyra2v3_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &lyra2v3_ctx, sizeof(lyra2v3_ctx) );
|
||||
uint8_t hash[128] __attribute__ ((aligned (64)));
|
||||
#define hashA hash
|
||||
#define hashB hash+64
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
memcpy( &ctx.blake, &l2v3_blake_mid, sizeof l2v3_blake_mid );
|
||||
sph_blake256( &ctx.blake, (uint8_t*)input + midlen, tail );
|
||||
sph_blake256_close( &ctx.blake, hash );
|
||||
memcpy( &ctx.blake, &l2v3_blake_mid, sizeof l2v3_blake_mid );
|
||||
sph_blake256( &ctx.blake, (uint8_t*)input + midlen, tail );
|
||||
sph_blake256_close( &ctx.blake, hash );
|
||||
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 4, 4 );
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 4, 4 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hashA,
|
||||
(const byte*) hash, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hashA,
|
||||
(const byte*) hash, 32 );
|
||||
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 4, 4 );
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 4, 4 );
|
||||
|
||||
sph_bmw256( &ctx.bmw, hash, 32 );
|
||||
sph_bmw256_close( &ctx.bmw, hash );
|
||||
sph_bmw256( &ctx.bmw, hash, 32 );
|
||||
sph_bmw256_close( &ctx.bmw, hash );
|
||||
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
@@ -53,11 +53,6 @@ int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void lyra2z330_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool lyra2z330_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 256; // nCols
|
||||
@@ -76,7 +71,7 @@ bool register_lyra2z330_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2z330;
|
||||
gate->hash = (void*)&lyra2z330_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2z330_set_target;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -40,29 +40,32 @@
|
||||
*/
|
||||
inline void initState( uint64_t State[/*16*/] )
|
||||
{
|
||||
|
||||
/*
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
|
||||
state[0] = _mm256_setzero_si256();
|
||||
state[1] = _mm256_setzero_si256();
|
||||
state[2] = _mm256_set_epi64x( blake2b_IV[3], blake2b_IV[2],
|
||||
blake2b_IV[1], blake2b_IV[0] );
|
||||
state[3] = _mm256_set_epi64x( blake2b_IV[7], blake2b_IV[6],
|
||||
blake2b_IV[5], blake2b_IV[4] );
|
||||
const __m256i zero = m256_zero;
|
||||
state[0] = zero;
|
||||
state[1] = zero;
|
||||
state[2] = m256_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL,
|
||||
0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
|
||||
state[3] = m256_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL,
|
||||
0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL );
|
||||
|
||||
#elif defined (__SSE2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
const __m128i zero = m128_zero;
|
||||
|
||||
state[0] = _mm_setzero_si128();
|
||||
state[1] = _mm_setzero_si128();
|
||||
state[2] = _mm_setzero_si128();
|
||||
state[3] = _mm_setzero_si128();
|
||||
state[4] = _mm_set_epi64x( blake2b_IV[1], blake2b_IV[0] );
|
||||
state[5] = _mm_set_epi64x( blake2b_IV[3], blake2b_IV[2] );
|
||||
state[6] = _mm_set_epi64x( blake2b_IV[5], blake2b_IV[4] );
|
||||
state[7] = _mm_set_epi64x( blake2b_IV[7], blake2b_IV[6] );
|
||||
state[0] = zero;
|
||||
state[1] = zero;
|
||||
state[2] = zero;
|
||||
state[3] = zero;
|
||||
state[4] = m128_const_64( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
|
||||
state[5] = m128_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL );
|
||||
state[6] = m128_const_64( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL );
|
||||
state[7] = m128_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL );
|
||||
|
||||
#else
|
||||
//First 512 bis are zeros
|
||||
@@ -77,6 +80,8 @@ inline void initState( uint64_t State[/*16*/] )
|
||||
State[14] = blake2b_IV[6];
|
||||
State[15] = blake2b_IV[7];
|
||||
#endif
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -250,43 +255,74 @@ inline void absorbBlock( uint64_t *State, const uint64_t *In )
|
||||
* @param state The current state of the sponge
|
||||
* @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
|
||||
*/
|
||||
inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In )
|
||||
inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In,
|
||||
const uint64_t nBlocks, const uint64_t block_len )
|
||||
{
|
||||
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
|
||||
// XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with
|
||||
// the IV.
|
||||
#if defined (__AVX2__)
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
register __m256i state0, state1, state2, state3;
|
||||
|
||||
state0 =
|
||||
state1 = m256_zero;
|
||||
state2 = m256_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL,
|
||||
0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
|
||||
state3 = m256_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL,
|
||||
0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL );
|
||||
|
||||
for ( int i = 0; i < nBlocks; i++ )
|
||||
{
|
||||
__m256i *in = (__m256i*)In;
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
state0 = _mm256_xor_si256( state0, in[0] );
|
||||
state1 = _mm256_xor_si256( state1, in[1] );
|
||||
|
||||
LYRA_12_ROUNDS_AVX2( state0, state1, state2, state3 );
|
||||
In += block_len;
|
||||
}
|
||||
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__SSE2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i state0, state1, state2, state3, state4, state5, state6, state7;
|
||||
|
||||
state0 =
|
||||
state1 =
|
||||
state2 =
|
||||
state3 = m128_zero;
|
||||
state4 = m128_const_64( 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL );
|
||||
state5 = m128_const_64( 0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL );
|
||||
state6 = m128_const_64( 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL );
|
||||
state7 = m128_const_64( 0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL );
|
||||
|
||||
for ( int i = 0; i < nBlocks; i++ )
|
||||
{
|
||||
__m128i* in = (__m128i*)In;
|
||||
|
||||
state[0] = _mm_xor_si128( state[0], in[0] );
|
||||
state[1] = _mm_xor_si128( state[1], in[1] );
|
||||
state[2] = _mm_xor_si128( state[2], in[2] );
|
||||
state[3] = _mm_xor_si128( state[3], in[3] );
|
||||
state0 = _mm_xor_si128( state0, in[0] );
|
||||
state1 = _mm_xor_si128( state1, in[1] );
|
||||
state2 = _mm_xor_si128( state2, in[2] );
|
||||
state3 = _mm_xor_si128( state3, in[3] );
|
||||
|
||||
//Applies the transformation f to the sponge's state
|
||||
LYRA_12_ROUNDS_AVX( state[0], state[1], state[2], state[3],
|
||||
state[4], state[5], state[6], state[7] );
|
||||
LYRA_12_ROUNDS_AVX( state0, state1, state2, state3,
|
||||
state4, state5, state6, state7 );
|
||||
In += block_len;
|
||||
}
|
||||
|
||||
_mm_store_si128( (__m128i*)State, state0 );
|
||||
_mm_store_si128( (__m128i*)State + 1, state1 );
|
||||
_mm_store_si128( (__m128i*)State + 2, state2 );
|
||||
_mm_store_si128( (__m128i*)State + 3, state3 );
|
||||
_mm_store_si128( (__m128i*)State + 4, state4 );
|
||||
_mm_store_si128( (__m128i*)State + 5, state5 );
|
||||
_mm_store_si128( (__m128i*)State + 6, state6 );
|
||||
_mm_store_si128( (__m128i*)State + 7, state7 );
|
||||
|
||||
#else
|
||||
|
||||
State[0] ^= In[0];
|
||||
|
@@ -170,7 +170,8 @@ void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols);
|
||||
|
||||
//---- Absorbs
|
||||
void absorbBlock(uint64_t *state, const uint64_t *in);
|
||||
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
|
||||
void absorbBlockBlake2Safe( uint64_t *state, const uint64_t *in,
|
||||
const uint64_t nBlocks, const uint64_t block_len );
|
||||
|
||||
//---- Duplexes
|
||||
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
|
||||
|
115
algo/m7m.c
115
algo/m7m.c
@@ -19,100 +19,89 @@
|
||||
#define EPS1 DBL_EPSILON
|
||||
#define EPS2 3.0e-11
|
||||
|
||||
inline double exp_n(double xt)
|
||||
inline double exp_n( double xt )
|
||||
{
|
||||
if(xt < -700.0)
|
||||
if ( xt < -700.0 )
|
||||
return 0;
|
||||
else if(xt > 700.0)
|
||||
else if ( xt > 700.0 )
|
||||
return 1e200;
|
||||
else if(xt > -0.8e-8 && xt < 0.8e-8)
|
||||
return (1.0 + xt);
|
||||
else if ( xt > -0.8e-8 && xt < 0.8e-8 )
|
||||
return ( 1.0 + xt );
|
||||
else
|
||||
return exp(xt);
|
||||
return exp( xt );
|
||||
}
|
||||
|
||||
inline double exp_n2(double x1, double x2)
|
||||
inline double exp_n2( double x1, double x2 )
|
||||
{
|
||||
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, p5 = 37., p6 = 700.;
|
||||
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8,
|
||||
p5 = 37., p6 = 700.;
|
||||
double xt = x1 - x2;
|
||||
if (xt < p1+1.e-200)
|
||||
if ( xt < p1+1.e-200 )
|
||||
return 1.;
|
||||
else if (xt > p1 && xt < p2 + 1.e-200)
|
||||
else if ( xt > p1 && xt < p2 + 1.e-200 )
|
||||
return ( 1. - exp(xt) );
|
||||
else if (xt > p2 && xt < p3 + 1.e-200)
|
||||
return ( 1. / (1. + exp(xt)) );
|
||||
else if (xt > p3 && xt < p4)
|
||||
else if ( xt > p2 && xt < p3 + 1.e-200 )
|
||||
return ( 1. / ( 1. + exp(xt) ) );
|
||||
else if ( xt > p3 && xt < p4 )
|
||||
return ( 1. / (2. + xt) );
|
||||
else if (xt > p4 - 1.e-200 && xt < p5)
|
||||
return ( exp(-xt) / (1. + exp(-xt)) );
|
||||
else if (xt > p5 - 1.e-200 && xt < p6)
|
||||
else if ( xt > p4 - 1.e-200 && xt < p5 )
|
||||
return ( exp(-xt) / ( 1. + exp(-xt) ) );
|
||||
else if ( xt > p5 - 1.e-200 && xt < p6 )
|
||||
return ( exp(-xt) );
|
||||
else if (xt > p6 - 1.e-200)
|
||||
else if ( xt > p6 - 1.e-200 )
|
||||
return 0.;
|
||||
}
|
||||
|
||||
double swit2_(double wvnmb)
|
||||
double swit2_( double wvnmb )
|
||||
{
|
||||
return pow( (5.55243*(exp_n(-0.3*wvnmb/15.762) - exp_n(-0.6*wvnmb/15.762)))*wvnmb, 0.5)
|
||||
/ 1034.66 * pow(sin(wvnmb/65.), 2.);
|
||||
return pow( ( 5.55243 * ( exp_n( -0.3 * wvnmb / 15.762 )
|
||||
- exp_n( -0.6 * wvnmb / 15.762 ) ) ) * wvnmb, 0.5 )
|
||||
/ 1034.66 * pow( sin( wvnmb / 65. ), 2. );
|
||||
}
|
||||
|
||||
|
||||
double GaussianQuad_N2(const double x1, const double x2)
|
||||
double GaussianQuad_N2( const double x1, const double x2 )
|
||||
{
|
||||
double s=0.0;
|
||||
double s = 0.0;
|
||||
double x[6], w[6];
|
||||
//gauleg(a2, b2, x, w);
|
||||
|
||||
double z1, z, xm, xl, pp, p3, p2, p1;
|
||||
xm=0.5*(x2+x1);
|
||||
xl=0.5*(x2-x1);
|
||||
for(int i=1;i<=3;i++)
|
||||
xm = 0.5 * ( x2 + x1 );
|
||||
xl = 0.5 * ( x2 - x1 );
|
||||
for( int i = 1; i <= 3; i++ )
|
||||
{
|
||||
z = (i == 1) ? 0.909632 : -0.0;
|
||||
z = (i == 2) ? 0.540641 : z;
|
||||
do
|
||||
z = (i == 2) ? 0.540641 : ( (i == 1) ? 0.909632 : -0.0 );
|
||||
do
|
||||
{
|
||||
p1 = z;
|
||||
p2 = 1;
|
||||
p3 = 0;
|
||||
|
||||
p3=1;
|
||||
p2=z;
|
||||
p1=((3.0 * z * z) - 1) / 2;
|
||||
|
||||
p3=p2;
|
||||
p2=p1;
|
||||
p1=((5.0 * z * p2) - (2.0 * z)) / 3;
|
||||
|
||||
p3=p2;
|
||||
p2=p1;
|
||||
p1=((7.0 * z * p2) - (3.0 * p3)) / 4;
|
||||
|
||||
p3=p2;
|
||||
p2=p1;
|
||||
p1=((9.0 * z * p2) - (4.0 * p3)) / 5;
|
||||
|
||||
pp=5*(z*p1-p2)/(z*z-1.0);
|
||||
z1=z;
|
||||
z=z1-p1/pp;
|
||||
} while (fabs(z-z1) > 3.0e-11);
|
||||
p1 = ( ( 3.0 * z * z ) - 1 ) / 2;
|
||||
p2 = p1;
|
||||
p1 = ( ( 5.0 * z * p2 ) - ( 2.0 * z ) ) / 3;
|
||||
p3 = p2;
|
||||
p2 = p1;
|
||||
p1 = ( ( 7.0 * z * p2 ) - ( 3.0 * p3 ) ) / 4;
|
||||
p3 = p2;
|
||||
p2 = p1;
|
||||
p1 = ( ( 9.0 * z * p2 ) - ( 4.0 * p3 ) ) / 5;
|
||||
pp = 5 * ( z * p1 - p2 ) / ( z * z - 1.0 );
|
||||
z1 = z;
|
||||
z = z1 - p1 / pp;
|
||||
} while ( fabs( z - z1 ) > 3.0e-11 );
|
||||
|
||||
x[i]=xm-xl*z;
|
||||
x[5+1-i]=xm+xl*z;
|
||||
w[i]=2.0*xl/((1.0-z*z)*pp*pp);
|
||||
w[5+1-i]=w[i];
|
||||
x[i] = xm - xl * z;
|
||||
x[ 5+1-i ] = xm + xl * z;
|
||||
w[i] = 2.0 * xl / ( ( 1.0 - z * z ) * pp * pp );
|
||||
w[ 5+1-i ] = w [i];
|
||||
}
|
||||
|
||||
for(int j=1; j<=5; j++) s += w[j]*swit2_(x[j]);
|
||||
for( int j = 1; j <= 5; j++ ) s += w[j] * swit2_( x[j] );
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
uint32_t sw2_(int nnounce)
|
||||
uint32_t sw2_( int nnounce )
|
||||
{
|
||||
double wmax = ((sqrt((double)(nnounce))*(1.+EPSa))/450+100);
|
||||
return ((uint32_t)(GaussianQuad_N2(0., wmax)*(1.+EPSa)*1.e6));
|
||||
double wmax = ( ( sqrt( (double)(nnounce) ) * ( 1.+EPSa ) ) / 450+100 );
|
||||
return ( (uint32_t)( GaussianQuad_N2( 0., wmax ) * ( 1.+EPSa ) * 1.e6 ) );
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
@@ -323,7 +312,7 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
mpz_clears(magipi, magisw, product, bns0, bns1, NULL);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return rc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_m7m_algo( algo_gate_t *gate )
|
||||
@@ -334,9 +323,9 @@ bool register_m7m_algo( algo_gate_t *gate )
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ffff;
|
||||
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -49,7 +49,7 @@ void anime_4way_hash( void *state, const void *input )
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
const uint32_t mask = 8;
|
||||
const __m256i bit3_mask = _mm256_set1_epi64x( 8 );
|
||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
anime_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
|
||||
|
@@ -21,7 +21,7 @@
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/haval/haval-hash-4way.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
|
||||
union _hmq1725_4way_context_overlay
|
||||
{
|
||||
@@ -57,7 +57,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
uint32_t vhashB[32<<2] __attribute__ ((aligned (64)));
|
||||
hmq1725_4way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||
__m256i vh_mask;
|
||||
const __m256i vmask = _mm256_set1_epi64x( 24 );
|
||||
const __m256i vmask = m256_const1_64( 24 );
|
||||
const uint32_t mask = 24;
|
||||
__m256i* vh = (__m256i*)vhash;
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
|
@@ -10,8 +10,8 @@ bool register_hmq1725_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_hmq1725;
|
||||
gate->hash = (void*)&hmq1725hash;
|
||||
#endif
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -409,14 +409,3 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
bool register_hmq1725_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_hmq1725_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->scanhash = (void*)&scanhash_hmq1725;
|
||||
gate->hash = (void*)&hmq1725hash;
|
||||
return true;
|
||||
};
|
||||
*/
|
||||
|
@@ -49,7 +49,7 @@ void quark_4way_hash( void *state, const void *input )
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
quark_4way_ctx_holder ctx;
|
||||
const __m256i bit3_mask = _mm256_set1_epi64x( 8 );
|
||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||
const uint32_t mask = 8;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
|
||||
|
@@ -3,7 +3,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
#include "ripemd-hash-4way.h"
|
||||
|
||||
#define LBRY_INPUT_SIZE 112
|
||||
|
@@ -41,6 +41,7 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
/*
|
||||
void lbry_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits )
|
||||
@@ -63,6 +64,7 @@ void lbry_build_block_header( struct work* g_work, uint32_t version,
|
||||
g_work->data[ LBRY_NBITS_INDEX ] = nbits;
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
*/
|
||||
|
||||
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
@@ -92,11 +94,6 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
|
||||
void lbry_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
int64_t lbry_get_max64() { return 0x1ffffLL; }
|
||||
|
||||
int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }
|
||||
@@ -119,11 +116,11 @@ bool register_lbry_algo( algo_gate_t* gate )
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
// gate->build_block_header = (void*)&build_block_header;
|
||||
gate->build_extraheader = (void*)&lbry_build_extraheader;
|
||||
gate->set_target = (void*)&lbry_set_target;
|
||||
gate->ntime_index = LBRY_NTIME_INDEX;
|
||||
gate->nbits_index = LBRY_NBITS_INDEX;
|
||||
gate->nonce_index = LBRY_NONCE_INDEX;
|
||||
gate->get_work_data_size = (void*)&lbry_get_work_data_size;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -5,23 +5,26 @@
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
static const uint32_t IV[5] =
|
||||
{ 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };
|
||||
*/
|
||||
|
||||
/*
|
||||
* Round constants for RIPEMD-160.
|
||||
*/
|
||||
#define K11 0x00000000
|
||||
#define K12 0x5A827999
|
||||
#define K13 0x6ED9EBA1
|
||||
#define K14 0x8F1BBCDC
|
||||
#define K15 0xA953FD4E
|
||||
|
||||
#define K21 0x50A28BE6
|
||||
#define K22 0x5C4DD124
|
||||
#define K23 0x6D703EF3
|
||||
#define K24 0x7A6D76E9
|
||||
#define K25 0x00000000
|
||||
#define K11 0x0000000000000000
|
||||
#define K12 0x5A8279995A827999
|
||||
#define K13 0x6ED9EBA16ED9EBA1
|
||||
#define K14 0x8F1BBCDC8F1BBCDC
|
||||
#define K15 0xA953FD4EA953FD4E
|
||||
|
||||
#define K21 0x50A28BE650A28BE6
|
||||
#define K22 0x5C4DD1245C4DD124
|
||||
#define K23 0x6D703EF36D703EF3
|
||||
#define K24 0x7A6D76E97A6D76E9
|
||||
#define K25 0x0000000000000000
|
||||
|
||||
// RIPEMD-160 4 way
|
||||
|
||||
@@ -44,7 +47,7 @@ static const uint32_t IV[5] =
|
||||
do{ \
|
||||
a = _mm_add_epi32( mm128_rol_32( _mm_add_epi32( _mm_add_epi32( \
|
||||
_mm_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||
_mm_set1_epi32( k ) ), s ), e ); \
|
||||
m128_const1_64( k ) ), s ), e ); \
|
||||
c = mm128_rol_32( c, 10 );\
|
||||
} while (0)
|
||||
|
||||
@@ -248,11 +251,11 @@ static void ripemd160_4way_round( ripemd160_4way_context *sc )
|
||||
|
||||
void ripemd160_4way_init( ripemd160_4way_context *sc )
|
||||
{
|
||||
sc->val[0] = _mm_set1_epi32( IV[0] );
|
||||
sc->val[1] = _mm_set1_epi32( IV[1] );
|
||||
sc->val[2] = _mm_set1_epi32( IV[2] );
|
||||
sc->val[3] = _mm_set1_epi32( IV[3] );
|
||||
sc->val[4] = _mm_set1_epi32( IV[4] );
|
||||
sc->val[0] = m128_const1_64( 0x6745230167452301 );
|
||||
sc->val[1] = m128_const1_64( 0xEFCDAB89EFCDAB89 );
|
||||
sc->val[2] = m128_const1_64( 0x98BADCFE98BADCFE );
|
||||
sc->val[3] = m128_const1_64( 0x1032547610325476 );
|
||||
sc->val[4] = m128_const1_64( 0xC3D2E1F0C3D2E1F0 );
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
@@ -343,7 +346,7 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
do{ \
|
||||
a = _mm256_add_epi32( mm256_rol_32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
_mm256_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||
_mm256_set1_epi32( k ) ), s ), e ); \
|
||||
m256_const1_64( k ) ), s ), e ); \
|
||||
c = mm256_rol_32( c, 10 );\
|
||||
} while (0)
|
||||
|
||||
@@ -548,11 +551,11 @@ static void ripemd160_8way_round( ripemd160_8way_context *sc )
|
||||
|
||||
void ripemd160_8way_init( ripemd160_8way_context *sc )
|
||||
{
|
||||
sc->val[0] = _mm256_set1_epi32( IV[0] );
|
||||
sc->val[1] = _mm256_set1_epi32( IV[1] );
|
||||
sc->val[2] = _mm256_set1_epi32( IV[2] );
|
||||
sc->val[3] = _mm256_set1_epi32( IV[3] );
|
||||
sc->val[4] = _mm256_set1_epi32( IV[4] );
|
||||
sc->val[0] = m256_const1_64( 0x6745230167452301 );
|
||||
sc->val[1] = m256_const1_64( 0xEFCDAB89EFCDAB89 );
|
||||
sc->val[2] = m256_const1_64( 0x98BADCFE98BADCFE );
|
||||
sc->val[3] = m256_const1_64( 0x1032547610325476 );
|
||||
sc->val[4] = m256_const1_64( 0xC3D2E1F0C3D2E1F0 );
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
|
@@ -1089,13 +1089,13 @@ bool register_neoscrypt_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_neoscrypt;
|
||||
gate->hash = (void*)&neoscrypt;
|
||||
gate->get_max64 = (void*)&get_neoscrypt_max64;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->wait_for_diff = (void*)&neoscrypt_wait_for_diff;
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
|
||||
gate->get_work_data_size = (void*)&neoscrypt_get_work_data_size;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -503,8 +503,8 @@ bool register_pluck_algo( algo_gate_t* gate )
|
||||
gate->miner_thread_init = (void*)&pluck_miner_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_pluck;
|
||||
gate->hash = (void*)&pluck_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&pluck_get_max64;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -698,8 +698,8 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
||||
extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
||||
uint32_t midstate[8];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
@@ -783,13 +783,18 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
||||
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_scrypt;
|
||||
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&scrypt_get_max64;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
if ( !opt_scrypt_n )
|
||||
|
||||
if ( !opt_param_n )
|
||||
{
|
||||
opt_param_n = 1024;
|
||||
scratchbuf_size = 1024;
|
||||
}
|
||||
else
|
||||
scratchbuf_size = opt_scrypt_n;
|
||||
scratchbuf_size = opt_param_n;
|
||||
applog(LOG_INFO,"Scrypt paramaters: N= %d, R= 1.", opt_param_n );
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -55,6 +55,7 @@ typedef uint32_t scrypt_mix_word_t;
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* cpu agnostic */
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
|
||||
#define SCRYPT_MIX_FN chacha_core_basic
|
||||
|
@@ -1,9 +1,11 @@
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX)
|
||||
|
||||
/*
|
||||
#if defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
#undef SCRYPT_ROMIX_FN
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix
|
||||
#endif
|
||||
*/
|
||||
|
||||
#undef SCRYPT_HAVE_ROMIX
|
||||
#define SCRYPT_HAVE_ROMIX
|
||||
|
@@ -240,24 +240,24 @@ bool register_scryptjane_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_scryptjane;
|
||||
gate->hash = (void*)&scryptjanehash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
// figure out if arg in N or Nfactor
|
||||
if ( !opt_scrypt_n )
|
||||
if ( !opt_param_n )
|
||||
{
|
||||
applog( LOG_ERR, "The N factor must be specified in the form algo:nf");
|
||||
return false;
|
||||
}
|
||||
else if ( opt_scrypt_n < 32 )
|
||||
else if ( opt_param_n < 32 )
|
||||
{
|
||||
// arg is Nfactor, calculate N
|
||||
sj_N = 1 << ( opt_scrypt_n + 1 );
|
||||
sj_N = 1 << ( opt_param_n + 1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// arg is N
|
||||
sj_N = opt_scrypt_n;
|
||||
sj_N = opt_param_n;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@@ -55,32 +55,13 @@ typedef struct {
|
||||
__m128i buf[64>>2];
|
||||
__m128i val[8];
|
||||
uint32_t count_high, count_low;
|
||||
bool initialized;
|
||||
} sha256_4way_context;
|
||||
|
||||
void sha256_4way_init( sha256_4way_context *sc );
|
||||
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len );
|
||||
void sha256_4way_close( sha256_4way_context *sc, void *dst );
|
||||
|
||||
/*
|
||||
// SHA-256 7 way hybrid
|
||||
// Combines SSE, MMX and scalar data to do 8 + 2 + 1 parallel.
|
||||
typedef struct {
|
||||
__m128i bufx[64>>2];
|
||||
__m128i valx[8];
|
||||
__m64 bufy[64>>2];
|
||||
__m64 valy[8];
|
||||
uint32_t bufz[64>>2];
|
||||
uint32_t valz[8];
|
||||
uint32_t count_high, count_low;
|
||||
} sha256_7way_context;
|
||||
|
||||
void sha256_7way_init( sha256_7way_context *ctx );
|
||||
void sha256_7way( sha256_7way_context *ctx, const void *datax,
|
||||
void *datay, void *dataz, size_t len );
|
||||
void sha256_7way_close( sha256_7way_context *ctx, void *dstx, void *dstyx,
|
||||
void *dstz );
|
||||
*/
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// SHA-256 8 way
|
||||
@@ -89,6 +70,7 @@ typedef struct {
|
||||
__m256i buf[64>>2];
|
||||
__m256i val[8];
|
||||
uint32_t count_high, count_low;
|
||||
bool initialized;
|
||||
} sha256_8way_context;
|
||||
|
||||
void sha256_8way_init( sha256_8way_context *sc );
|
||||
@@ -103,6 +85,7 @@ typedef struct {
|
||||
__m256i buf[128>>3];
|
||||
__m256i val[8];
|
||||
uint64_t count;
|
||||
bool initialized;
|
||||
} sha512_4way_context;
|
||||
|
||||
void sha512_4way_init( sha512_4way_context *sc);
|
@@ -12,6 +12,7 @@
|
||||
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
|
||||
#define EXTERN_SHA256
|
||||
@@ -197,7 +198,17 @@ static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
||||
|
||||
extern void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
||||
{
|
||||
uint32_t S[16], T[16];
|
||||
#if defined(__SHA__)
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, data, len );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
#else
|
||||
|
||||
uint32_t S[16], T[16];
|
||||
int i, r;
|
||||
|
||||
sha256_init(S);
|
||||
@@ -218,6 +229,7 @@ extern void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
||||
sha256_transform(T, S, 0);
|
||||
for (i = 0; i < 8; i++)
|
||||
be32enc((uint32_t *)hash + i, T[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void sha256d_preextend(uint32_t *W)
|
||||
@@ -635,9 +647,46 @@ int scanhash_sha256d( struct work *work,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int scanhash_SHA256d( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(64) data[20];
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
memcpy( data, pdata, 80 );
|
||||
|
||||
do {
|
||||
data[19] = ++n;
|
||||
sha256d( (unsigned char*)hash, (const unsigned char*)data, 80 );
|
||||
if ( unlikely( swab32( hash[7] ) <= Htarg ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
sha256d_80_swap(hash, pdata);
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
} while ( likely( n < max_nonce && !work_restart[thr_id].restart ) );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
bool register_sha256d_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_sha256d;
|
||||
#if defined(__SHA__)
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_SHA256d;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256d;
|
||||
#endif
|
||||
gate->hash = (void*)&sha256d;
|
||||
return true;
|
||||
};
|
||||
|
@@ -34,19 +34,18 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sha2-hash-4way.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
// SHA-256 32 bit
|
||||
|
||||
/*
|
||||
static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
|
||||
SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
|
||||
SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
};
|
||||
*/
|
||||
|
||||
static const sph_u32 K256[64] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
@@ -113,16 +112,17 @@ static const sph_u32 K256[64] = {
|
||||
|
||||
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
register __m128i T1, T2; \
|
||||
__m128i T1, T2; \
|
||||
__m128i K = _mm_set1_epi32( K256[( (j)+(i) )] ); \
|
||||
T1 = _mm_add_epi32( H, mm128_add4_32( BSG2_1(E), CHs(E, F, G), \
|
||||
_mm_set1_epi32( K256[( (j)+(i) )] ), W[i] ) ); \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm_add_epi32( BSG2_0(A), MAJs(A, B, C) ); \
|
||||
D = _mm_add_epi32( D, T1 ); \
|
||||
H = _mm_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
sha256_4way_round( __m128i *in, __m128i r[8] )
|
||||
sha256_4way_round( sha256_4way_context *ctx, __m128i *in, __m128i r[8] )
|
||||
{
|
||||
register __m128i A, B, C, D, E, F, G, H;
|
||||
__m128i W[16];
|
||||
@@ -130,14 +130,28 @@ sha256_4way_round( __m128i *in, __m128i r[8] )
|
||||
mm128_block_bswap_32( W, in );
|
||||
mm128_block_bswap_32( W+8, in+8 );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
}
|
||||
else
|
||||
{
|
||||
A = m128_const1_64( 0x6A09E6676A09E667 );
|
||||
B = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||
C = m128_const1_64( 0x3C6EF3723C6EF372 );
|
||||
D = m128_const1_64( 0xA54FF53AA54FF53A );
|
||||
E = m128_const1_64( 0x510E527F510E527F );
|
||||
F = m128_const1_64( 0x9B05688C9B05688C );
|
||||
G = m128_const1_64( 0x1F83D9AB1F83D9AB );
|
||||
H = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
||||
}
|
||||
|
||||
SHA2s_4WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
SHA2s_4WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
@@ -193,19 +207,36 @@ sha256_4way_round( __m128i *in, __m128i r[8] )
|
||||
SHA2s_4WAY_STEP( B, C, D, E, F, G, H, A, 15, j );
|
||||
}
|
||||
|
||||
r[0] = _mm_add_epi32( r[0], A );
|
||||
r[1] = _mm_add_epi32( r[1], B );
|
||||
r[2] = _mm_add_epi32( r[2], C );
|
||||
r[3] = _mm_add_epi32( r[3], D );
|
||||
r[4] = _mm_add_epi32( r[4], E );
|
||||
r[5] = _mm_add_epi32( r[5], F );
|
||||
r[6] = _mm_add_epi32( r[6], G );
|
||||
r[7] = _mm_add_epi32( r[7], H );
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
r[0] = _mm_add_epi32( r[0], A );
|
||||
r[1] = _mm_add_epi32( r[1], B );
|
||||
r[2] = _mm_add_epi32( r[2], C );
|
||||
r[3] = _mm_add_epi32( r[3], D );
|
||||
r[4] = _mm_add_epi32( r[4], E );
|
||||
r[5] = _mm_add_epi32( r[5], F );
|
||||
r[6] = _mm_add_epi32( r[6], G );
|
||||
r[7] = _mm_add_epi32( r[7], H );
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->initialized = true;
|
||||
r[0] = _mm_add_epi32( A, m128_const1_64( 0x6A09E6676A09E667 ) );
|
||||
r[1] = _mm_add_epi32( B, m128_const1_64( 0xBB67AE85BB67AE85 ) );
|
||||
r[2] = _mm_add_epi32( C, m128_const1_64( 0x3C6EF3723C6EF372 ) );
|
||||
r[3] = _mm_add_epi32( D, m128_const1_64( 0xA54FF53AA54FF53A ) );
|
||||
r[4] = _mm_add_epi32( E, m128_const1_64( 0x510E527F510E527F ) );
|
||||
r[5] = _mm_add_epi32( F, m128_const1_64( 0x9B05688C9B05688C ) );
|
||||
r[6] = _mm_add_epi32( G, m128_const1_64( 0x1F83D9AB1F83D9AB ) );
|
||||
r[7] = _mm_add_epi32( H, m128_const1_64( 0x5BE0CD195BE0CD19 ) );
|
||||
}
|
||||
}
|
||||
|
||||
void sha256_4way_init( sha256_4way_context *sc )
|
||||
{
|
||||
sc->initialized = false;
|
||||
sc->count_high = sc->count_low = 0;
|
||||
/*
|
||||
sc->val[0] = _mm_set1_epi32( H256[0] );
|
||||
sc->val[1] = _mm_set1_epi32( H256[1] );
|
||||
sc->val[2] = _mm_set1_epi32( H256[2] );
|
||||
@@ -214,6 +245,7 @@ void sha256_4way_init( sha256_4way_context *sc )
|
||||
sc->val[5] = _mm_set1_epi32( H256[5] );
|
||||
sc->val[6] = _mm_set1_epi32( H256[6] );
|
||||
sc->val[7] = _mm_set1_epi32( H256[7] );
|
||||
*/
|
||||
}
|
||||
|
||||
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len )
|
||||
@@ -237,7 +269,7 @@ void sha256_4way( sha256_4way_context *sc, const void *data, size_t len )
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
sha256_4way_round( sc->buf, sc->val );
|
||||
sha256_4way_round( sc, sc->buf, sc->val );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -256,13 +288,13 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
const int pad = buf_size - 8;
|
||||
|
||||
ptr = (unsigned)sc->count_low & (buf_size - 1U);
|
||||
sc->buf[ ptr>>2 ] = _mm_set1_epi32( 0x80 );
|
||||
sc->buf[ ptr>>2 ] = m128_const1_64( 0x0000008000000080 );
|
||||
ptr += 4;
|
||||
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_128( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||
sha256_4way_round( sc->buf, sc->val );
|
||||
sha256_4way_round( sc, sc->buf, sc->val );
|
||||
memset_zero_128( sc->buf, pad >> 2 );
|
||||
}
|
||||
else
|
||||
@@ -276,7 +308,7 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
mm128_bswap_32( _mm_set1_epi32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm128_bswap_32( _mm_set1_epi32( low ) );
|
||||
sha256_4way_round( sc->buf, sc->val );
|
||||
sha256_4way_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm128_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
@@ -313,16 +345,17 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
|
||||
#define SHA2s_8WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
register __m256i T1, T2; \
|
||||
T1 = _mm256_add_epi32( H, mm256_add4_32( BSG2_1x(E), CHx(E, F, G), \
|
||||
_mm256_set1_epi32( K256[( (j)+(i) )] ), W[i] ) ); \
|
||||
__m256i T1, T2; \
|
||||
__m256i K = _mm256_set1_epi32( K256[( (j)+(i) )] ); \
|
||||
T1 = _mm256_add_epi32( H, mm256_add4_32( BSG2_1x(E), CHx(E, F, G), \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
|
||||
D = _mm256_add_epi32( D, T1 ); \
|
||||
H = _mm256_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
sha256_8way_round( __m256i *in, __m256i r[8] )
|
||||
sha256_8way_round( sha256_8way_context *ctx, __m256i *in, __m256i r[8] )
|
||||
{
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
__m256i W[16];
|
||||
@@ -330,14 +363,28 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
|
||||
mm256_block_bswap_32( W , in );
|
||||
mm256_block_bswap_32( W+8, in+8 );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
}
|
||||
else
|
||||
{
|
||||
A = m256_const1_64( 0x6A09E6676A09E667 );
|
||||
B = m256_const1_64( 0xBB67AE85BB67AE85 );
|
||||
C = m256_const1_64( 0x3C6EF3723C6EF372 );
|
||||
D = m256_const1_64( 0xA54FF53AA54FF53A );
|
||||
E = m256_const1_64( 0x510E527F510E527F );
|
||||
F = m256_const1_64( 0x9B05688C9B05688C );
|
||||
G = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
||||
H = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||
}
|
||||
|
||||
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
@@ -393,20 +440,36 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
|
||||
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, j );
|
||||
}
|
||||
|
||||
r[0] = _mm256_add_epi32( r[0], A );
|
||||
r[1] = _mm256_add_epi32( r[1], B );
|
||||
r[2] = _mm256_add_epi32( r[2], C );
|
||||
r[3] = _mm256_add_epi32( r[3], D );
|
||||
r[4] = _mm256_add_epi32( r[4], E );
|
||||
r[5] = _mm256_add_epi32( r[5], F );
|
||||
r[6] = _mm256_add_epi32( r[6], G );
|
||||
r[7] = _mm256_add_epi32( r[7], H );
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
r[0] = _mm256_add_epi32( r[0], A );
|
||||
r[1] = _mm256_add_epi32( r[1], B );
|
||||
r[2] = _mm256_add_epi32( r[2], C );
|
||||
r[3] = _mm256_add_epi32( r[3], D );
|
||||
r[4] = _mm256_add_epi32( r[4], E );
|
||||
r[5] = _mm256_add_epi32( r[5], F );
|
||||
r[6] = _mm256_add_epi32( r[6], G );
|
||||
r[7] = _mm256_add_epi32( r[7], H );
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->initialized = true;
|
||||
r[0] = _mm256_add_epi32( A, m256_const1_64( 0x6A09E6676A09E667 ) );
|
||||
r[1] = _mm256_add_epi32( B, m256_const1_64( 0xBB67AE85BB67AE85 ) );
|
||||
r[2] = _mm256_add_epi32( C, m256_const1_64( 0x3C6EF3723C6EF372 ) );
|
||||
r[3] = _mm256_add_epi32( D, m256_const1_64( 0xA54FF53AA54FF53A ) );
|
||||
r[4] = _mm256_add_epi32( E, m256_const1_64( 0x510E527F510E527F ) );
|
||||
r[5] = _mm256_add_epi32( F, m256_const1_64( 0x9B05688C9B05688C ) );
|
||||
r[6] = _mm256_add_epi32( G, m256_const1_64( 0x1F83D9AB1F83D9AB ) );
|
||||
r[7] = _mm256_add_epi32( H, m256_const1_64( 0x5BE0CD195BE0CD19 ) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void sha256_8way_init( sha256_8way_context *sc )
|
||||
{
|
||||
sc->initialized = false;
|
||||
sc->count_high = sc->count_low = 0;
|
||||
/*
|
||||
sc->val[0] = _mm256_set1_epi32( H256[0] );
|
||||
sc->val[1] = _mm256_set1_epi32( H256[1] );
|
||||
sc->val[2] = _mm256_set1_epi32( H256[2] );
|
||||
@@ -415,6 +478,7 @@ void sha256_8way_init( sha256_8way_context *sc )
|
||||
sc->val[5] = _mm256_set1_epi32( H256[5] );
|
||||
sc->val[6] = _mm256_set1_epi32( H256[6] );
|
||||
sc->val[7] = _mm256_set1_epi32( H256[7] );
|
||||
*/
|
||||
}
|
||||
|
||||
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
|
||||
@@ -438,7 +502,7 @@ void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
sha256_8way_round( sc->buf, sc->val );
|
||||
sha256_8way_round( sc, sc->buf, sc->val );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -457,13 +521,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
const int pad = buf_size - 8;
|
||||
|
||||
ptr = (unsigned)sc->count_low & (buf_size - 1U);
|
||||
sc->buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
|
||||
sc->buf[ ptr>>2 ] = m256_const1_64( 0x0000008000000080 );
|
||||
ptr += 4;
|
||||
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_256( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||
sha256_8way_round( sc->buf, sc->val );
|
||||
sha256_8way_round( sc, sc->buf, sc->val );
|
||||
memset_zero_256( sc->buf, pad >> 2 );
|
||||
}
|
||||
else
|
||||
@@ -478,207 +542,10 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm256_bswap_32( _mm256_set1_epi32( low ) );
|
||||
|
||||
sha256_8way_round( sc->buf, sc->val );
|
||||
sha256_8way_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm256_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
|
||||
|
||||
// SHA-512 4 way 64 bit
|
||||
|
||||
static const sph_u64 H512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
|
||||
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
|
||||
};
|
||||
|
||||
static const sph_u64 K512[80] = {
|
||||
SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
|
||||
SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
|
||||
SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
|
||||
SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
|
||||
SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
|
||||
SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
|
||||
SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
|
||||
SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
|
||||
SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
|
||||
SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
|
||||
SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
|
||||
SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
|
||||
SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
|
||||
SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
|
||||
SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
|
||||
SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
|
||||
SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
|
||||
SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
|
||||
SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
|
||||
SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
|
||||
SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
|
||||
SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
|
||||
SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
|
||||
SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
|
||||
SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
|
||||
SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
|
||||
SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
|
||||
SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
|
||||
SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
|
||||
SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
|
||||
SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
|
||||
SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
|
||||
SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
|
||||
SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
|
||||
SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
|
||||
SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
|
||||
SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
|
||||
SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
|
||||
SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
|
||||
SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
|
||||
};
|
||||
|
||||
#define CH(X, Y, Z) \
|
||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||
|
||||
#define MAJ(X, Y, Z) \
|
||||
_mm256_or_si256( _mm256_and_si256( X, Y ), \
|
||||
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
|
||||
|
||||
#define BSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 28), mm256_ror_64(x, 34) ), mm256_ror_64(x, 39) )
|
||||
|
||||
#define BSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 14), mm256_ror_64(x, 18) ), mm256_ror_64(x, 41) )
|
||||
|
||||
#define SSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) )
|
||||
|
||||
#define SSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) )
|
||||
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
register __m256i T1, T2; \
|
||||
T1 = _mm256_add_epi64( H, mm256_add4_64( BSG5_1(E), CH(E, F, G), \
|
||||
_mm256_set1_epi64x( K512[i] ), W[i] ) ); \
|
||||
T2 = _mm256_add_epi64( BSG5_0(A), MAJ(A, B, C) ); \
|
||||
D = _mm256_add_epi64( D, T1 ); \
|
||||
H = _mm256_add_epi64( T1, T2 ); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
sha512_4way_round( __m256i *in, __m256i r[8] )
|
||||
{
|
||||
int i;
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
__m256i W[80];
|
||||
|
||||
mm256_block_bswap_64( W , in );
|
||||
mm256_block_bswap_64( W+8, in+8 );
|
||||
|
||||
for ( i = 16; i < 80; i++ )
|
||||
W[i] = mm256_add4_64( SSG5_1( W[ i- 2 ] ), W[ i- 7 ],
|
||||
SSG5_0( W[ i-15 ] ), W[ i-16 ] );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
|
||||
for ( i = 0; i < 80; i += 8 )
|
||||
{
|
||||
SHA3_4WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
|
||||
SHA3_4WAY_STEP( H, A, B, C, D, E, F, G, i + 1 );
|
||||
SHA3_4WAY_STEP( G, H, A, B, C, D, E, F, i + 2 );
|
||||
SHA3_4WAY_STEP( F, G, H, A, B, C, D, E, i + 3 );
|
||||
SHA3_4WAY_STEP( E, F, G, H, A, B, C, D, i + 4 );
|
||||
SHA3_4WAY_STEP( D, E, F, G, H, A, B, C, i + 5 );
|
||||
SHA3_4WAY_STEP( C, D, E, F, G, H, A, B, i + 6 );
|
||||
SHA3_4WAY_STEP( B, C, D, E, F, G, H, A, i + 7 );
|
||||
}
|
||||
|
||||
r[0] = _mm256_add_epi64( r[0], A );
|
||||
r[1] = _mm256_add_epi64( r[1], B );
|
||||
r[2] = _mm256_add_epi64( r[2], C );
|
||||
r[3] = _mm256_add_epi64( r[3], D );
|
||||
r[4] = _mm256_add_epi64( r[4], E );
|
||||
r[5] = _mm256_add_epi64( r[5], F );
|
||||
r[6] = _mm256_add_epi64( r[6], G );
|
||||
r[7] = _mm256_add_epi64( r[7], H );
|
||||
}
|
||||
|
||||
void sha512_4way_init( sha512_4way_context *sc )
|
||||
{
|
||||
sc->count = 0;
|
||||
sc->val[0] = _mm256_set1_epi64x( H512[0] );
|
||||
sc->val[1] = _mm256_set1_epi64x( H512[1] );
|
||||
sc->val[2] = _mm256_set1_epi64x( H512[2] );
|
||||
sc->val[3] = _mm256_set1_epi64x( H512[3] );
|
||||
sc->val[4] = _mm256_set1_epi64x( H512[4] );
|
||||
sc->val[5] = _mm256_set1_epi64x( H512[5] );
|
||||
sc->val[6] = _mm256_set1_epi64x( H512[6] );
|
||||
sc->val[7] = _mm256_set1_epi64x( H512[7] );
|
||||
}
|
||||
|
||||
void sha512_4way( sha512_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
const int buf_size = 128;
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( sc->buf + (ptr>>3), vdata, clen>>3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
sha512_4way_round( sc->buf, sc->val );
|
||||
ptr = 0;
|
||||
}
|
||||
sc->count += clen;
|
||||
}
|
||||
}
|
||||
|
||||
void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr;
|
||||
const int buf_size = 128;
|
||||
const int pad = buf_size - 16;
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );
|
||||
ptr += 8;
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_256( sc->buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
sha512_4way_round( sc->buf, sc->val );
|
||||
memset_zero_256( sc->buf, pad >> 3 );
|
||||
}
|
||||
else
|
||||
memset_zero_256( sc->buf + (ptr>>3), (pad - ptr) >> 3 );
|
||||
|
||||
sc->buf[ pad >> 3 ] =
|
||||
mm256_bswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] =
|
||||
mm256_bswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
sha512_4way_round( sc->buf, sc->val );
|
||||
|
||||
mm256_block_bswap_64( dst, sc->val );
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // __SSE2__
|
@@ -3,7 +3,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "sha2-hash-4way.h"
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
|
@@ -3,7 +3,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "sha2-hash-4way.h"
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
#if defined(SHA256T_11WAY)
|
||||
|
||||
@@ -158,7 +158,7 @@ void sha256t_8way_hash( void* output, const void* input )
|
||||
sha256_8way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
@@ -166,12 +166,12 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -194,7 +194,7 @@ int scanhash_sha256t_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
const uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32(
|
||||
@@ -244,7 +244,7 @@ void sha256t_4way_hash( void* output, const void* input )
|
||||
sha256_4way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -252,12 +252,12 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -278,7 +278,7 @@ int scanhash_sha256t_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
const uint32_t mask = masks[m];
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||
pdata[19] = n;
|
||||
|
320
algo/sha/sha512-hash-4way.c
Normal file
320
algo/sha/sha512-hash-4way.c
Normal file
@@ -0,0 +1,320 @@
|
||||
/* $Id: sha2big.c 216 2010-06-08 09:46:57Z tp $ */
|
||||
/*
|
||||
* SHA-384 / SHA-512 implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
// SHA-512 4 way 64 bit
|
||||
|
||||
/*
|
||||
static const sph_u64 H512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
|
||||
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
|
||||
};
|
||||
*/
|
||||
|
||||
static const sph_u64 K512[80] = {
|
||||
SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
|
||||
SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
|
||||
SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
|
||||
SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
|
||||
SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
|
||||
SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
|
||||
SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
|
||||
SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
|
||||
SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
|
||||
SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
|
||||
SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
|
||||
SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
|
||||
SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
|
||||
SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
|
||||
SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
|
||||
SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
|
||||
SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
|
||||
SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
|
||||
SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
|
||||
SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
|
||||
SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
|
||||
SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
|
||||
SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
|
||||
SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
|
||||
SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
|
||||
SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
|
||||
SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
|
||||
SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
|
||||
SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
|
||||
SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
|
||||
SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
|
||||
SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
|
||||
SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
|
||||
SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
|
||||
SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
|
||||
SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
|
||||
SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
|
||||
SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
|
||||
SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
|
||||
SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
|
||||
};
|
||||
|
||||
#define CH(X, Y, Z) \
|
||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||
|
||||
#define MAJ(X, Y, Z) \
|
||||
_mm256_or_si256( _mm256_and_si256( X, Y ), \
|
||||
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
|
||||
|
||||
#define BSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 28), mm256_ror_64(x, 34) ), mm256_ror_64(x, 39) )
|
||||
|
||||
#define BSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 14), mm256_ror_64(x, 18) ), mm256_ror_64(x, 41) )
|
||||
|
||||
#define SSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) )
|
||||
|
||||
#define SSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) )
|
||||
|
||||
// Interleave SSG0 & SSG1 for better throughput.
|
||||
// return ssg0(w0) + ssg1(w1)
|
||||
static inline __m256i ssg512_add( __m256i w0, __m256i w1 )
|
||||
{
|
||||
__m256i w0a, w1a, w0b, w1b;
|
||||
w0a = mm256_ror_64( w0, 1 );
|
||||
w1a = mm256_ror_64( w1,19 );
|
||||
w0b = mm256_ror_64( w0, 8 );
|
||||
w1b = mm256_ror_64( w1,61 );
|
||||
w0a = _mm256_xor_si256( w0a, w0b );
|
||||
w1a = _mm256_xor_si256( w1a, w1b );
|
||||
w0b = _mm256_srli_epi64( w0, 7 );
|
||||
w1b = _mm256_srli_epi64( w1, 6 );
|
||||
w0a = _mm256_xor_si256( w0a, w0b );
|
||||
w1a = _mm256_xor_si256( w1a, w1b );
|
||||
return _mm256_add_epi64( w0a, w1a );
|
||||
}
|
||||
|
||||
|
||||
#define SSG512x2_0( w0, w1, i ) do \
|
||||
{ \
|
||||
__m256i X0a, X1a, X0b, X1b; \
|
||||
X0a = mm256_ror_64( W[i-15], 1 ); \
|
||||
X1a = mm256_ror_64( W[i-14], 1 ); \
|
||||
X0b = mm256_ror_64( W[i-15], 8 ); \
|
||||
X1b = mm256_ror_64( W[i-14], 8 ); \
|
||||
X0a = _mm256_xor_si256( X0a, X0b ); \
|
||||
X1a = _mm256_xor_si256( X1a, X1b ); \
|
||||
X0b = _mm256_srli_epi64( W[i-15], 7 ); \
|
||||
X1b = _mm256_srli_epi64( W[i-14], 7 ); \
|
||||
w0 = _mm256_xor_si256( X0a, X0b ); \
|
||||
w1 = _mm256_xor_si256( X1a, X1b ); \
|
||||
} while(0)
|
||||
|
||||
#define SSG512x2_1( w0, w1, i ) do \
|
||||
{ \
|
||||
__m256i X0a, X1a, X0b, X1b; \
|
||||
X0a = mm256_ror_64( W[i-2],19 ); \
|
||||
X1a = mm256_ror_64( W[i-1],19 ); \
|
||||
X0b = mm256_ror_64( W[i-2],61 ); \
|
||||
X1b = mm256_ror_64( W[i-1],61 ); \
|
||||
X0a = _mm256_xor_si256( X0a, X0b ); \
|
||||
X1a = _mm256_xor_si256( X1a, X1b ); \
|
||||
X0b = _mm256_srli_epi64( W[i-2], 6 ); \
|
||||
X1b = _mm256_srli_epi64( W[i-1], 6 ); \
|
||||
w0 = _mm256_xor_si256( X0a, X0b ); \
|
||||
w1 = _mm256_xor_si256( X1a, X1b ); \
|
||||
} while(0)
|
||||
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
__m256i T1, T2; \
|
||||
__m256i K = _mm256_set1_epi64x( K512[ i ] ); \
|
||||
T1 = _mm256_add_epi64( H, mm256_add4_64( BSG5_1(E), CH(E, F, G), \
|
||||
K, W[i] ) ); \
|
||||
T2 = _mm256_add_epi64( BSG5_0(A), MAJ(A, B, C) ); \
|
||||
D = _mm256_add_epi64( D, T1 ); \
|
||||
H = _mm256_add_epi64( T1, T2 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
static void
|
||||
sha512_4way_round( sha512_4way_context *ctx, __m256i *in, __m256i r[8] )
|
||||
{
|
||||
int i;
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
__m256i W[80];
|
||||
|
||||
mm256_block_bswap_64( W , in );
|
||||
mm256_block_bswap_64( W+8, in+8 );
|
||||
|
||||
for ( i = 16; i < 80; i++ )
|
||||
W[i] = _mm256_add_epi64( ssg512_add( W[i-15], W[i-2] ),
|
||||
_mm256_add_epi64( W[ i- 7 ], W[ i-16 ] ) );
|
||||
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
}
|
||||
else
|
||||
{
|
||||
A = m256_const1_64( 0x6A09E667F3BCC908 );
|
||||
B = m256_const1_64( 0xBB67AE8584CAA73B );
|
||||
C = m256_const1_64( 0x3C6EF372FE94F82B );
|
||||
D = m256_const1_64( 0xA54FF53A5F1D36F1 );
|
||||
E = m256_const1_64( 0x510E527FADE682D1 );
|
||||
F = m256_const1_64( 0x9B05688C2B3E6C1F );
|
||||
G = m256_const1_64( 0x1F83D9ABFB41BD6B );
|
||||
H = m256_const1_64( 0x5BE0CD19137E2179 );
|
||||
}
|
||||
|
||||
for ( i = 0; i < 80; i += 8 )
|
||||
{
|
||||
SHA3_4WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
|
||||
SHA3_4WAY_STEP( H, A, B, C, D, E, F, G, i + 1 );
|
||||
SHA3_4WAY_STEP( G, H, A, B, C, D, E, F, i + 2 );
|
||||
SHA3_4WAY_STEP( F, G, H, A, B, C, D, E, i + 3 );
|
||||
SHA3_4WAY_STEP( E, F, G, H, A, B, C, D, i + 4 );
|
||||
SHA3_4WAY_STEP( D, E, F, G, H, A, B, C, i + 5 );
|
||||
SHA3_4WAY_STEP( C, D, E, F, G, H, A, B, i + 6 );
|
||||
SHA3_4WAY_STEP( B, C, D, E, F, G, H, A, i + 7 );
|
||||
}
|
||||
|
||||
if ( ctx->initialized )
|
||||
{
|
||||
r[0] = _mm256_add_epi64( r[0], A );
|
||||
r[1] = _mm256_add_epi64( r[1], B );
|
||||
r[2] = _mm256_add_epi64( r[2], C );
|
||||
r[3] = _mm256_add_epi64( r[3], D );
|
||||
r[4] = _mm256_add_epi64( r[4], E );
|
||||
r[5] = _mm256_add_epi64( r[5], F );
|
||||
r[6] = _mm256_add_epi64( r[6], G );
|
||||
r[7] = _mm256_add_epi64( r[7], H );
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->initialized = true;
|
||||
r[0] = _mm256_add_epi64( A, m256_const1_64( 0x6A09E667F3BCC908 ) );
|
||||
r[1] = _mm256_add_epi64( B, m256_const1_64( 0xBB67AE8584CAA73B ) );
|
||||
r[2] = _mm256_add_epi64( C, m256_const1_64( 0x3C6EF372FE94F82B ) );
|
||||
r[3] = _mm256_add_epi64( D, m256_const1_64( 0xA54FF53A5F1D36F1 ) );
|
||||
r[4] = _mm256_add_epi64( E, m256_const1_64( 0x510E527FADE682D1 ) );
|
||||
r[5] = _mm256_add_epi64( F, m256_const1_64( 0x9B05688C2B3E6C1F ) );
|
||||
r[6] = _mm256_add_epi64( G, m256_const1_64( 0x1F83D9ABFB41BD6B ) );
|
||||
r[7] = _mm256_add_epi64( H, m256_const1_64( 0x5BE0CD19137E2179 ) );
|
||||
}
|
||||
}
|
||||
|
||||
void sha512_4way_init( sha512_4way_context *sc )
|
||||
{
|
||||
sc->initialized = false;
|
||||
sc->count = 0;
|
||||
/*
|
||||
sc->val[0] = _mm256_set1_epi64x( H512[0] );
|
||||
sc->val[1] = _mm256_set1_epi64x( H512[1] );
|
||||
sc->val[2] = _mm256_set1_epi64x( H512[2] );
|
||||
sc->val[3] = _mm256_set1_epi64x( H512[3] );
|
||||
sc->val[4] = _mm256_set1_epi64x( H512[4] );
|
||||
sc->val[5] = _mm256_set1_epi64x( H512[5] );
|
||||
sc->val[6] = _mm256_set1_epi64x( H512[6] );
|
||||
sc->val[7] = _mm256_set1_epi64x( H512[7] );
|
||||
*/
|
||||
}
|
||||
|
||||
void sha512_4way( sha512_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
const int buf_size = 128;
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( sc->buf + (ptr>>3), vdata, clen>>3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
sha512_4way_round( sc, sc->buf, sc->val );
|
||||
ptr = 0;
|
||||
}
|
||||
sc->count += clen;
|
||||
}
|
||||
}
|
||||
|
||||
void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr;
|
||||
const int buf_size = 128;
|
||||
const int pad = buf_size - 16;
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );
|
||||
ptr += 8;
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_256( sc->buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
sha512_4way_round( sc, sc->buf, sc->val );
|
||||
memset_zero_256( sc->buf, pad >> 3 );
|
||||
}
|
||||
else
|
||||
memset_zero_256( sc->buf + (ptr>>3), (pad - ptr) >> 3 );
|
||||
|
||||
sc->buf[ pad >> 3 ] =
|
||||
mm256_bswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
|
||||
sc->buf[ ( pad+8 ) >> 3 ] =
|
||||
mm256_bswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
sha512_4way_round( sc, sc->buf, sc->val );
|
||||
|
||||
mm256_block_bswap_64( dst, sc->val );
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
@@ -63,7 +63,6 @@ extern "C"{
|
||||
* that it can optimize them at will.
|
||||
*/
|
||||
|
||||
/* BEGIN -- automatically generated code. */
|
||||
|
||||
#define DECL_STATE \
|
||||
__m128i A00, A01, A02, A03, A04, A05, A06, A07, \
|
||||
@@ -76,8 +75,11 @@ extern "C"{
|
||||
M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
sph_u32 Wlow, Whigh;
|
||||
|
||||
#define READ_STATE(state) do { \
|
||||
A00 = (state)->A[0]; \
|
||||
#define READ_STATE(state) do \
|
||||
{ \
|
||||
if ( (state)->state_loaded ) \
|
||||
{ \
|
||||
A00 = (state)->A[0]; \
|
||||
A01 = (state)->A[1]; \
|
||||
A02 = (state)->A[2]; \
|
||||
A03 = (state)->A[3]; \
|
||||
@@ -121,9 +123,58 @@ extern "C"{
|
||||
CD = (state)->C[13]; \
|
||||
CE = (state)->C[14]; \
|
||||
CF = (state)->C[15]; \
|
||||
Wlow = (state)->Wlow; \
|
||||
Whigh = (state)->Whigh; \
|
||||
} while (0)
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
(state)->state_loaded = true; \
|
||||
A00 = m128_const1_64( 0x20728DFD20728DFD ); \
|
||||
A01 = m128_const1_64( 0x46C0BD5346C0BD53 ); \
|
||||
A02 = m128_const1_64( 0xE782B699E782B699 ); \
|
||||
A03 = m128_const1_64( 0x5530463255304632 ); \
|
||||
A04 = m128_const1_64( 0x71B4EF9071B4EF90 ); \
|
||||
A05 = m128_const1_64( 0x0EA9E82C0EA9E82C ); \
|
||||
A06 = m128_const1_64( 0xDBB930F1DBB930F1 ); \
|
||||
A07 = m128_const1_64( 0xFAD06B8BFAD06B8B ); \
|
||||
A08 = m128_const1_64( 0xBE0CAE40BE0CAE40 ); \
|
||||
A09 = m128_const1_64( 0x8BD144108BD14410 ); \
|
||||
A0A = m128_const1_64( 0x76D2ADAC76D2ADAC ); \
|
||||
A0B = m128_const1_64( 0x28ACAB7F28ACAB7F ); \
|
||||
B0 = m128_const1_64( 0xC1099CB7C1099CB7 ); \
|
||||
B1 = m128_const1_64( 0x07B385F307B385F3 ); \
|
||||
B2 = m128_const1_64( 0xE7442C26E7442C26 ); \
|
||||
B3 = m128_const1_64( 0xCC8AD640CC8AD640 ); \
|
||||
B4 = m128_const1_64( 0xEB6F56C7EB6F56C7 ); \
|
||||
B5 = m128_const1_64( 0x1EA81AA91EA81AA9 ); \
|
||||
B6 = m128_const1_64( 0x73B9D31473B9D314 ); \
|
||||
B7 = m128_const1_64( 0x1DE85D081DE85D08 ); \
|
||||
B8 = m128_const1_64( 0x48910A5A48910A5A ); \
|
||||
B9 = m128_const1_64( 0x893B22DB893B22DB ); \
|
||||
BA = m128_const1_64( 0xC5A0DF44C5A0DF44 ); \
|
||||
BB = m128_const1_64( 0xBBC4324EBBC4324E ); \
|
||||
BC = m128_const1_64( 0x72D2F24072D2F240 ); \
|
||||
BD = m128_const1_64( 0x75941D9975941D99 ); \
|
||||
BE = m128_const1_64( 0x6D8BDE826D8BDE82 ); \
|
||||
BF = m128_const1_64( 0xA1A7502BA1A7502B ); \
|
||||
C0 = m128_const1_64( 0xD9BF68D1D9BF68D1 ); \
|
||||
C1 = m128_const1_64( 0x58BAD75058BAD750 ); \
|
||||
C2 = m128_const1_64( 0x56028CB256028CB2 ); \
|
||||
C3 = m128_const1_64( 0x8134F3598134F359 ); \
|
||||
C4 = m128_const1_64( 0xB5D469D8B5D469D8 ); \
|
||||
C5 = m128_const1_64( 0x941A8CC2941A8CC2 ); \
|
||||
C6 = m128_const1_64( 0x418B2A6E418B2A6E ); \
|
||||
C7 = m128_const1_64( 0x0405278004052780 ); \
|
||||
C8 = m128_const1_64( 0x7F07D7877F07D787 ); \
|
||||
C9 = m128_const1_64( 0x5194358F5194358F ); \
|
||||
CA = m128_const1_64( 0x3C60D6653C60D665 ); \
|
||||
CB = m128_const1_64( 0xBE97D79ABE97D79A ); \
|
||||
CC = m128_const1_64( 0x950C3434950C3434 ); \
|
||||
CD = m128_const1_64( 0xAED9A06DAED9A06D ); \
|
||||
CE = m128_const1_64( 0x2537DC8D2537DC8D ); \
|
||||
CF = m128_const1_64( 0x7CDB59697CDB5969 ); \
|
||||
} \
|
||||
Wlow = (state)->Wlow; \
|
||||
Whigh = (state)->Whigh; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE(state) do { \
|
||||
(state)->A[0] = A00; \
|
||||
@@ -397,6 +448,7 @@ do { \
|
||||
Whigh = T32(Whigh + 1); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
static const sph_u32 A_init_256[] = {
|
||||
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
|
||||
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
|
||||
@@ -436,33 +488,115 @@ static const sph_u32 C_init_512[] = {
|
||||
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
|
||||
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
|
||||
};
|
||||
*/
|
||||
|
||||
static void
|
||||
shabal_4way_init( void *cc, unsigned size )
|
||||
{
|
||||
shabal_4way_context *sc = (shabal_4way_context*)cc;
|
||||
int i;
|
||||
|
||||
if ( size == 512 )
|
||||
{
|
||||
for ( i = 0; i < 12; i++ )
|
||||
sc->A[i] = _mm_set1_epi32( A_init_512[i] );
|
||||
for ( i = 0; i < 16; i++ )
|
||||
{
|
||||
sc->B[i] = _mm_set1_epi32( B_init_512[i] );
|
||||
sc->C[i] = _mm_set1_epi32( C_init_512[i] );
|
||||
}
|
||||
{ // copy immediate constants directly to working registers later.
|
||||
sc->state_loaded = false;
|
||||
/*
|
||||
sc->A[ 0] = m128_const1_64( 0x20728DFD20728DFD );
|
||||
sc->A[ 1] = m128_const1_64( 0x46C0BD5346C0BD53 );
|
||||
sc->A[ 2] = m128_const1_64( 0xE782B699E782B699 );
|
||||
sc->A[ 3] = m128_const1_64( 0x5530463255304632 );
|
||||
sc->A[ 4] = m128_const1_64( 0x71B4EF9071B4EF90 );
|
||||
sc->A[ 5] = m128_const1_64( 0x0EA9E82C0EA9E82C );
|
||||
sc->A[ 6] = m128_const1_64( 0xDBB930F1DBB930F1 );
|
||||
sc->A[ 7] = m128_const1_64( 0xFAD06B8BFAD06B8B );
|
||||
sc->A[ 8] = m128_const1_64( 0xBE0CAE40BE0CAE40 );
|
||||
sc->A[ 9] = m128_const1_64( 0x8BD144108BD14410 );
|
||||
sc->A[10] = m128_const1_64( 0x76D2ADAC76D2ADAC );
|
||||
sc->A[11] = m128_const1_64( 0x28ACAB7F28ACAB7F );
|
||||
|
||||
sc->B[ 0] = m128_const1_64( 0xC1099CB7C1099CB7 );
|
||||
sc->B[ 1] = m128_const1_64( 0x07B385F307B385F3 );
|
||||
sc->B[ 2] = m128_const1_64( 0xE7442C26E7442C26 );
|
||||
sc->B[ 3] = m128_const1_64( 0xCC8AD640CC8AD640 );
|
||||
sc->B[ 4] = m128_const1_64( 0xEB6F56C7EB6F56C7 );
|
||||
sc->B[ 5] = m128_const1_64( 0x1EA81AA91EA81AA9 );
|
||||
sc->B[ 6] = m128_const1_64( 0x73B9D31473B9D314 );
|
||||
sc->B[ 7] = m128_const1_64( 0x1DE85D081DE85D08 );
|
||||
sc->B[ 8] = m128_const1_64( 0x48910A5A48910A5A );
|
||||
sc->B[ 9] = m128_const1_64( 0x893B22DB893B22DB );
|
||||
sc->B[10] = m128_const1_64( 0xC5A0DF44C5A0DF44 );
|
||||
sc->B[11] = m128_const1_64( 0xBBC4324EBBC4324E );
|
||||
sc->B[12] = m128_const1_64( 0x72D2F24072D2F240 );
|
||||
sc->B[13] = m128_const1_64( 0x75941D9975941D99 );
|
||||
sc->B[14] = m128_const1_64( 0x6D8BDE826D8BDE82 );
|
||||
sc->B[15] = m128_const1_64( 0xA1A7502BA1A7502B );
|
||||
|
||||
sc->C[ 0] = m128_const1_64( 0xD9BF68D1D9BF68D1 );
|
||||
sc->C[ 1] = m128_const1_64( 0x58BAD75058BAD750 );
|
||||
sc->C[ 2] = m128_const1_64( 0x56028CB256028CB2 );
|
||||
sc->C[ 3] = m128_const1_64( 0x8134F3598134F359 );
|
||||
sc->C[ 4] = m128_const1_64( 0xB5D469D8B5D469D8 );
|
||||
sc->C[ 5] = m128_const1_64( 0x941A8CC2941A8CC2 );
|
||||
sc->C[ 6] = m128_const1_64( 0x418B2A6E418B2A6E );
|
||||
sc->C[ 7] = m128_const1_64( 0x0405278004052780 );
|
||||
sc->C[ 8] = m128_const1_64( 0x7F07D7877F07D787 );
|
||||
sc->C[ 9] = m128_const1_64( 0x5194358F5194358F );
|
||||
sc->C[10] = m128_const1_64( 0x3C60D6653C60D665 );
|
||||
sc->C[11] = m128_const1_64( 0xBE97D79ABE97D79A );
|
||||
sc->C[12] = m128_const1_64( 0x950C3434950C3434 );
|
||||
sc->C[13] = m128_const1_64( 0xAED9A06DAED9A06D );
|
||||
sc->C[14] = m128_const1_64( 0x2537DC8D2537DC8D );
|
||||
sc->C[15] = m128_const1_64( 0x7CDB59697CDB5969 );
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
for ( i = 0; i < 12; i++ )
|
||||
sc->A[i] = _mm_set1_epi32( A_init_256[i] );
|
||||
for ( i = 0; i < 16; i++ )
|
||||
{
|
||||
sc->B[i] = _mm_set1_epi32( B_init_256[i] );
|
||||
sc->C[i] = _mm_set1_epi32( C_init_256[i] );
|
||||
}
|
||||
}
|
||||
{ // No users
|
||||
sc->state_loaded = true;
|
||||
sc->A[ 0] = m128_const1_64( 0x52F8455252F84552 );
|
||||
sc->A[ 1] = m128_const1_64( 0xE54B7999E54B7999 );
|
||||
sc->A[ 2] = m128_const1_64( 0x2D8EE3EC2D8EE3EC );
|
||||
sc->A[ 3] = m128_const1_64( 0xB9645191B9645191 );
|
||||
sc->A[ 4] = m128_const1_64( 0xE0078B86E0078B86 );
|
||||
sc->A[ 5] = m128_const1_64( 0xBB7C44C9BB7C44C9 );
|
||||
sc->A[ 6] = m128_const1_64( 0xD2B5C1CAD2B5C1CA );
|
||||
sc->A[ 7] = m128_const1_64( 0xB0D2EB8CB0D2EB8C );
|
||||
sc->A[ 8] = m128_const1_64( 0x14CE5A4514CE5A45 );
|
||||
sc->A[ 9] = m128_const1_64( 0x22AF50DC22AF50DC );
|
||||
sc->A[10] = m128_const1_64( 0xEFFDBC6BEFFDBC6B );
|
||||
sc->A[11] = m128_const1_64( 0xEB21B74AEB21B74A );
|
||||
|
||||
sc->B[ 0] = m128_const1_64( 0xB555C6EEB555C6EE );
|
||||
sc->B[ 1] = m128_const1_64( 0x3E7105963E710596 );
|
||||
sc->B[ 2] = m128_const1_64( 0xA72A652FA72A652F );
|
||||
sc->B[ 3] = m128_const1_64( 0x9301515F9301515F );
|
||||
sc->B[ 4] = m128_const1_64( 0xDA28C1FADA28C1FA );
|
||||
sc->B[ 5] = m128_const1_64( 0x696FD868696FD868 );
|
||||
sc->B[ 6] = m128_const1_64( 0x9CB6BF729CB6BF72 );
|
||||
sc->B[ 7] = m128_const1_64( 0x0AFE40020AFE4002 );
|
||||
sc->B[ 8] = m128_const1_64( 0xA6E03615A6E03615 );
|
||||
sc->B[ 9] = m128_const1_64( 0x5138C1D45138C1D4 );
|
||||
sc->B[10] = m128_const1_64( 0xBE216306BE216306 );
|
||||
sc->B[11] = m128_const1_64( 0xB38B8890B38B8890 );
|
||||
sc->B[12] = m128_const1_64( 0x3EA8B96B3EA8B96B );
|
||||
sc->B[13] = m128_const1_64( 0x3299ACE43299ACE4 );
|
||||
sc->B[14] = m128_const1_64( 0x30924DD430924DD4 );
|
||||
sc->B[15] = m128_const1_64( 0x55CB34A555CB34A5 );
|
||||
|
||||
sc->C[ 0] = m128_const1_64( 0xB405F031B405F031 );
|
||||
sc->C[ 1] = m128_const1_64( 0xC4233EBAC4233EBA );
|
||||
sc->C[ 2] = m128_const1_64( 0xB3733979B3733979 );
|
||||
sc->C[ 3] = m128_const1_64( 0xC0DD9D55C0DD9D55 );
|
||||
sc->C[ 4] = m128_const1_64( 0xC51C28AEC51C28AE );
|
||||
sc->C[ 5] = m128_const1_64( 0xA327B8E1A327B8E1 );
|
||||
sc->C[ 6] = m128_const1_64( 0x56C5616756C56167 );
|
||||
sc->C[ 7] = m128_const1_64( 0xED614433ED614433 );
|
||||
sc->C[ 8] = m128_const1_64( 0x88B59D6088B59D60 );
|
||||
sc->C[ 9] = m128_const1_64( 0x60E2CEBA60E2CEBA );
|
||||
sc->C[10] = m128_const1_64( 0x758B4B8B758B4B8B );
|
||||
sc->C[11] = m128_const1_64( 0x83E82A7F83E82A7F );
|
||||
sc->C[12] = m128_const1_64( 0xBC968828BC968828 );
|
||||
sc->C[13] = m128_const1_64( 0xE6E00BF7E6E00BF7 );
|
||||
sc->C[14] = m128_const1_64( 0xBA839E55BA839E55 );
|
||||
sc->C[15] = m128_const1_64( 0x9B491C609B491C60 );
|
||||
}
|
||||
sc->Wlow = 1;
|
||||
sc->Whigh = 0;
|
||||
sc->ptr = 0;
|
||||
@@ -488,6 +622,8 @@ shabal_4way_core( void *cc, const unsigned char *data, size_t len )
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
READ_STATE(sc);
|
||||
|
||||
while ( len > 0 )
|
||||
|
@@ -54,7 +54,8 @@ typedef struct {
|
||||
__m128i buf[16] __attribute__ ((aligned (64)));
|
||||
__m128i A[12], B[16], C[16];
|
||||
sph_u32 Whigh, Wlow;
|
||||
size_t ptr;
|
||||
size_t ptr;
|
||||
bool state_loaded;
|
||||
} shabal_4way_context;
|
||||
|
||||
typedef shabal_4way_context shabal256_4way_context;
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#if defined(__SHA__)
|
||||
#include <openssl/sha.h>
|
||||
#else
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
#endif
|
||||
|
||||
#if defined (SKEIN_4WAY)
|
||||
|
@@ -415,18 +415,46 @@ do { \
|
||||
sc->bcount = bcount; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
static const sph_u64 IV256[] = {
|
||||
SPH_C64(0xCCD044A12FDB3E13), SPH_C64(0xE83590301A79A9EB),
|
||||
SPH_C64(0x55AEA0614F816E6F), SPH_C64(0x2A2767A4AE9B94DB),
|
||||
SPH_C64(0xEC06025E74DD7683), SPH_C64(0xE7A436CDC4746251),
|
||||
SPH_C64(0xC36FBAF9393AD185), SPH_C64(0x3EEDBA1833EDFC13)
|
||||
};
|
||||
|
||||
static void
|
||||
skein_big_init_4way( skein512_4way_context *sc, const sph_u64 *iv )
|
||||
static const sph_u64 IV512[] = {
|
||||
SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
|
||||
SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
|
||||
SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
|
||||
SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
|
||||
};
|
||||
*/
|
||||
|
||||
void skein256_4way_init( skein256_4way_context *sc )
|
||||
{
|
||||
sc->h0 = _mm256_set_epi64x( iv[0], iv[0],iv[0],iv[0] );
|
||||
sc->h1 = _mm256_set_epi64x( iv[1], iv[1],iv[1],iv[1] );
|
||||
sc->h2 = _mm256_set_epi64x( iv[2], iv[2],iv[2],iv[2] );
|
||||
sc->h3 = _mm256_set_epi64x( iv[3], iv[3],iv[3],iv[3] );
|
||||
sc->h4 = _mm256_set_epi64x( iv[4], iv[4],iv[4],iv[4] );
|
||||
sc->h5 = _mm256_set_epi64x( iv[5], iv[5],iv[5],iv[5] );
|
||||
sc->h6 = _mm256_set_epi64x( iv[6], iv[6],iv[6],iv[6] );
|
||||
sc->h7 = _mm256_set_epi64x( iv[7], iv[7],iv[7],iv[7] );
|
||||
sc->h0 = m256_const1_64( 0xCCD044A12FDB3E13 );
|
||||
sc->h1 = m256_const1_64( 0xE83590301A79A9EB );
|
||||
sc->h2 = m256_const1_64( 0x55AEA0614F816E6F );
|
||||
sc->h3 = m256_const1_64( 0x2A2767A4AE9B94DB );
|
||||
sc->h4 = m256_const1_64( 0xEC06025E74DD7683 );
|
||||
sc->h5 = m256_const1_64( 0xE7A436CDC4746251 );
|
||||
sc->h6 = m256_const1_64( 0xC36FBAF9393AD185 );
|
||||
sc->h7 = m256_const1_64( 0x3EEDBA1833EDFC13 );
|
||||
sc->bcount = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc )
|
||||
{
|
||||
sc->h0 = m256_const1_64( 0x4903ADFF749C51CE );
|
||||
sc->h1 = m256_const1_64( 0x0D95DE399746DF03 );
|
||||
sc->h2 = m256_const1_64( 0x8FD1934127C79BCE );
|
||||
sc->h3 = m256_const1_64( 0x9A255629FF352CB1 );
|
||||
sc->h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
sc->h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
sc->h6 = m256_const1_64( 0x991112C71A75B523 );
|
||||
sc->h7 = m256_const1_64( 0xAE18A40B660FCC33 );
|
||||
sc->bcount = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
@@ -524,6 +552,7 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
memcpy_256( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
/*
|
||||
static const sph_u64 IV256[] = {
|
||||
SPH_C64(0xCCD044A12FDB3E13), SPH_C64(0xE83590301A79A9EB),
|
||||
SPH_C64(0x55AEA0614F816E6F), SPH_C64(0x2A2767A4AE9B94DB),
|
||||
@@ -537,13 +566,14 @@ static const sph_u64 IV512[] = {
|
||||
SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
|
||||
SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
|
||||
};
|
||||
|
||||
|
||||
*/
|
||||
/*
|
||||
void
|
||||
skein256_4way_init(void *cc)
|
||||
{
|
||||
skein_big_init_4way(cc, IV256);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
skein256_4way(void *cc, const void *data, size_t len)
|
||||
@@ -557,11 +587,13 @@ skein256_4way_close(void *cc, void *dst)
|
||||
skein_big_close_4way(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
/*
|
||||
void
|
||||
skein512_4way_init(void *cc)
|
||||
{
|
||||
skein_big_init_4way(cc, IV512);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
skein512_4way(void *cc, const void *data, size_t len)
|
||||
|
@@ -55,25 +55,26 @@ extern "C"{
|
||||
#define SPH_SIZE_skein256 256
|
||||
#define SPH_SIZE_skein512 512
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[8] __attribute__ ((aligned (32)));
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
typedef struct
|
||||
{
|
||||
__m256i buf[8] __attribute__ ((aligned (64)));
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
size_t ptr;
|
||||
sph_u64 bcount;
|
||||
} sph_skein_4way_big_context;
|
||||
|
||||
typedef sph_skein_4way_big_context skein512_4way_context;
|
||||
typedef sph_skein_4way_big_context skein256_4way_context;
|
||||
|
||||
void skein512_4way_init(void *cc);
|
||||
void skein512_4way(void *cc, const void *data, size_t len);
|
||||
void skein512_4way_close(void *cc, void *dst);
|
||||
void skein512_4way_init( skein512_4way_context *sc );
|
||||
void skein512_4way( void *cc, const void *data, size_t len );
|
||||
void skein512_4way_close( void *cc, void *dst );
|
||||
//void sph_skein512_addbits_and_close(
|
||||
// void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void skein256_4way_init(void *cc);
|
||||
void skein256_4way(void *cc, const void *data, size_t len);
|
||||
void skein256_4way_close(void *cc, void *dst);
|
||||
void skein256_4way_init( skein256_4way_context *sc );
|
||||
void skein256_4way( void *cc, const void *data, size_t len );
|
||||
void skein256_4way_close( void *cc, void *dst );
|
||||
//void sph_skein256_addbits_and_close(
|
||||
// void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
|
@@ -200,6 +200,7 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
T = _mm_set1_epi32( 0x7A879D8AUL );
|
||||
for( j =16; j < 64; j++ )
|
||||
{
|
||||
// AVX512 _mm_rol_epi32 doesn't like using a variable for the second arg.
|
||||
SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32(A,12), E ),
|
||||
mm128_rol_32( T, j&31 ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
|
||||
|
@@ -120,19 +120,13 @@ int scanhash_fresh( struct work *work,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void fresh_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
|
||||
bool register_fresh_algo( algo_gate_t* gate )
|
||||
{
|
||||
algo_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_fresh;
|
||||
gate->hash = (void*)&freshhash;
|
||||
gate->set_target = (void*)&fresh_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -1,10 +1,5 @@
|
||||
#include "timetravel-gate.h"
|
||||
|
||||
void tt8_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_timetravel_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef TIMETRAVEL_4WAY
|
||||
@@ -16,9 +11,9 @@ bool register_timetravel_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_timetravel;
|
||||
gate->hash = (void*)&timetravel_hash;
|
||||
#endif
|
||||
gate->set_target = (void*)&tt8_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -1,10 +1,5 @@
|
||||
#include "timetravel10-gate.h"
|
||||
|
||||
void tt10_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_timetravel10_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef TIMETRAVEL10_4WAY
|
||||
@@ -16,9 +11,9 @@ bool register_timetravel10_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_timetravel10;
|
||||
gate->hash = (void*)&timetravel10_hash;
|
||||
#endif
|
||||
gate->set_target = (void*)&tt10_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -249,7 +249,6 @@ bool register_drop_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_drop;
|
||||
gate->hash = (void*)&droplp_hash_pok;
|
||||
gate->get_new_work = (void*)&drop_get_new_work;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
@@ -257,6 +256,7 @@ bool register_drop_algo( algo_gate_t* gate )
|
||||
gate->decode_extra_data = (void*)&drop_display_pok;
|
||||
gate->get_work_data_size = (void*)&drop_get_work_data_size;
|
||||
gate->work_cmp_size = 72;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
283
algo/x13/x13bcd-4way.c
Normal file
283
algo/x13/x13bcd-4way.c
Normal file
@@ -0,0 +1,283 @@
|
||||
#include "x13sm3-gate.h"
|
||||
|
||||
#if defined(X13SM3_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
//#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/sm3/sm3-hash-4way.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
// luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hashState_echo echo;
|
||||
sm3_4way_ctx_t sm3;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
} x13bcd_4way_ctx_holder;
|
||||
|
||||
x13bcd_4way_ctx_holder x13bcd_4way_ctx __attribute__ ((aligned (64)));
|
||||
static __thread blake512_4way_context x13bcd_ctx_mid;
|
||||
|
||||
void init_x13bcd_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &x13bcd_4way_ctx.blake );
|
||||
bmw512_4way_init( &x13bcd_4way_ctx.bmw );
|
||||
init_groestl( &x13bcd_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &x13bcd_4way_ctx.skein );
|
||||
jh512_4way_init( &x13bcd_4way_ctx.jh );
|
||||
keccak512_4way_init( &x13bcd_4way_ctx.keccak );
|
||||
// luffa_2way_init( &x13bcd_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &x13bcd_4way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &x13bcd_4way_ctx.shavite );
|
||||
simd_2way_init( &x13bcd_4way_ctx.simd, 512 );
|
||||
init_echo( &x13bcd_4way_ctx.echo, 512 );
|
||||
sm3_4way_init( &x13bcd_4way_ctx.sm3 );
|
||||
hamsi512_4way_init( &x13bcd_4way_ctx.hamsi );
|
||||
sph_fugue512_init( &x13bcd_4way_ctx.fugue );
|
||||
};
|
||||
|
||||
void x13bcd_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
x13bcd_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &x13bcd_4way_ctx, sizeof(x13bcd_4way_ctx) );
|
||||
|
||||
// Blake
|
||||
memcpy( &ctx.blake, &x13bcd_ctx_mid, sizeof(x13bcd_ctx_mid) );
|
||||
blake512_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
|
||||
// blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// Parallel 4way
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// SM3 parallel 32 bit
|
||||
uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64)));
|
||||
memset( sm3_vhash, 0, sizeof sm3_vhash );
|
||||
uint32_t sm3_hash0[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash0, 0, sizeof sm3_hash0 );
|
||||
uint32_t sm3_hash1[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash1, 0, sizeof sm3_hash1 );
|
||||
uint32_t sm3_hash2[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash2, 0, sizeof sm3_hash2 );
|
||||
uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash3, 0, sizeof sm3_hash3 );
|
||||
|
||||
sm3_4way( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_close( &ctx.sm3, sm3_vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
|
||||
|
||||
/*
|
||||
// Luffa
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
*/
|
||||
|
||||
// Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2, 64 );
|
||||
memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
|
||||
|
||||
// Shavite
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &x13bcd_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
memcpy( &ctx.shavite, &x13bcd_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash2, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
memcpy( &ctx.shavite, &x13bcd_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
// Simd
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
// Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, 512 );
|
||||
memcpy( &ctx.echo, &x13bcd_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
/*
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// SM3 parallel 32 bit
|
||||
uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64)));
|
||||
memset( sm3_vhash, 0, sizeof sm3_vhash );
|
||||
uint32_t sm3_hash0[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash0, 0, sizeof sm3_hash0 );
|
||||
uint32_t sm3_hash1[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash1, 0, sizeof sm3_hash1 );
|
||||
uint32_t sm3_hash2[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash2, 0, sizeof sm3_hash2 );
|
||||
uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
|
||||
memset( sm3_hash3, 0, sizeof sm3_hash3 );
|
||||
|
||||
sm3_4way( &ctx.sm3, vhash, 64 );
|
||||
sm3_4way_close( &ctx.sm3, sm3_vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
|
||||
*/
|
||||
|
||||
// Hamsi parallel 4x32x2
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// Fugue serial
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
blake512_4way_init( &x13bcd_ctx_mid );
|
||||
blake512_4way( &x13bcd_ctx_mid, vdata, 64 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
x13bcd_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
258
algo/x13/x13bcd.c
Normal file
258
algo/x13/x13bcd.c
Normal file
@@ -0,0 +1,258 @@
|
||||
#include "x13sm3-gate.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/sph_luffa.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/sm3/sph_sm3.h"
|
||||
|
||||
//#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/blake/sse2/blake.c"
|
||||
#include "algo/bmw/sse2/bmw.c"
|
||||
#include "algo/keccak/sse2/keccak.c"
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#else
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
// hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sm3_ctx_t sm3;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
} x13bcd_ctx_holder;
|
||||
|
||||
x13bcd_ctx_holder x13bcd_ctx;
|
||||
|
||||
void init_x13bcd_ctx()
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init(&x13bcd_ctx.groestl);
|
||||
sph_echo512_init(&x13bcd_ctx.echo);
|
||||
#else
|
||||
init_echo(&x13bcd_ctx.echo, 512);
|
||||
init_groestl(&x13bcd_ctx.groestl, 64 );
|
||||
#endif
|
||||
// init_luffa(&x13bcd_ctx.luffa,512);
|
||||
cubehashInit(&x13bcd_ctx.cube,512,16,32);
|
||||
sph_shavite512_init(&x13bcd_ctx.shavite);
|
||||
init_sd(&x13bcd_ctx.simd,512);
|
||||
sm3_init( &x13bcd_ctx.sm3 );
|
||||
sph_hamsi512_init(&x13bcd_ctx.hamsi);
|
||||
sph_fugue512_init(&x13bcd_ctx.fugue);
|
||||
};
|
||||
|
||||
void x13bcd_hash(void *output, const void *input)
|
||||
{
|
||||
unsigned char hash[128] __attribute__ ((aligned (32)));
|
||||
|
||||
x13bcd_ctx_holder ctx;
|
||||
memcpy(&ctx, &x13bcd_ctx, sizeof(x13bcd_ctx));
|
||||
|
||||
unsigned char hashbuf[128];
|
||||
size_t hashptr;
|
||||
sph_u64 hashctA;
|
||||
sph_u64 hashctB;
|
||||
|
||||
//---blake1---
|
||||
|
||||
DECL_BLK;
|
||||
BLK_I;
|
||||
BLK_W;
|
||||
BLK_C;
|
||||
|
||||
//---bmw2---
|
||||
|
||||
DECL_BMW;
|
||||
BMW_I;
|
||||
BMW_U;
|
||||
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
BMW_C;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
|
||||
//---groestl----
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, 512 );
|
||||
#endif
|
||||
|
||||
//---skein4---
|
||||
|
||||
DECL_SKN;
|
||||
SKN_I;
|
||||
SKN_U;
|
||||
SKN_C;
|
||||
|
||||
//---jh5------
|
||||
|
||||
DECL_JH;
|
||||
JH_H;
|
||||
|
||||
//---keccak6---
|
||||
|
||||
DECL_KEC;
|
||||
KEC_I;
|
||||
KEC_U;
|
||||
KEC_C;
|
||||
|
||||
uint32_t sm3_hash[32] __attribute__ ((aligned (32)));
|
||||
memset(sm3_hash, 0, sizeof sm3_hash);
|
||||
|
||||
sph_sm3(&ctx.sm3, hash, 64);
|
||||
sph_sm3_close(&ctx.sm3, sm3_hash);
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)sm3_hash, 64 );
|
||||
|
||||
/*
|
||||
//--- luffa7
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 64 );
|
||||
|
||||
// 8 Cube
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)hash, 64 );
|
||||
*/
|
||||
|
||||
// 9 Shavite
|
||||
sph_shavite512( &ctx.shavite, hash, 64);
|
||||
sph_shavite512_close( &ctx.shavite, hash);
|
||||
|
||||
// 10 Simd
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, 512 );
|
||||
|
||||
//11---echo---
|
||||
#ifdef NO_AES_NI
|
||||
sph_echo512(&ctx.echo, hash, 64);
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
#else
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, 512 );
|
||||
#endif
|
||||
|
||||
/*
|
||||
uint32_t sm3_hash[32] __attribute__ ((aligned (32)));
|
||||
memset(sm3_hash, 0, sizeof sm3_hash);
|
||||
|
||||
sph_sm3(&ctx.sm3, hash, 64);
|
||||
sph_sm3_close(&ctx.sm3, sm3_hash);
|
||||
|
||||
sph_hamsi512(&ctx.hamsi, sm3_hash, 64);
|
||||
*/
|
||||
|
||||
sph_hamsi512(&ctx.hamsi, hash, 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
|
||||
sph_fugue512(&ctx.fugue, hash, 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
|
||||
asm volatile ("emms");
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (Htarg != 0)
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
x13bcd_hash(hash64, endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash64 );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -16,3 +16,19 @@ bool register_x13sm3_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_x13bcd_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X13SM3_4WAY)
|
||||
init_x13bcd_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x13bcd_4way;
|
||||
gate->hash = (void*)&x13bcd_4way_hash;
|
||||
#else
|
||||
init_x13bcd_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x13bcd;
|
||||
gate->hash = (void*)&x13bcd_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -10,23 +10,31 @@
|
||||
|
||||
bool register_x13sm3_algo( algo_gate_t* gate );
|
||||
|
||||
bool register_x13bcd_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X13SM3_4WAY)
|
||||
|
||||
void x13sm3_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x13sm3_4way_ctx();
|
||||
|
||||
void x13bcd_4way_hash( void *state, const void *input );
|
||||
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_x13bcd_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void x13sm3_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_x13sm3_ctx();
|
||||
|
||||
void x13bcd_hash( void *state, const void *input );
|
||||
int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_x13bcd_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
|
247
algo/x16/hex.c
Normal file
247
algo/x16/hex.c
Normal file
@@ -0,0 +1,247 @@
|
||||
/**
|
||||
* x16r algo implementation
|
||||
*
|
||||
* Implementation by tpruvot@github Jan 2018
|
||||
* Optimized by JayDDee@github Jan 2018
|
||||
*/
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
||||
{
|
||||
char *sptr = output;
|
||||
uint8_t* data = (uint8_t*)prevblock;
|
||||
|
||||
for (uint8_t j = 0; j < X16R_HASH_FUNC_COUNT; j++) {
|
||||
uint8_t b = (15 - j) >> 1; // 16 ascii hex chars, reversed
|
||||
uint8_t algoDigit = (j & 1) ? data[b] & 0xF : data[b] >> 4;
|
||||
if (algoDigit >= 10)
|
||||
sprintf(sptr, "%c", 'A' + (algoDigit - 10));
|
||||
else
|
||||
sprintf(sptr, "%u", (uint32_t) algoDigit);
|
||||
sptr++;
|
||||
}
|
||||
*sptr = '\0';
|
||||
}
|
||||
|
||||
union _hex_context_overlay
|
||||
{
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
sph_skein512_context skein;
|
||||
sph_jh512_context jh;
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
};
|
||||
typedef union _hex_context_overlay hex_context_overlay;
|
||||
|
||||
void hex_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
hex_context_overlay ctx;
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
/*
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
const uint8_t* in8 = (uint8_t*) input;
|
||||
x16_r_s_getAlgoString( &in8[4], hashOrder );
|
||||
}
|
||||
*/
|
||||
|
||||
char elem = hashOrder[0];
|
||||
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
sph_blake512_init( &ctx.blake );
|
||||
sph_blake512( &ctx.blake, in, size );
|
||||
sph_blake512_close( &ctx.blake, hash );
|
||||
break;
|
||||
case BMW:
|
||||
sph_bmw512_init( &ctx.bmw );
|
||||
sph_bmw512(&ctx.bmw, in, size);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)in, size<<3 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
sph_groestl512( &ctx.groestl, in, size );
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
break;
|
||||
case SKEIN:
|
||||
sph_skein512_init( &ctx.skein );
|
||||
sph_skein512( &ctx.skein, in, size );
|
||||
sph_skein512_close( &ctx.skein, hash );
|
||||
break;
|
||||
case JH:
|
||||
sph_jh512_init( &ctx.jh );
|
||||
sph_jh512(&ctx.jh, in, size );
|
||||
sph_jh512_close(&ctx.jh, hash );
|
||||
break;
|
||||
case KECCAK:
|
||||
sph_keccak512_init( &ctx.keccak );
|
||||
sph_keccak512( &ctx.keccak, in, size );
|
||||
sph_keccak512_close( &ctx.keccak, hash );
|
||||
break;
|
||||
case LUFFA:
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)in, size );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)in, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash );
|
||||
break;
|
||||
case SIMD:
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, in, size );
|
||||
sph_echo512_close( &ctx.echo, hash );
|
||||
#endif
|
||||
break;
|
||||
case HAMSI:
|
||||
sph_hamsi512_init( &ctx.hamsi );
|
||||
sph_hamsi512( &ctx.hamsi, in, size );
|
||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash );
|
||||
break;
|
||||
case SHABAL:
|
||||
sph_shabal512_init( &ctx.shabal );
|
||||
sph_shabal512( &ctx.shabal, in, size );
|
||||
sph_shabal512_close( &ctx.shabal, hash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, in, size );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
break;
|
||||
}
|
||||
algo = (uint8_t)hash[0] % X16R_HASH_FUNC_COUNT;
|
||||
in = (void*) hash;
|
||||
size = 64;
|
||||
}
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
hex_getAlgoString( (const uint32_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
hex_hash( hash32, endiandata );
|
||||
|
||||
if ( hash32[7] <= Htarg )
|
||||
if (fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
@@ -27,7 +27,7 @@
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
@@ -68,21 +68,7 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
int size = 80;
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
const uint8_t* tmp = (uint8_t*) in0;
|
||||
x16_r_s_getAlgoString( &tmp[4], hashOrder );
|
||||
}
|
||||
|
||||
// Input data is both 64 bit interleaved (input)
|
||||
// and deinterleaved in inp0-3.
|
||||
// If First function uses 64 bit data it is not required to interleave inp
|
||||
// first. It may use the inerleaved data dmost convenient, ie 4way 64 bit.
|
||||
// All other functions assume data is deinterleaved in hash0-3
|
||||
// All functions must exit with data deinterleaved in hash0-3.
|
||||
// Alias in0-3 points to either inp0-3 or hash0-3 according to
|
||||
// its hashOrder position. Size is also set accordingly.
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
|
@@ -42,8 +42,23 @@ bool register_x16r_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_x16rv2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rv2_4way;
|
||||
gate->hash = (void*)&x16rv2_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16rv2;
|
||||
gate->hash = (void*)&x16rv2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -57,8 +72,189 @@ bool register_x16s_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
////////////////
|
||||
//
|
||||
// X16RT
|
||||
|
||||
|
||||
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash )
|
||||
{
|
||||
int32_t maskedTime = timeStamp & 0xffffff80;
|
||||
sha256d( (unsigned char*)timeHash, (const unsigned char*)( &maskedTime ),
|
||||
sizeof( maskedTime ) );
|
||||
}
|
||||
|
||||
void x16rt_getAlgoString( const uint32_t *timeHash, char *output)
|
||||
{
|
||||
char *sptr = output;
|
||||
uint8_t* data = (uint8_t*)timeHash;
|
||||
|
||||
for (uint8_t j = 0; j < X16R_HASH_FUNC_COUNT; j++) {
|
||||
uint8_t b = (15 - j) >> 1; // 16 ascii hex chars, reversed
|
||||
uint8_t algoDigit = (j & 1) ? data[b] & 0xF : data[b] >> 4;
|
||||
|
||||
if (algoDigit >= 10)
|
||||
sprintf(sptr, "%c", 'A' + (algoDigit - 10));
|
||||
else
|
||||
sprintf(sptr, "%u", (uint32_t) algoDigit);
|
||||
sptr++;
|
||||
}
|
||||
*sptr = '\0';
|
||||
}
|
||||
|
||||
void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
uchar merkle_tree[64] = { 0 };
|
||||
size_t t;
|
||||
|
||||
algo_gate.gen_merkle_root( merkle_tree, sctx );
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
|
||||
// Assemble block header
|
||||
// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
|
||||
// (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
|
||||
// le32dec( sctx->job.ntime ), le32dec(sctx->job.nbits) );
|
||||
int i;
|
||||
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
|
||||
if ( have_stratum )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[ 1+i ] = le32dec( (uint32_t*)sctx->job.prevhash + i );
|
||||
else
|
||||
for (i = 0; i < 8; i++)
|
||||
g_work->data[ 8-i ] = le32dec( (uint32_t*)sctx->job.prevhash + i );
|
||||
|
||||
g_work->data[ algo_gate.ntime_index ] = le32dec( sctx->job.ntime );
|
||||
g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
|
||||
g_work->data[20] = 0x80000000;
|
||||
g_work->data[31] = 0x00000280;
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i);
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i);
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i);
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i);
|
||||
|
||||
uint32_t pofnhash[8];
|
||||
memset(pofnhash, 0x00, 32);
|
||||
|
||||
char denom10_str [ 2 * sizeof( g_work->denom10 ) + 1 ];
|
||||
char denom100_str [ 2 * sizeof( g_work->denom100 ) + 1 ];
|
||||
char denom1000_str [ 2 * sizeof( g_work->denom1000 ) + 1 ];
|
||||
char denom10000_str [ 2 * sizeof( g_work->denom10000 ) + 1 ];
|
||||
char merkleroot_str [ 2 * sizeof( g_work->merkleroothash ) + 1 ];
|
||||
char witmerkleroot_str[ 2 * sizeof( g_work->witmerkleroothash ) + 1 ];
|
||||
char pofn_str [ 2 * sizeof( pofnhash ) + 1 ];
|
||||
|
||||
cbin2hex( denom10_str, (char*) g_work->denom10, 32 );
|
||||
cbin2hex( denom100_str, (char*) g_work->denom100, 32 );
|
||||
cbin2hex( denom1000_str, (char*) g_work->denom1000, 32 );
|
||||
cbin2hex( denom10000_str, (char*) g_work->denom10000, 32 );
|
||||
cbin2hex( merkleroot_str, (char*) g_work->merkleroothash, 32 );
|
||||
cbin2hex( witmerkleroot_str, (char*) g_work->witmerkleroothash, 32 );
|
||||
cbin2hex( pofn_str, (char*) pofnhash, 32 );
|
||||
|
||||
if ( true )
|
||||
{
|
||||
char* data;
|
||||
data = (char*)malloc( 2 + strlen( denom10_str ) * 4 + 16 * 4
|
||||
+ strlen( merkleroot_str ) * 3 );
|
||||
// Build the block header veildatahash in hex
|
||||
sprintf( data, "%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
merkleroot_str, witmerkleroot_str, "04",
|
||||
"0a00000000000000", denom10_str,
|
||||
"6400000000000000", denom100_str,
|
||||
"e803000000000000", denom1000_str,
|
||||
"1027000000000000", denom10000_str, pofn_str );
|
||||
// Covert the hex to binary
|
||||
uint32_t test[100];
|
||||
hex2bin( (unsigned char*)(&test), data, 257);
|
||||
// Compute the sha256d of the binary
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
sha256d( (unsigned char*)hash, (unsigned char*)&(test), 257);
|
||||
// assign the veildatahash in the blockheader
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[16 - i] = le32dec(hash + i);
|
||||
free(data);
|
||||
}
|
||||
}
|
||||
|
||||
bool register_x16rt_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16rt_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16rt;
|
||||
gate->hash = (void*)&x16rt_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_x16rt_veil_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16rt_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16rt;
|
||||
gate->hash = (void*)&x16rt_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->build_extraheader = (void*)&veil_build_extraheader;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
////////////////////
|
||||
//
|
||||
// HEX
|
||||
|
||||
bool register_hex_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_hex;
|
||||
gate->hash = (void*)&hex_hash;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
opt_target_factor = 128.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
///////////////////////////////
|
||||
//
|
||||
// X21S
|
||||
|
||||
bool register_x21s_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x21s_4way;
|
||||
gate->hash = (void*)&x21s_4way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x21s;
|
||||
gate->hash = (void*)&x21s_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_thread_init;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -4,6 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "simd-utils.h"
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define X16R_4WAY
|
||||
@@ -30,11 +31,18 @@ enum x16r_Algo {
|
||||
};
|
||||
|
||||
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* );
|
||||
void x16r_getAlgoString( const uint8_t* prevblock, char *output );
|
||||
void x16s_getAlgoString( const uint8_t* prevblock, char *output );
|
||||
void x16r_getAlgoString( const uint8_t *prevblock, char *output );
|
||||
void x16s_getAlgoString( const uint8_t *prevblock, char *output );
|
||||
void x16rt_getAlgoString( const uint32_t *timeHash, char *output );
|
||||
|
||||
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash );
|
||||
|
||||
bool register_x16r_algo( algo_gate_t* gate );
|
||||
bool register_x16rv2_algo( algo_gate_t* gate );
|
||||
bool register_x16s_algo( algo_gate_t* gate );
|
||||
bool register_x16rt_algo( algo_gate_t* gate );
|
||||
bool register_hex__algo( algo_gate_t* gate );
|
||||
bool register_x21s__algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X16R_4WAY)
|
||||
|
||||
@@ -42,11 +50,41 @@ void x16r_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rv2_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rt_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x21s_4way_hash( void *state, const void *input );
|
||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool x21s_4way_thread_init();
|
||||
|
||||
#endif
|
||||
|
||||
void x16r_hash( void *state, const void *input );
|
||||
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rv2_hash( void *state, const void *input );
|
||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x16rt_hash( void *state, const void *input );
|
||||
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void hex_hash( void *state, const void *input );
|
||||
int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void x21s_hash( void *state, const void *input );
|
||||
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool x21s_thread_init();
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -65,13 +65,13 @@ void x16r_hash( void* output, const void* input )
|
||||
x16r_context_overlay ctx;
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
|
||||
/*
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
const uint8_t* in8 = (uint8_t*) input;
|
||||
x16_r_s_getAlgoString( &in8[4], hashOrder );
|
||||
}
|
||||
|
||||
*/
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
|
353
algo/x16/x16rt-4way.c
Normal file
353
algo/x16/x16rt-4way.c
Normal file
@@ -0,0 +1,353 @@
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#if defined (X16R_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread bool s_implemented = false;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rt_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_4way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
};
|
||||
typedef union _x16rt_4way_context_overlay x16rt_4way_context_overlay;
|
||||
|
||||
void x16rt_4way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t hash0[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
|
||||
x16rt_4way_context_overlay ctx;
|
||||
void *in0 = (void*) hash0;
|
||||
void *in1 = (void*) hash1;
|
||||
void *in2 = (void*) hash2;
|
||||
void *in3 = (void*) hash3;
|
||||
int size = 80;
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
/*
|
||||
void *in = (void*) input;
|
||||
uint32_t *in32 = (uint32_t*) hash0;
|
||||
uint32_t ntime = in32[17];
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
uint32_t _ALIGN(64) timeHash[8];
|
||||
x16rt_getTimeHash(ntime, &timeHash);
|
||||
x16rt_getAlgoString(&timeHash[0], hashOrder);
|
||||
}
|
||||
*/
|
||||
|
||||
// Input data is both 64 bit interleaved (input)
|
||||
// and deinterleaved in inp0-3.
|
||||
// If First function uses 64 bit data it is not required to interleave inp
|
||||
// first. It may use the inerleaved data dmost convenient, ie 4way 64 bit.
|
||||
// All other functions assume data is deinterleaved in hash0-3
|
||||
// All functions must exit with data deinterleaved in hash0-3.
|
||||
// Alias in0-3 points to either inp0-3 or hash0-3 according to
|
||||
// its hashOrder position. Size is also set accordingly.
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case GROESTL:
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)in0, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)in1, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)in2, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)in3, size<<3 );
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_4way( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case JH:
|
||||
jh512_4way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_4way( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_4way( &ctx.keccak, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
keccak512_4way( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case LUFFA:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size);
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0,
|
||||
(const byte*)in0, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
|
||||
(const byte*)in1, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
|
||||
(const byte*)in2, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
|
||||
(const byte*)in3, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case ECHO:
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence*)in0, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence*)in1, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence*)in2, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence*)in3, size<<3 );
|
||||
break;
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
break;
|
||||
case SHA_512:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, size );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
}
|
||||
size = 64;
|
||||
}
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t _ALIGN(64) timeHash[4*8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
uint32_t ntime = swab32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16rt_getTimeHash( ntime, &timeHash );
|
||||
x16rt_getAlgoString( &timeHash[0], hashOrder );
|
||||
s_ntime = ntime;
|
||||
s_implemented = true;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||
hashOrder, ntime, timeHash );
|
||||
}
|
||||
if ( !s_implemented )
|
||||
{
|
||||
applog( LOG_WARNING, "s not implemented");
|
||||
sleep(1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
x16rt_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
239
algo/x16/x16rt.c
Normal file
239
algo/x16/x16rt.c
Normal file
@@ -0,0 +1,239 @@
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread bool s_implemented = false;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rt_context_overlay
|
||||
{
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
sph_skein512_context skein;
|
||||
sph_jh512_context jh;
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
};
|
||||
typedef union _x16rt_context_overlay x16rt_context_overlay;
|
||||
|
||||
void x16rt_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
x16rt_context_overlay ctx;
|
||||
int size = 80;
|
||||
void *in = (void*) input;
|
||||
|
||||
/*
|
||||
void *in = (void*) input;
|
||||
uint32_t *in32 = (uint32_t*) in;
|
||||
uint32_t ntime = in32[17];
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
uint32_t _ALIGN(64) timeHash[8];
|
||||
x16rt_getTimeHash(ntime, &timeHash);
|
||||
x16rt_getAlgoString(&timeHash[0], hashOrder);
|
||||
}
|
||||
*/
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
sph_blake512_init( &ctx.blake );
|
||||
sph_blake512( &ctx.blake, in, size );
|
||||
sph_blake512_close( &ctx.blake, hash );
|
||||
break;
|
||||
case BMW:
|
||||
sph_bmw512_init( &ctx.bmw );
|
||||
sph_bmw512(&ctx.bmw, in, size);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)in, size<<3 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
sph_groestl512( &ctx.groestl, in, size );
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
break;
|
||||
case SKEIN:
|
||||
sph_skein512_init( &ctx.skein );
|
||||
sph_skein512( &ctx.skein, in, size );
|
||||
sph_skein512_close( &ctx.skein, hash );
|
||||
break;
|
||||
case JH:
|
||||
sph_jh512_init( &ctx.jh );
|
||||
sph_jh512(&ctx.jh, in, size );
|
||||
sph_jh512_close(&ctx.jh, hash );
|
||||
break;
|
||||
case KECCAK:
|
||||
sph_keccak512_init( &ctx.keccak );
|
||||
sph_keccak512( &ctx.keccak, in, size );
|
||||
sph_keccak512_close( &ctx.keccak, hash );
|
||||
break;
|
||||
case LUFFA:
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)in, size );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)in, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash );
|
||||
break;
|
||||
case SIMD:
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, in, size );
|
||||
sph_echo512_close( &ctx.echo, hash );
|
||||
#endif
|
||||
break;
|
||||
case HAMSI:
|
||||
sph_hamsi512_init( &ctx.hamsi );
|
||||
sph_hamsi512( &ctx.hamsi, in, size );
|
||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash );
|
||||
break;
|
||||
case SHABAL:
|
||||
sph_shabal512_init( &ctx.shabal );
|
||||
sph_shabal512( &ctx.shabal, in, size );
|
||||
sph_shabal512_close( &ctx.shabal, hash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, in, size );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
break;
|
||||
}
|
||||
in = (void*) hash;
|
||||
size = 64;
|
||||
}
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t _ALIGN(64) timeHash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
uint32_t ntime = swab32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16rt_getTimeHash( ntime, &timeHash );
|
||||
x16rt_getAlgoString( &timeHash[0], hashOrder );
|
||||
s_ntime = ntime;
|
||||
s_implemented = true;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||
hashOrder, ntime, timeHash );
|
||||
}
|
||||
if ( !s_implemented )
|
||||
{
|
||||
applog( LOG_WARNING, "s not implemented");
|
||||
sleep(1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
x16rt_hash( hash32, endiandata );
|
||||
|
||||
if ( hash32[7] <= Htarg )
|
||||
if (fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
384
algo/x16/x16rv2-4way.c
Normal file
384
algo/x16/x16rv2-4way.c
Normal file
@@ -0,0 +1,384 @@
|
||||
/**
|
||||
* x16r algo implementation
|
||||
*
|
||||
* Implementation by tpruvot@github Jan 2018
|
||||
* Optimized by JayDDee@github Jan 2018
|
||||
*/
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#if defined (X16R_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_4way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
};
|
||||
typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
|
||||
|
||||
// Pad the 24 bytes tiger hash to 64 bytes
|
||||
inline void padtiger512( uint32_t* hash )
|
||||
{
|
||||
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
|
||||
}
|
||||
|
||||
void x16rv2_4way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t hash0[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
|
||||
x16rv2_4way_context_overlay ctx;
|
||||
void *in0 = (void*) hash0;
|
||||
void *in1 = (void*) hash1;
|
||||
void *in2 = (void*) hash2;
|
||||
void *in3 = (void*) hash3;
|
||||
int size = 80;
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case GROESTL:
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)in0, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)in1, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)in2, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)in3, size<<3 );
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_4way( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case JH:
|
||||
jh512_4way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_4way( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case KECCAK:
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case LUFFA:
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
hash0[i] = hash1[i] = 0;
|
||||
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
hash2[i] = hash3[i] = 0;
|
||||
|
||||
intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0,
|
||||
(const byte*)in0, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
|
||||
(const byte*)in1, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
|
||||
(const byte*)in2, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
|
||||
(const byte*)in3, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case ECHO:
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence*)in0, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence*)in1, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence*)in2, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence*)in3, size<<3 );
|
||||
break;
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
break;
|
||||
case SHA_512:
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in0, size );
|
||||
sph_tiger_close( &ctx.tiger, hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in1, size );
|
||||
sph_tiger_close( &ctx.tiger, hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in2, size );
|
||||
sph_tiger_close( &ctx.tiger, hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in3, size );
|
||||
sph_tiger_close( &ctx.tiger, hash3 );
|
||||
|
||||
for ( int i = (24/4); i < (64/4); i++ )
|
||||
hash0[i] = hash1[i] = hash2[i] = hash3[i] = 0;
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
}
|
||||
size = 64;
|
||||
}
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
if ( s_ntime != endiandata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
x16rv2_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
247
algo/x16/x16rv2.c
Normal file
247
algo/x16/x16rv2.c
Normal file
@@ -0,0 +1,247 @@
|
||||
/**
|
||||
* x16r algo implementation
|
||||
*
|
||||
* Implementation by tpruvot@github Jan 2018
|
||||
* Optimized by JayDDee@github Jan 2018
|
||||
*/
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_context_overlay
|
||||
{
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
sph_skein512_context skein;
|
||||
sph_jh512_context jh;
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_tiger_context tiger;
|
||||
};
|
||||
typedef union _x16rv2_context_overlay x16rv2_context_overlay;
|
||||
|
||||
// Pad the 24 bytes tiger hash to 64 bytes
|
||||
inline void padtiger512(uint32_t* hash) {
|
||||
for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
|
||||
}
|
||||
|
||||
void x16rv2_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
x16rv2_context_overlay ctx;
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
/*
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
const uint8_t* in8 = (uint8_t*) input;
|
||||
x16_r_s_getAlgoString( &in8[4], hashOrder );
|
||||
}
|
||||
*/
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
sph_blake512_init( &ctx.blake );
|
||||
sph_blake512( &ctx.blake, in, size );
|
||||
sph_blake512_close( &ctx.blake, hash );
|
||||
break;
|
||||
case BMW:
|
||||
sph_bmw512_init( &ctx.bmw );
|
||||
sph_bmw512(&ctx.bmw, in, size);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)in, size<<3 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
sph_groestl512( &ctx.groestl, in, size );
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
break;
|
||||
case SKEIN:
|
||||
sph_skein512_init( &ctx.skein );
|
||||
sph_skein512( &ctx.skein, in, size );
|
||||
sph_skein512_close( &ctx.skein, hash );
|
||||
break;
|
||||
case JH:
|
||||
sph_jh512_init( &ctx.jh );
|
||||
sph_jh512(&ctx.jh, in, size );
|
||||
sph_jh512_close(&ctx.jh, hash );
|
||||
break;
|
||||
case KECCAK:
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in, size );
|
||||
sph_tiger_close( &ctx.tiger, hash );
|
||||
padtiger512( hash );
|
||||
sph_keccak512_init( &ctx.keccak );
|
||||
sph_keccak512( &ctx.keccak, hash, 64 );
|
||||
sph_keccak512_close( &ctx.keccak, hash );
|
||||
break;
|
||||
case LUFFA:
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in, size );
|
||||
sph_tiger_close( &ctx.tiger, hash );
|
||||
padtiger512( hash );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 64 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)in, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash );
|
||||
break;
|
||||
case SIMD:
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, in, size );
|
||||
sph_echo512_close( &ctx.echo, hash );
|
||||
#endif
|
||||
break;
|
||||
case HAMSI:
|
||||
sph_hamsi512_init( &ctx.hamsi );
|
||||
sph_hamsi512( &ctx.hamsi, in, size );
|
||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash );
|
||||
break;
|
||||
case SHABAL:
|
||||
sph_shabal512_init( &ctx.shabal );
|
||||
sph_shabal512( &ctx.shabal, in, size );
|
||||
sph_shabal512_close( &ctx.shabal, hash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
break;
|
||||
case SHA_512:
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger( &ctx.tiger, in, size );
|
||||
sph_tiger_close( &ctx.tiger, hash );
|
||||
padtiger512( hash );
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, 64 );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
break;
|
||||
}
|
||||
in = (void*) hash;
|
||||
size = 64;
|
||||
}
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
x16rv2_hash( hash32, endiandata );
|
||||
|
||||
if ( hash32[7] <= Htarg )
|
||||
if (fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
431
algo/x16/x21s-4way.c
Normal file
431
algo/x16/x21s-4way.c
Normal file
@@ -0,0 +1,431 @@
|
||||
/**
|
||||
* x16r algo implementation
|
||||
*
|
||||
* Implementation by tpruvot@github Jan 2018
|
||||
* Optimized by JayDDee@github Jan 2018
|
||||
*/
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#if defined (X16R_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
#include "algo/haval/haval-hash-4way.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
#if defined(__SHA__)
|
||||
#include <openssl/sha.h>
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
static __thread uint64_t* x21s_4way_matrix;
|
||||
|
||||
union _x21s_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
shabal512_4way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(__SHA__)
|
||||
SHA256_CTX sha256;
|
||||
#else
|
||||
sha256_4way_context sha256;
|
||||
#endif
|
||||
};
|
||||
typedef union _x21s_4way_context_overlay x21s_4way_context_overlay;
|
||||
|
||||
void x21s_4way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t hash0[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
|
||||
x21s_4way_context_overlay ctx;
|
||||
void *in0 = (void*) hash0;
|
||||
void *in1 = (void*) hash1;
|
||||
void *in2 = (void*) hash2;
|
||||
void *in3 = (void*) hash3;
|
||||
int size = 80;
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
// Input data is both 64 bit interleaved (input)
|
||||
// and deinterleaved in inp0-3.
|
||||
// If First function uses 64 bit data it is not required to interleave inp
|
||||
// first. It may use the inerleaved data dmost convenient, ie 4way 64 bit.
|
||||
// All other functions assume data is deinterleaved in hash0-3
|
||||
// All functions must exit with data deinterleaved in hash0-3.
|
||||
// Alias in0-3 points to either inp0-3 or hash0-3 according to
|
||||
// its hashOrder position. Size is also set accordingly.
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
blake512_4way_init( &ctx.blake );
|
||||
if ( i == 0 )
|
||||
blake512_4way( &ctx.blake, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
blake512_4way( &ctx.blake, vhash, size );
|
||||
}
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case BMW:
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
if ( i == 0 )
|
||||
bmw512_4way( &ctx.bmw, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
bmw512_4way( &ctx.bmw, vhash, size );
|
||||
}
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case GROESTL:
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)in0, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)in1, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)in2, size<<3 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)in3, size<<3 );
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_4way_init( &ctx.skein );
|
||||
if ( i == 0 )
|
||||
skein512_4way( &ctx.skein, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way( &ctx.skein, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case JH:
|
||||
jh512_4way_init( &ctx.jh );
|
||||
if ( i == 0 )
|
||||
jh512_4way( &ctx.jh, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
jh512_4way( &ctx.jh, vhash, size );
|
||||
}
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_4way( &ctx.keccak, input, size );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
keccak512_4way( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case LUFFA:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size);
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0,
|
||||
(const byte*)in0, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
|
||||
(const byte*)in1, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
|
||||
(const byte*)in2, size );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
|
||||
(const byte*)in3, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in0, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in1, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in2, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in3, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case ECHO:
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence*)in0, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence*)in1, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence*)in2, size<<3 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence*)in3, size<<3 );
|
||||
break;
|
||||
case HAMSI:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in0, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in1, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in2, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in3, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
break;
|
||||
case SHABAL:
|
||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, size );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
break;
|
||||
case SHA_512:
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, size );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
}
|
||||
size = 64;
|
||||
}
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhash, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash0, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash0 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash1, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash1 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash2, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash2 );
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash3, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash3 );
|
||||
|
||||
LYRA2REV2( x21s_4way_matrix, (void*) hash0, 32, (const void*) hash0, 32,
|
||||
(const void*) hash0, 32, 1, 4, 4 );
|
||||
LYRA2REV2( x21s_4way_matrix, (void*) hash1, 32, (const void*) hash1, 32,
|
||||
(const void*) hash1, 32, 1, 4, 4 );
|
||||
LYRA2REV2( x21s_4way_matrix, (void*) hash2, 32, (const void*) hash2, 32,
|
||||
(const void*) hash2, 32, 1, 4, 4 );
|
||||
LYRA2REV2( x21s_4way_matrix, (void*) hash3, 32, (const void*) hash3, 32,
|
||||
(const void*) hash3, 32, 1, 4, 4 );
|
||||
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash0 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash1 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash2 );
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash3 );
|
||||
|
||||
#if defined(__SHA__)
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash0, 64 );
|
||||
SHA256_Final( (unsigned char*)hash0, &ctx.sha256 );
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash1, 64 );
|
||||
SHA256_Final( (unsigned char*)hash1, &ctx.sha256 );
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash2, 64 );
|
||||
SHA256_Final( (unsigned char*)hash2, &ctx.sha256 );
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash3, 64 );
|
||||
SHA256_Final( (unsigned char*)hash3, &ctx.sha256 );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
|
||||
#else
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
sha256_4way_init( &ctx.sha256 );
|
||||
sha256_4way( &ctx.sha256, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha256, vhash );
|
||||
dintrlv_4x32( output, output+32, output+64,output+96, vhash, 256 );
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
if ( s_ntime != endiandata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
x21s_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !(*restart) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool x21s_4way_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
const int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
x21s_4way_matrix = _mm_malloc( size, 64 );
|
||||
return x21s_4way_matrix;
|
||||
}
|
||||
|
||||
#endif
|
263
algo/x16/x21s.c
Normal file
263
algo/x16/x21s.c
Normal file
@@ -0,0 +1,263 @@
|
||||
/**
|
||||
* x16r algo implementation
|
||||
*
|
||||
* Implementation by tpruvot@github Jan 2018
|
||||
* Optimized by JayDDee@github Jan 2018
|
||||
*/
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
static __thread uint64_t* x21s_matrix;
|
||||
|
||||
union _x21s_context_overlay
|
||||
{
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
sph_skein512_context skein;
|
||||
sph_jh512_context jh;
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_fugue512_context fugue;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_haval256_5_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
SHA256_CTX sha256;
|
||||
};
|
||||
typedef union _x21s_context_overlay x21s_context_overlay;
|
||||
|
||||
void x21s_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
x21s_context_overlay ctx;
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case BLAKE:
|
||||
sph_blake512_init( &ctx.blake );
|
||||
sph_blake512( &ctx.blake, in, size );
|
||||
sph_blake512_close( &ctx.blake, hash );
|
||||
break;
|
||||
case BMW:
|
||||
sph_bmw512_init( &ctx.bmw );
|
||||
sph_bmw512(&ctx.bmw, in, size);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)in, size<<3 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
sph_groestl512( &ctx.groestl, in, size );
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
break;
|
||||
case SKEIN:
|
||||
sph_skein512_init( &ctx.skein );
|
||||
sph_skein512( &ctx.skein, in, size );
|
||||
sph_skein512_close( &ctx.skein, hash );
|
||||
break;
|
||||
case JH:
|
||||
sph_jh512_init( &ctx.jh );
|
||||
sph_jh512(&ctx.jh, in, size );
|
||||
sph_jh512_close(&ctx.jh, hash );
|
||||
break;
|
||||
case KECCAK:
|
||||
sph_keccak512_init( &ctx.keccak );
|
||||
sph_keccak512( &ctx.keccak, in, size );
|
||||
sph_keccak512_close( &ctx.keccak, hash );
|
||||
break;
|
||||
case LUFFA:
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)in, size );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)in, size );
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512( &ctx.shavite, in, size );
|
||||
sph_shavite512_close( &ctx.shavite, hash );
|
||||
break;
|
||||
case SIMD:
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash,
|
||||
(const BitSequence*)in, size<<3 );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, in, size );
|
||||
sph_echo512_close( &ctx.echo, hash );
|
||||
#endif
|
||||
break;
|
||||
case HAMSI:
|
||||
sph_hamsi512_init( &ctx.hamsi );
|
||||
sph_hamsi512( &ctx.hamsi, in, size );
|
||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, in, size );
|
||||
sph_fugue512_close( &ctx.fugue, hash );
|
||||
break;
|
||||
case SHABAL:
|
||||
sph_shabal512_init( &ctx.shabal );
|
||||
sph_shabal512( &ctx.shabal, in, size );
|
||||
sph_shabal512_close( &ctx.shabal, hash );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, in, size );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
break;
|
||||
}
|
||||
in = (void*) hash;
|
||||
size = 64;
|
||||
}
|
||||
|
||||
sph_haval256_5_init( &ctx.haval );
|
||||
sph_haval256_5( &ctx.haval, (const void*) hash, 64) ;
|
||||
sph_haval256_5_close( &ctx.haval, hash );
|
||||
|
||||
sph_tiger_init( &ctx.tiger );
|
||||
sph_tiger ( &ctx.tiger, (const void*) hash, 64 );
|
||||
sph_tiger_close( &ctx.tiger, (void*) hash );
|
||||
|
||||
LYRA2REV2( x21s_matrix, (void*) hash, 32, (const void*) hash, 32,
|
||||
(const void*) hash, 32, 1, 4, 4);
|
||||
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash );
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash, 64 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx.sha256 );
|
||||
|
||||
memcpy( output, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
x21s_hash( hash32, endiandata );
|
||||
|
||||
if ( hash32[7] <= Htarg )
|
||||
if (fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool x21s_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
const int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
x21s_matrix = _mm_malloc( size, 64 );
|
||||
return x21s_matrix;
|
||||
}
|
||||
|
@@ -23,7 +23,7 @@
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/haval/haval-hash-4way.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
|
||||
union _sonoa_4way_context_overlay
|
||||
{
|
||||
@@ -69,7 +69,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -80,7 +80,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -134,13 +134,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
// 2
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -151,7 +151,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -203,7 +203,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
@@ -215,7 +215,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -226,7 +226,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -278,13 +278,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -300,13 +300,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
// 4
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -317,7 +317,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -369,13 +369,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -390,7 +390,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
@@ -402,7 +402,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
hamsi512_4way( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -438,7 +438,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
shabal512_4way( &ctx.shabal, vhashB, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -449,7 +449,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -501,13 +501,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -522,13 +522,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
@@ -545,13 +545,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
// 6
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -562,7 +562,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -614,13 +614,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -635,13 +635,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
@@ -656,13 +656,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
@@ -679,13 +679,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
// 7
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -696,7 +696,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -748,13 +748,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -769,13 +769,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
@@ -790,7 +790,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
@@ -806,7 +806,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
@@ -816,7 +816,7 @@ int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user