mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
dc6b007a18 | ||
![]() |
06bfaa1249 | ||
![]() |
6566e99a13 | ||
![]() |
ccfccbadd5 | ||
![]() |
45ecd0de14 | ||
![]() |
4fa8fcea8b |
37
README.txt
37
README.txt
@@ -1,6 +1,10 @@
|
||||
This file is included in the Windows binary package. Compile instructions
|
||||
for Linux and Windows can be found in RELEASE_NOTES.
|
||||
|
||||
This package is officially available only from:
|
||||
https://github.com/JayDDee/cpuminer-opt
|
||||
No other sources should be trusted.
|
||||
|
||||
cpuminer is a console program that is executed from a DOS or Powershell
|
||||
prompt. There is no GUI and no mouse support.
|
||||
|
||||
@@ -10,7 +14,7 @@ miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
Choose the exe that best matches your CPU's features or use trial and
|
||||
error to find the fastest one that doesn't crash. Pay attention to
|
||||
error to find the fastest one that works. Pay attention to
|
||||
the features listed at cpuminer startup to ensure you are mining at
|
||||
optimum speed using the best available features.
|
||||
|
||||
@@ -31,26 +35,31 @@ https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures
|
||||
https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
|
||||
|
||||
|
||||
Exe file name Compile flags Arch name
|
||||
Exe file name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
||||
cpuminer-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell*
|
||||
cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake-X
|
||||
cpuminer-zen.exe "-march=znver1" AMD Ryzen, Threadripper
|
||||
cpuminer-avx512-sha-vaes.exe "-march=icelake-client" Icelake*
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
||||
cpuminer-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell(1)
|
||||
cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake
|
||||
cpuminer-avx512-sha.exe "-march=cascadelake -msha" Rocketlake(2)
|
||||
cpuminer-avx512-sha-vaes.exe "-march=icelake-client" Icelake, Tigerlake(3)
|
||||
cpuminer-zen.exe "-march=znver1" AMD Zen1, Zen2
|
||||
cpuminer-zen3.exe "-march=znver2 -mvaes" Zen3(4)
|
||||
|
||||
* Haswell includes Broadwell, Skylake, Kabylake, Coffeelake & Cometlake.
|
||||
Icelake is only available on some laptops. Mining with a laptop is not
|
||||
recommended. The icelake build is included in anticipation of Intel eventually
|
||||
releasing a desktop CPU with a microarchitecture newer than Skylake.
|
||||
(1) Haswell includes Broadwell, Skylake, Kabylake, Coffeelake & Cometlake.
|
||||
(2) Rocketlake build uses cascadelake+sha as a workaround until Rocketlake
|
||||
compiler support is available.
|
||||
(3) Icelake & Tigerlake are only available on some laptops. Mining with a
|
||||
laptop is not recommended.
|
||||
(4) Zen3 build uses zen2+vaes as a workaround until Zen3 compiler support is
|
||||
available. Zen2 CPUs should use Zen1 build.
|
||||
|
||||
Notes about included DLL files:
|
||||
|
||||
Downloading DLL files from alternative sources presents an inherent
|
||||
security risk if their source is unknown. All DLL files included have
|
||||
been copied from the Ubuntu-20.04 instalation or compiled by me from
|
||||
been copied from the Ubuntu-20.04 installation or compiled by me from
|
||||
source code obtained from the author's official repository. The exact
|
||||
procedure is documented in the build instructions for Windows:
|
||||
https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
|
||||
|
@@ -65,6 +65,44 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.15.6
|
||||
|
||||
Implement keccak pre-hash optimization for x16* algos.
|
||||
Move conditional mining test to before get_new_work in miner thread.
|
||||
Add test for share reject reason when solo mining.
|
||||
Add support for floating point, as well as integer, "networkhasps" in
|
||||
RPC getmininginfo method.
|
||||
|
||||
v3.15.5
|
||||
|
||||
Fix stratum jobs lost if 2 jobs received in less than one second.
|
||||
|
||||
|
||||
v3.15.4
|
||||
|
||||
Fixed yescryptr16 broken in v3.15.3.
|
||||
|
||||
v3.15.3
|
||||
|
||||
Yescrypt algos now use yespower v0.5, a little faster.
|
||||
New implementation of sha256 using SHA CPU extension.
|
||||
Replace Openssl with SPH for sha256 & sha512.
|
||||
AVX512 optimization for sha256t & sha256q.
|
||||
Faster sha256t, sha256q, x21s, x22i & x25x on CPUs with SHA without AVX512.
|
||||
AVX512+SHA build for Intel Rocketlake added to Windows binary package.
|
||||
|
||||
v3.15.2
|
||||
|
||||
Zen3 AVX2+VAES optimization for x16*, x17, sonoa, xevan, x21s, x22i, x25x,
|
||||
allium.
|
||||
Zen3 (AVX2+SHA+VAES) build added to Windows binary package.
|
||||
|
||||
v3.15.1
|
||||
|
||||
Fix compile on AMD Zen3 CPUs with VAES.
|
||||
Force new work immediately after solving a block solo.
|
||||
|
||||
|
||||
v3.15.0
|
||||
|
||||
Fugue optimized with AES, improves many sha3 algos.
|
||||
|
@@ -370,11 +370,15 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_X22I: register_x22i_algo ( gate ); break;
|
||||
case ALGO_X25X: register_x25x_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
|
||||
// case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
|
||||
// case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8G: register_yescryptr8g_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_05_algo( gate ); break;
|
||||
// case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_05_algo( gate ); break;
|
||||
// case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
|
||||
case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break;
|
||||
case ALGO_YESPOWER_B2B: register_yespower_b2b_algo ( gate ); break;
|
||||
@@ -415,7 +419,6 @@ void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
const char* const algo_alias_map[][2] =
|
||||
{
|
||||
// alias proper
|
||||
{ "argon2d-crds", "argon2d250" },
|
||||
{ "argon2d-dyn", "argon2d500" },
|
||||
{ "argon2d-uis", "argon2d4096" },
|
||||
{ "bcd", "x13bcd" },
|
||||
|
@@ -90,10 +90,11 @@ typedef uint32_t set_t;
|
||||
#define AES_OPT 2
|
||||
#define SSE42_OPT 4
|
||||
#define AVX_OPT 8 // Sandybridge
|
||||
#define AVX2_OPT 0x10 // Haswell
|
||||
#define SHA_OPT 0x20 // sha256 (Ryzen, Ice Lake)
|
||||
#define AVX512_OPT 0x40 // AVX512- F, VL, DQ, BW (Skylake-X)
|
||||
#define VAES_OPT 0x80 // VAES (Ice Lake)
|
||||
#define AVX2_OPT 0x10 // Haswell, Zen1
|
||||
#define SHA_OPT 0x20 // Zen1, Icelake (sha256)
|
||||
#define AVX512_OPT 0x40 // Skylake-X (AVX512[F,VL,DQ,BW])
|
||||
#define VAES_OPT 0x80 // Icelake (VAES & AVX512)
|
||||
#define VAES256_OPT 0x100 // Zen3 (VAES without AVX512)
|
||||
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
@@ -111,9 +112,9 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
|
||||
typedef struct
|
||||
{
|
||||
// Mandatory functions, one of these is mandatory. If a generic scanhash
|
||||
// is used a custom hash function must be registered, with a custom scanhash
|
||||
// the custom hash function can be called directly and doesn't need to be
|
||||
// registered in the gate.
|
||||
// is used a custom target hash function must be registered, with a custom
|
||||
// scanhash the target hash function can be called directly and doesn't need
|
||||
// to be registered in the gate.
|
||||
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
||||
|
||||
int ( *hash ) ( void*, const void*, int );
|
||||
@@ -161,7 +162,7 @@ bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
|
||||
bool ( *do_this_thread ) ( int );
|
||||
|
||||
// After do_this_thread
|
||||
void ( *resync_threads ) ( struct work* );
|
||||
void ( *resync_threads ) ( int, struct work* );
|
||||
|
||||
// No longer needed
|
||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||
|
@@ -1,5 +1,4 @@
|
||||
//#if 0
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__VAES__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
#include "echo-hash-4way.h"
|
||||
@@ -13,8 +12,12 @@ static const unsigned int mul2ipt[] __attribute__ ((aligned (64))) =
|
||||
*/
|
||||
// do these need to be reversed?
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
|
||||
#define mul2mask \
|
||||
_mm512_set4_epi32( 0, 0, 0, 0x00001b00 )
|
||||
m512_const2_64( 0, 0x00001b00 )
|
||||
//_mm512_set4_epi32( 0, 0, 0, 0x00001b00 )
|
||||
// _mm512_set4_epi32( 0x00001b00, 0, 0, 0 )
|
||||
|
||||
#define lsbmask m512_const1_32( 0x01010101 )
|
||||
@@ -30,87 +33,87 @@ static const unsigned int mul2ipt[] __attribute__ ((aligned (64))) =
|
||||
const int j2 = ( (j)+2 ) & 3; \
|
||||
const int j3 = ( (j)+3 ) & 3; \
|
||||
s2 = _mm512_add_epi8( state1[ 0 ] [j ], state1[ 0 ][ j ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 0 ][ j ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask );\
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ] [j ] = s2; \
|
||||
state2[ 1 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 2 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 3 ] [j ] = _mm512_xor_si512( s2, state1[ 0 ][ j ] );\
|
||||
s2 = _mm512_add_epi8( state1[ 1 ][ j1 ], state1[ 1 ][ j1 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 1 ][ j1 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 );\
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], \
|
||||
_mm512_xor_si512( s2, state1[ 1 ][ j1 ] ) ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], s2 ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
s2 = _mm512_add_epi8( state1[ 2 ][ j2 ], state1[ 2 ][ j2 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 2 ][ j2 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 2 ][ j2 ] ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], \
|
||||
t1 = _mm512_srli_epi16( state1[ 0 ][ j ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask );\
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ] [j ] = s2; \
|
||||
state2[ 1 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 2 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 3 ] [j ] = _mm512_xor_si512( s2, state1[ 0 ][ j ] );\
|
||||
s2 = _mm512_add_epi8( state1[ 1 ][ j1 ], state1[ 1 ][ j1 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 1 ][ j1 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 );\
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], \
|
||||
_mm512_xor_si512( s2, state1[ 1 ][ j1 ] ) ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], s2 ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
s2 = _mm512_add_epi8( state1[ 2 ][ j2 ], state1[ 2 ][ j2 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 2 ][ j2 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 2 ][ j2 ] ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], \
|
||||
_mm512_xor_si512( s2, state1[ 2 ][ j2 ] ) ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], s2 ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3][ j ], state1[ 2 ][ j2 ] ); \
|
||||
s2 = _mm512_add_epi8( state1[ 3 ][ j3 ], state1[ 3 ][ j3 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 3 ][ j3 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], s2 ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3][ j ], state1[ 2 ][ j2 ] ); \
|
||||
s2 = _mm512_add_epi8( state1[ 3 ][ j3 ], state1[ 3 ][ j3 ] ); \
|
||||
t1 = _mm512_srli_epi16( state1[ 3 ][ j3 ], 7 ); \
|
||||
t1 = _mm512_and_si512( t1, lsbmask ); \
|
||||
t2 = _mm512_shuffle_epi8( mul2mask, t1 ); \
|
||||
s2 = _mm512_xor_si512( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm512_xor_si512( state2[ 0 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 1 ][ j ] = _mm512_xor_si512( state2[ 1 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 2 ][ j ] = _mm512_xor_si512( state2[ 2 ][ j ], \
|
||||
_mm512_xor_si512( s2, state1[ 3 ][ j3] ) ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], s2 ); \
|
||||
state2[ 3 ][ j ] = _mm512_xor_si512( state2[ 3 ][ j ], s2 ); \
|
||||
} while(0)
|
||||
|
||||
#define ECHO_ROUND_UNROLL2 \
|
||||
ECHO_SUBBYTES(_state, 0, 0);\
|
||||
ECHO_SUBBYTES(_state, 0, 0);\
|
||||
ECHO_SUBBYTES(_state, 1, 0);\
|
||||
ECHO_SUBBYTES(_state, 2, 0);\
|
||||
ECHO_SUBBYTES(_state, 3, 0);\
|
||||
ECHO_SUBBYTES(_state, 0, 1);\
|
||||
ECHO_SUBBYTES(_state, 1, 1);\
|
||||
ECHO_SUBBYTES(_state, 2, 1);\
|
||||
ECHO_SUBBYTES(_state, 3, 1);\
|
||||
ECHO_SUBBYTES(_state, 0, 2);\
|
||||
ECHO_SUBBYTES(_state, 1, 2);\
|
||||
ECHO_SUBBYTES(_state, 2, 2);\
|
||||
ECHO_SUBBYTES(_state, 3, 2);\
|
||||
ECHO_SUBBYTES(_state, 0, 3);\
|
||||
ECHO_SUBBYTES(_state, 1, 3);\
|
||||
ECHO_SUBBYTES(_state, 2, 3);\
|
||||
ECHO_SUBBYTES(_state, 3, 3);\
|
||||
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 0);\
|
||||
ECHO_SUBBYTES(_state2, 1, 0);\
|
||||
ECHO_SUBBYTES(_state2, 2, 0);\
|
||||
ECHO_SUBBYTES(_state2, 3, 0);\
|
||||
ECHO_SUBBYTES(_state2, 0, 1);\
|
||||
ECHO_SUBBYTES(_state2, 1, 1);\
|
||||
ECHO_SUBBYTES(_state2, 2, 1);\
|
||||
ECHO_SUBBYTES(_state2, 3, 1);\
|
||||
ECHO_SUBBYTES(_state2, 0, 2);\
|
||||
ECHO_SUBBYTES(_state2, 1, 2);\
|
||||
ECHO_SUBBYTES(_state2, 2, 2);\
|
||||
ECHO_SUBBYTES(_state2, 3, 2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 3);\
|
||||
ECHO_SUBBYTES(_state2, 1, 3);\
|
||||
ECHO_SUBBYTES(_state2, 2, 3);\
|
||||
ECHO_SUBBYTES(_state2, 3, 3);\
|
||||
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
|
||||
ECHO_SUBBYTES(_state, 2, 0);\
|
||||
ECHO_SUBBYTES(_state, 3, 0);\
|
||||
ECHO_SUBBYTES(_state, 0, 1);\
|
||||
ECHO_SUBBYTES(_state, 1, 1);\
|
||||
ECHO_SUBBYTES(_state, 2, 1);\
|
||||
ECHO_SUBBYTES(_state, 3, 1);\
|
||||
ECHO_SUBBYTES(_state, 0, 2);\
|
||||
ECHO_SUBBYTES(_state, 1, 2);\
|
||||
ECHO_SUBBYTES(_state, 2, 2);\
|
||||
ECHO_SUBBYTES(_state, 3, 2);\
|
||||
ECHO_SUBBYTES(_state, 0, 3);\
|
||||
ECHO_SUBBYTES(_state, 1, 3);\
|
||||
ECHO_SUBBYTES(_state, 2, 3);\
|
||||
ECHO_SUBBYTES(_state, 3, 3);\
|
||||
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 0);\
|
||||
ECHO_SUBBYTES(_state2, 1, 0);\
|
||||
ECHO_SUBBYTES(_state2, 2, 0);\
|
||||
ECHO_SUBBYTES(_state2, 3, 0);\
|
||||
ECHO_SUBBYTES(_state2, 0, 1);\
|
||||
ECHO_SUBBYTES(_state2, 1, 1);\
|
||||
ECHO_SUBBYTES(_state2, 2, 1);\
|
||||
ECHO_SUBBYTES(_state2, 3, 1);\
|
||||
ECHO_SUBBYTES(_state2, 0, 2);\
|
||||
ECHO_SUBBYTES(_state2, 1, 2);\
|
||||
ECHO_SUBBYTES(_state2, 2, 2);\
|
||||
ECHO_SUBBYTES(_state2, 3, 2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 3);\
|
||||
ECHO_SUBBYTES(_state2, 1, 3);\
|
||||
ECHO_SUBBYTES(_state2, 2, 3);\
|
||||
ECHO_SUBBYTES(_state2, 3, 3);\
|
||||
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
|
||||
|
||||
#define SAVESTATE(dst, src)\
|
||||
dst[0][0] = src[0][0];\
|
||||
@@ -224,43 +227,43 @@ void echo_4way_compress( echo_4way_context *ctx, const __m512i *pmsg,
|
||||
|
||||
int echo_4way_init( echo_4way_context *ctx, int nHashSize )
|
||||
{
|
||||
int i, j;
|
||||
int i, j;
|
||||
|
||||
ctx->k = m512_zero;
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
|
||||
switch( nHashSize )
|
||||
{
|
||||
case 256:
|
||||
ctx->uHashSize = 256;
|
||||
ctx->uBlockLength = 192;
|
||||
ctx->uRounds = 8;
|
||||
ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x100 );
|
||||
ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x600 );
|
||||
break;
|
||||
switch( nHashSize )
|
||||
{
|
||||
case 256:
|
||||
ctx->uHashSize = 256;
|
||||
ctx->uBlockLength = 192;
|
||||
ctx->uRounds = 8;
|
||||
ctx->hashsize = m512_const2_64( 0, 0x100 );
|
||||
ctx->const1536 = m512_const2_64( 0, 0x600 );
|
||||
break;
|
||||
|
||||
case 512:
|
||||
ctx->uHashSize = 512;
|
||||
ctx->uBlockLength = 128;
|
||||
ctx->uRounds = 10;
|
||||
ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x200 );
|
||||
ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x400);
|
||||
break;
|
||||
case 512:
|
||||
ctx->uHashSize = 512;
|
||||
ctx->uBlockLength = 128;
|
||||
ctx->uRounds = 10;
|
||||
ctx->hashsize = m512_const2_64( 0, 0x200 );
|
||||
ctx->const1536 = m512_const2_64( 0, 0x400);
|
||||
break;
|
||||
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = 0; j < nHashSize / 256; j++ )
|
||||
ctx->state[ i ][ j ] = ctx->hashsize;
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = 0; j < nHashSize / 256; j++ )
|
||||
ctx->state[ i ][ j ] = ctx->hashsize;
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = nHashSize / 256; j < 4; j++ )
|
||||
ctx->state[ i ][ j ] = m512_zero;
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = nHashSize / 256; j < 4; j++ )
|
||||
ctx->state[ i ][ j ] = m512_zero;
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||
@@ -285,17 +288,13 @@ int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||
vlen = databitlen / 128; // * 4 lanes / 128 bits per lane
|
||||
memcpy_512( state->buffer, data, vlen );
|
||||
state->processed_bits += (unsigned int)( databitlen );
|
||||
remainingbits = _mm512_set4_epi32( 0, 0, 0, databitlen );
|
||||
|
||||
remainingbits = m512_const2_64( 0, (uint64_t)databitlen );
|
||||
}
|
||||
|
||||
state->buffer[ vlen ] = _mm512_set4_epi32( 0, 0, 0, 0x80 );
|
||||
state->buffer[ vlen ] = m512_const2_64( 0, 0x80 );
|
||||
memset_zero_512( state->buffer + vlen + 1, vblen - vlen - 2 );
|
||||
state->buffer[ vblen-2 ] =
|
||||
_mm512_set4_epi32( (uint32_t)state->uHashSize << 16, 0, 0, 0 );
|
||||
state->buffer[ vblen-1 ] =
|
||||
_mm512_set4_epi64( 0, state->processed_bits,
|
||||
0, state->processed_bits );
|
||||
state->buffer[ vblen-2 ] = m512_const2_64( (uint64_t)state->uHashSize << 48, 0 );
|
||||
state->buffer[ vblen-1 ] = m512_const2_64( 0, state->processed_bits);
|
||||
|
||||
state->k = _mm512_add_epi64( state->k, remainingbits );
|
||||
state->k = _mm512_sub_epi64( state->k, state->const1536 );
|
||||
@@ -328,16 +327,16 @@ int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||
ctx->uHashSize = 256;
|
||||
ctx->uBlockLength = 192;
|
||||
ctx->uRounds = 8;
|
||||
ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x100 );
|
||||
ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x600 );
|
||||
ctx->hashsize = m512_const2_64( 0, 0x100 );
|
||||
ctx->const1536 = m512_const2_64( 0, 0x600 );
|
||||
break;
|
||||
|
||||
case 512:
|
||||
ctx->uHashSize = 512;
|
||||
ctx->uBlockLength = 128;
|
||||
ctx->uRounds = 10;
|
||||
ctx->hashsize = _mm512_set4_epi32( 0, 0, 0, 0x200 );
|
||||
ctx->const1536 = _mm512_set4_epi32( 0, 0, 0, 0x400);
|
||||
ctx->hashsize = m512_const2_64( 0, 0x200 );
|
||||
ctx->const1536 = m512_const2_64( 0, 0x400 );
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -372,17 +371,14 @@ int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||
vlen = databitlen / 128; // * 4 lanes / 128 bits per lane
|
||||
memcpy_512( ctx->buffer, data, vlen );
|
||||
ctx->processed_bits += (unsigned int)( databitlen );
|
||||
remainingbits = _mm512_set4_epi32( 0, 0, 0, databitlen );
|
||||
|
||||
remainingbits = m512_const2_64( 0, databitlen );
|
||||
}
|
||||
|
||||
ctx->buffer[ vlen ] = _mm512_set4_epi32( 0, 0, 0, 0x80 );
|
||||
ctx->buffer[ vlen ] = m512_const2_64( 0, 0x80 );
|
||||
memset_zero_512( ctx->buffer + vlen + 1, vblen - vlen - 2 );
|
||||
ctx->buffer[ vblen-2 ] =
|
||||
_mm512_set4_epi32( (uint32_t)ctx->uHashSize << 16, 0, 0, 0 );
|
||||
ctx->buffer[ vblen-1 ] =
|
||||
_mm512_set4_epi64( 0, ctx->processed_bits,
|
||||
0, ctx->processed_bits );
|
||||
m512_const2_64( (uint64_t)ctx->uHashSize << 48, 0 );
|
||||
ctx->buffer[ vblen-1 ] = m512_const2_64( 0, ctx->processed_bits);
|
||||
|
||||
ctx->k = _mm512_add_epi64( ctx->k, remainingbits );
|
||||
ctx->k = _mm512_sub_epi64( ctx->k, ctx->const1536 );
|
||||
@@ -400,5 +396,380 @@ int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#endif
|
||||
// AVX2 + VAES
|
||||
|
||||
#define mul2mask_2way m256_const2_64( 0, 0x0000000000001b00 )
|
||||
|
||||
#define lsbmask_2way m256_const1_32( 0x01010101 )
|
||||
|
||||
#define ECHO_SUBBYTES_2WAY( state, i, j ) \
|
||||
state[i][j] = _mm256_aesenc_epi128( state[i][j], k1 ); \
|
||||
state[i][j] = _mm256_aesenc_epi128( state[i][j], m256_zero ); \
|
||||
k1 = _mm256_add_epi32( k1, m256_one_128 );
|
||||
|
||||
#define ECHO_MIXBYTES_2WAY( state1, state2, j, t1, t2, s2 ) do \
|
||||
{ \
|
||||
const int j1 = ( (j)+1 ) & 3; \
|
||||
const int j2 = ( (j)+2 ) & 3; \
|
||||
const int j3 = ( (j)+3 ) & 3; \
|
||||
s2 = _mm256_add_epi8( state1[ 0 ] [j ], state1[ 0 ][ j ] ); \
|
||||
t1 = _mm256_srli_epi16( state1[ 0 ][ j ], 7 ); \
|
||||
t1 = _mm256_and_si256( t1, lsbmask_2way );\
|
||||
t2 = _mm256_shuffle_epi8( mul2mask_2way, t1 ); \
|
||||
s2 = _mm256_xor_si256( s2, t2 ); \
|
||||
state2[ 0 ] [j ] = s2; \
|
||||
state2[ 1 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 2 ] [j ] = state1[ 0 ][ j ]; \
|
||||
state2[ 3 ] [j ] = _mm256_xor_si256( s2, state1[ 0 ][ j ] );\
|
||||
s2 = _mm256_add_epi8( state1[ 1 ][ j1 ], state1[ 1 ][ j1 ] ); \
|
||||
t1 = _mm256_srli_epi16( state1[ 1 ][ j1 ], 7 ); \
|
||||
t1 = _mm256_and_si256( t1, lsbmask_2way ); \
|
||||
t2 = _mm256_shuffle_epi8( mul2mask_2way, t1 ); \
|
||||
s2 = _mm256_xor_si256( s2, t2 );\
|
||||
state2[ 0 ][ j ] = _mm256_xor_si256( state2[ 0 ][ j ], \
|
||||
_mm256_xor_si256( s2, state1[ 1 ][ j1 ] ) ); \
|
||||
state2[ 1 ][ j ] = _mm256_xor_si256( state2[ 1 ][ j ], s2 ); \
|
||||
state2[ 2 ][ j ] = _mm256_xor_si256( state2[ 2 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
state2[ 3 ][ j ] = _mm256_xor_si256( state2[ 3 ][ j ], state1[ 1 ][ j1 ] ); \
|
||||
s2 = _mm256_add_epi8( state1[ 2 ][ j2 ], state1[ 2 ][ j2 ] ); \
|
||||
t1 = _mm256_srli_epi16( state1[ 2 ][ j2 ], 7 ); \
|
||||
t1 = _mm256_and_si256( t1, lsbmask_2way ); \
|
||||
t2 = _mm256_shuffle_epi8( mul2mask_2way, t1 ); \
|
||||
s2 = _mm256_xor_si256( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm256_xor_si256( state2[ 0 ][ j ], state1[ 2 ][ j2 ] ); \
|
||||
state2[ 1 ][ j ] = _mm256_xor_si256( state2[ 1 ][ j ], \
|
||||
_mm256_xor_si256( s2, state1[ 2 ][ j2 ] ) ); \
|
||||
state2[ 2 ][ j ] = _mm256_xor_si256( state2[ 2 ][ j ], s2 ); \
|
||||
state2[ 3 ][ j ] = _mm256_xor_si256( state2[ 3][ j ], state1[ 2 ][ j2 ] ); \
|
||||
s2 = _mm256_add_epi8( state1[ 3 ][ j3 ], state1[ 3 ][ j3 ] ); \
|
||||
t1 = _mm256_srli_epi16( state1[ 3 ][ j3 ], 7 ); \
|
||||
t1 = _mm256_and_si256( t1, lsbmask_2way ); \
|
||||
t2 = _mm256_shuffle_epi8( mul2mask_2way, t1 ); \
|
||||
s2 = _mm256_xor_si256( s2, t2 ); \
|
||||
state2[ 0 ][ j ] = _mm256_xor_si256( state2[ 0 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 1 ][ j ] = _mm256_xor_si256( state2[ 1 ][ j ], state1[ 3 ][ j3 ] ); \
|
||||
state2[ 2 ][ j ] = _mm256_xor_si256( state2[ 2 ][ j ], \
|
||||
_mm256_xor_si256( s2, state1[ 3 ][ j3] ) ); \
|
||||
state2[ 3 ][ j ] = _mm256_xor_si256( state2[ 3 ][ j ], s2 ); \
|
||||
} while(0)
|
||||
|
||||
#define ECHO_ROUND_UNROLL2_2WAY \
|
||||
ECHO_SUBBYTES_2WAY(_state, 0, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 1, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 2, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 3, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 0, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 1, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 2, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 3, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 0, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 1, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 2, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 3, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 0, 3);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 1, 3);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 2, 3);\
|
||||
ECHO_SUBBYTES_2WAY(_state, 3, 3);\
|
||||
ECHO_MIXBYTES_2WAY(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES_2WAY(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES_2WAY(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES_2WAY(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 0, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 1, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 2, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 3, 0);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 0, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 1, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 2, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 3, 1);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 0, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 1, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 2, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 3, 2);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 0, 3);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 1, 3);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 2, 3);\
|
||||
ECHO_SUBBYTES_2WAY(_state2, 3, 3);\
|
||||
ECHO_MIXBYTES_2WAY(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES_2WAY(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES_2WAY(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES_2WAY(_state2, _state, 3, t1, t2, s2)
|
||||
|
||||
#define SAVESTATE_2WAY(dst, src)\
|
||||
dst[0][0] = src[0][0];\
|
||||
dst[0][1] = src[0][1];\
|
||||
dst[0][2] = src[0][2];\
|
||||
dst[0][3] = src[0][3];\
|
||||
dst[1][0] = src[1][0];\
|
||||
dst[1][1] = src[1][1];\
|
||||
dst[1][2] = src[1][2];\
|
||||
dst[1][3] = src[1][3];\
|
||||
dst[2][0] = src[2][0];\
|
||||
dst[2][1] = src[2][1];\
|
||||
dst[2][2] = src[2][2];\
|
||||
dst[2][3] = src[2][3];\
|
||||
dst[3][0] = src[3][0];\
|
||||
dst[3][1] = src[3][1];\
|
||||
dst[3][2] = src[3][2];\
|
||||
dst[3][3] = src[3][3]
|
||||
|
||||
// blockcount always 1
|
||||
void echo_2way_compress( echo_2way_context *ctx, const __m256i *pmsg,
|
||||
unsigned int uBlockCount )
|
||||
{
|
||||
unsigned int r, b, i, j;
|
||||
__m256i t1, t2, s2, k1;
|
||||
__m256i _state[4][4], _state2[4][4], _statebackup[4][4];
|
||||
|
||||
_state[ 0 ][ 0 ] = ctx->state[ 0 ][ 0 ];
|
||||
_state[ 0 ][ 1 ] = ctx->state[ 0 ][ 1 ];
|
||||
_state[ 0 ][ 2 ] = ctx->state[ 0 ][ 2 ];
|
||||
_state[ 0 ][ 3 ] = ctx->state[ 0 ][ 3 ];
|
||||
_state[ 1 ][ 0 ] = ctx->state[ 1 ][ 0 ];
|
||||
_state[ 1 ][ 1 ] = ctx->state[ 1 ][ 1 ];
|
||||
_state[ 1 ][ 2 ] = ctx->state[ 1 ][ 2 ];
|
||||
_state[ 1 ][ 3 ] = ctx->state[ 1 ][ 3 ];
|
||||
_state[ 2 ][ 0 ] = ctx->state[ 2 ][ 0 ];
|
||||
_state[ 2 ][ 1 ] = ctx->state[ 2 ][ 1 ];
|
||||
_state[ 2 ][ 2 ] = ctx->state[ 2 ][ 2 ];
|
||||
_state[ 2 ][ 3 ] = ctx->state[ 2 ][ 3 ];
|
||||
_state[ 3 ][ 0 ] = ctx->state[ 3 ][ 0 ];
|
||||
_state[ 3 ][ 1 ] = ctx->state[ 3 ][ 1 ];
|
||||
_state[ 3 ][ 2 ] = ctx->state[ 3 ][ 2 ];
|
||||
_state[ 3 ][ 3 ] = ctx->state[ 3 ][ 3 ];
|
||||
|
||||
for ( b = 0; b < uBlockCount; b++ )
|
||||
{
|
||||
ctx->k = _mm256_add_epi64( ctx->k, ctx->const1536 );
|
||||
|
||||
for( j = ctx->uHashSize / 256; j < 4; j++ )
|
||||
{
|
||||
for ( i = 0; i < 4; i++ )
|
||||
{
|
||||
_state[ i ][ j ] = _mm256_load_si256(
|
||||
pmsg + 4 * (j - (ctx->uHashSize / 256)) + i );
|
||||
}
|
||||
}
|
||||
|
||||
// save state
|
||||
SAVESTATE_2WAY( _statebackup, _state );
|
||||
|
||||
k1 = ctx->k;
|
||||
|
||||
for ( r = 0; r < ctx->uRounds / 2; r++ )
|
||||
{
|
||||
ECHO_ROUND_UNROLL2_2WAY;
|
||||
}
|
||||
|
||||
if ( ctx->uHashSize == 256 )
|
||||
{
|
||||
for ( i = 0; i < 4; i++ )
|
||||
{
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_state[ i ][ 1 ] );
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_state[ i ][ 2 ] );
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_state[ i ][ 3 ] );
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 0 ] );
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 1 ] );
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 2 ] ) ;
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 3 ] );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for ( i = 0; i < 4; i++ )
|
||||
{
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_state[ i ][ 2 ] );
|
||||
_state[ i ][ 1 ] = _mm256_xor_si256( _state[ i ][ 1 ],
|
||||
_state[ i ][ 3 ] );
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ][ 0 ],
|
||||
_statebackup[ i ][ 0 ] );
|
||||
_state[ i ][ 0 ] = _mm256_xor_si256( _state[ i ] [0 ],
|
||||
_statebackup[ i ][ 2 ] );
|
||||
_state[ i ][ 1 ] = _mm256_xor_si256( _state[ i ][ 1 ],
|
||||
_statebackup[ i ][ 1 ] );
|
||||
_state[ i ][ 1 ] = _mm256_xor_si256( _state[ i ][ 1 ],
|
||||
_statebackup[ i ][ 3 ] );
|
||||
}
|
||||
}
|
||||
pmsg += ctx->uBlockLength;
|
||||
}
|
||||
SAVESTATE_2WAY(ctx->state, _state);
|
||||
|
||||
}
|
||||
int echo_2way_init( echo_2way_context *ctx, int nHashSize )
|
||||
{
|
||||
int i, j;
|
||||
|
||||
ctx->k = m256_zero;
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
|
||||
switch( nHashSize )
|
||||
{
|
||||
case 256:
|
||||
ctx->uHashSize = 256;
|
||||
ctx->uBlockLength = 192;
|
||||
ctx->uRounds = 8;
|
||||
ctx->hashsize = m256_const2_64( 0, 0x100 );
|
||||
ctx->const1536 = m256_const2_64( 0, 0x600 );
|
||||
break;
|
||||
|
||||
case 512:
|
||||
ctx->uHashSize = 512;
|
||||
ctx->uBlockLength = 128;
|
||||
ctx->uRounds = 10;
|
||||
ctx->hashsize = m256_const2_64( 0, 0x200 );
|
||||
ctx->const1536 = m256_const2_64( 0, 0x400 );
|
||||
break;
|
||||
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = 0; j < nHashSize / 256; j++ )
|
||||
ctx->state[ i ][ j ] = ctx->hashsize;
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = nHashSize / 256; j < 4; j++ )
|
||||
ctx->state[ i ][ j ] = m256_zero;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int echo_2way_update_close( echo_2way_context *state, void *hashval,
|
||||
const void *data, int databitlen )
|
||||
{
|
||||
// bytelen is either 32 (maybe), 64 or 80 or 128!
|
||||
// all are less than full block.
|
||||
|
||||
int vlen = databitlen / 128; // * 4 lanes / 128 bits per lane
|
||||
const int vblen = state->uBlockLength / 16; // 16 bytes per lane
|
||||
__m256i remainingbits;
|
||||
|
||||
if ( databitlen == 1024 )
|
||||
{
|
||||
echo_2way_compress( state, data, 1 );
|
||||
state->processed_bits = 1024;
|
||||
remainingbits = m256_const2_64( 0, -1024 );
|
||||
vlen = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_256( state->buffer, data, vlen );
|
||||
state->processed_bits += (unsigned int)( databitlen );
|
||||
remainingbits = m256_const2_64( 0, databitlen );
|
||||
}
|
||||
|
||||
state->buffer[ vlen ] = m256_const2_64( 0, 0x80 );
|
||||
memset_zero_256( state->buffer + vlen + 1, vblen - vlen - 2 );
|
||||
state->buffer[ vblen-2 ] = m256_const2_64( (uint64_t)state->uHashSize << 48, 0 );
|
||||
state->buffer[ vblen-1 ] = m256_const2_64( 0, state->processed_bits );
|
||||
|
||||
state->k = _mm256_add_epi64( state->k, remainingbits );
|
||||
state->k = _mm256_sub_epi64( state->k, state->const1536 );
|
||||
|
||||
echo_2way_compress( state, state->buffer, 1 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)hashval + 0, state->state[ 0 ][ 0] );
|
||||
_mm256_store_si256( (__m256i*)hashval + 1, state->state[ 1 ][ 0] );
|
||||
|
||||
if ( state->uHashSize == 512 )
|
||||
{
|
||||
_mm256_store_si256( (__m256i*)hashval + 2, state->state[ 2 ][ 0 ] );
|
||||
_mm256_store_si256( (__m256i*)hashval + 3, state->state[ 3 ][ 0 ] );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int echo_2way_full( echo_2way_context *ctx, void *hashval, int nHashSize,
|
||||
const void *data, int datalen )
|
||||
{
|
||||
int i, j;
|
||||
int databitlen = datalen * 8;
|
||||
ctx->k = m256_zero;
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
|
||||
switch( nHashSize )
|
||||
{
|
||||
case 256:
|
||||
ctx->uHashSize = 256;
|
||||
ctx->uBlockLength = 192;
|
||||
ctx->uRounds = 8;
|
||||
ctx->hashsize = m256_const2_64( 0, 0x100 );
|
||||
ctx->const1536 = m256_const2_64( 0, 0x600 );
|
||||
break;
|
||||
|
||||
case 512:
|
||||
ctx->uHashSize = 512;
|
||||
ctx->uBlockLength = 128;
|
||||
ctx->uRounds = 10;
|
||||
ctx->hashsize = m256_const2_64( 0, 0x200 );
|
||||
ctx->const1536 = m256_const2_64( 0, 0x400 );
|
||||
break;
|
||||
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = 0; j < nHashSize / 256; j++ )
|
||||
ctx->state[ i ][ j ] = ctx->hashsize;
|
||||
|
||||
for( i = 0; i < 4; i++ )
|
||||
for( j = nHashSize / 256; j < 4; j++ )
|
||||
ctx->state[ i ][ j ] = m256_zero;
|
||||
|
||||
int vlen = datalen / 32;
|
||||
const int vblen = ctx->uBlockLength / 16; // 16 bytes per lane
|
||||
__m256i remainingbits;
|
||||
|
||||
if ( databitlen == 1024 )
|
||||
{
|
||||
echo_2way_compress( ctx, data, 1 );
|
||||
ctx->processed_bits = 1024;
|
||||
remainingbits = m256_const2_64( 0, -1024 );
|
||||
vlen = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
vlen = databitlen / 128; // * 4 lanes / 128 bits per lane
|
||||
memcpy_256( ctx->buffer, data, vlen );
|
||||
ctx->processed_bits += (unsigned int)( databitlen );
|
||||
remainingbits = m256_const2_64( 0, databitlen );
|
||||
}
|
||||
|
||||
ctx->buffer[ vlen ] = m256_const2_64( 0, 0x80 );
|
||||
memset_zero_256( ctx->buffer + vlen + 1, vblen - vlen - 2 );
|
||||
ctx->buffer[ vblen-2 ] = m256_const2_64( (uint64_t)ctx->uHashSize << 48, 0 );
|
||||
ctx->buffer[ vblen-1 ] = m256_const2_64( 0, ctx->processed_bits );
|
||||
|
||||
ctx->k = _mm256_add_epi64( ctx->k, remainingbits );
|
||||
ctx->k = _mm256_sub_epi64( ctx->k, ctx->const1536 );
|
||||
|
||||
echo_2way_compress( ctx, ctx->buffer, 1 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)hashval + 0, ctx->state[ 0 ][ 0] );
|
||||
_mm256_store_si256( (__m256i*)hashval + 1, ctx->state[ 1 ][ 0] );
|
||||
|
||||
if ( ctx->uHashSize == 512 )
|
||||
{
|
||||
_mm256_store_si256( (__m256i*)hashval + 2, ctx->state[ 2 ][ 0 ] );
|
||||
_mm256_store_si256( (__m256i*)hashval + 3, ctx->state[ 3 ][ 0 ] );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif // VAES
|
||||
|
@@ -1,10 +1,12 @@
|
||||
#if !defined(ECHO_HASH_4WAY_H__)
|
||||
#define ECHO_HASH_4WAY_H__ 1
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__VAES__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m512i state[4][4];
|
||||
@@ -20,6 +22,7 @@ typedef struct
|
||||
unsigned int processed_bits;
|
||||
|
||||
} echo_4way_context __attribute__ ((aligned (64)));
|
||||
#define echo512_4way_context echo_4way_context
|
||||
|
||||
int echo_4way_init( echo_4way_context *state, int hashbitlen );
|
||||
#define echo512_4way_init( state ) echo_4way_init( state, 512 )
|
||||
@@ -29,8 +32,8 @@ int echo_4way_update( echo_4way_context *state, const void *data,
|
||||
unsigned int databitlen);
|
||||
#define echo512_4way_update echo_4way_update
|
||||
|
||||
int echo_close( echo_4way_context *state, void *hashval );
|
||||
#define echo512_4way_close echo_4way_close
|
||||
// int echo_4way_close( echo_4way_context *state, void *hashval );
|
||||
// #define echo512_4way_close echo_4way_close
|
||||
|
||||
int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||
const void *data, int databitlen );
|
||||
@@ -43,5 +46,45 @@ int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||
#define echo256_4way_full( state, hashval, data, datalen ) \
|
||||
echo_4way_full( state, hashval, 256, data, datalen )
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif // AVX512
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m256i state[4][4];
|
||||
__m256i buffer[ 4 * 192 / 16 ]; // 4x128 interleaved 192 bytes
|
||||
__m256i k;
|
||||
__m256i hashsize;
|
||||
__m256i const1536;
|
||||
|
||||
unsigned int uRounds;
|
||||
unsigned int uHashSize;
|
||||
unsigned int uBlockLength;
|
||||
unsigned int uBufferBytes;
|
||||
unsigned int processed_bits;
|
||||
|
||||
} echo_2way_context __attribute__ ((aligned (64)));
|
||||
#define echo512_2way_context echo_2way_context
|
||||
|
||||
int echo_2way_init( echo_2way_context *state, int hashbitlen );
|
||||
#define echo512_2way_init( state ) echo_2way_init( state, 512 )
|
||||
#define echo256_2way_init( state ) echo_2way_init( state, 256 )
|
||||
|
||||
int echo_2way_update( echo_2way_context *state, const void *data,
|
||||
unsigned int databitlen);
|
||||
#define echo512_2way_update echo_2way_update
|
||||
|
||||
int echo_2way_update_close( echo_2way_context *state, void *hashval,
|
||||
const void *data, int databitlen );
|
||||
#define echo512_2way_update_close echo_2way_update_close
|
||||
|
||||
int echo_2way_full( echo_2way_context *ctx, void *hashval, int nHashSize,
|
||||
const void *data, int datalen );
|
||||
#define echo512_2way_full( state, hashval, data, datalen ) \
|
||||
echo_2way_full( state, hashval, 512, data, datalen )
|
||||
#define echo256_2way_full( state, hashval, data, datalen ) \
|
||||
echo_2way_full( state, hashval, 256, data, datalen )
|
||||
|
||||
|
||||
#endif // VAES
|
||||
|
||||
#endif // ECHO_HASH_4WAY_H__
|
||||
|
@@ -34,13 +34,11 @@ MYALIGN const unsigned long long _supermix4c[] = {0x0706050403020000, 0x03020000
|
||||
MYALIGN const unsigned long long _supermix7a[] = {0x010c0b060d080702, 0x0904030e03000104};
|
||||
MYALIGN const unsigned long long _supermix7b[] = {0x8080808080808080, 0x0504070605040f06};
|
||||
MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E};
|
||||
MYALIGN const unsigned int _maskd3n[] = {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000};
|
||||
MYALIGN const unsigned char _shift_one_mask[] = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2};
|
||||
MYALIGN const unsigned char _shift_four_mask[] = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8};
|
||||
MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5};
|
||||
MYALIGN const unsigned char _aes_shift_rows[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
|
||||
MYALIGN const unsigned int _inv_shift_rows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
|
||||
MYALIGN const unsigned int _zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int _mul2mask[] = {0x1b1b0000, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int _mul4mask[] = {0x2d361b00, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int _lsbmask2[] = {0x03030303, 0x03030303, 0x03030303, 0x03030303};
|
||||
@@ -61,7 +59,7 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
|
||||
#define UNPACK_S0(s0, s1, t1)\
|
||||
s1 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s1), _mm_castsi128_ps(s0), 0xc0));\
|
||||
s0 = _mm_and_si128(s0, M128(_maskd3n))
|
||||
s0 = mm128_mask_32( s0, 8 )
|
||||
|
||||
#define CMIX(s1, s2, r1, r2, t1, t2)\
|
||||
t1 = s1;\
|
||||
@@ -78,7 +76,7 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
#define UNPACK_S0(s0, s1, t1)\
|
||||
t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 3, 3));\
|
||||
s1 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s1), _mm_castsi128_ps(t1)));\
|
||||
s0 = _mm_and_si128(s0, M128(_maskd3n))
|
||||
s0 = mm128_mask_32( s0, 8 )
|
||||
|
||||
#define CMIX(s1, s2, r1, r2, t1, t2)\
|
||||
t1 = _mm_shuffle_epi32(s1, 0xf9);\
|
||||
@@ -138,7 +136,7 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
|
||||
#define SUBSTITUTE(r0, _t1, _t2, _t3, _t0)\
|
||||
_t2 = _mm_shuffle_epi8(r0, M128(_inv_shift_rows));\
|
||||
_t2 = _mm_aesenclast_si128(_t2, M128(_zero))
|
||||
_t2 = _mm_aesenclast_si128( _t2, m128_zero )
|
||||
|
||||
#define SUPERMIX(t0, t1, t2, t3, t4)\
|
||||
PRESUPERMIX(t0, t1, t2, t3, t4);\
|
||||
@@ -181,14 +179,14 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
|
||||
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
|
||||
r2c = _mm_xor_si128(r2c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r2d = _mm_xor_si128(r2d, _t0);\
|
||||
UNPACK_S0(r1c, r1a, _t3);\
|
||||
SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
|
||||
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
|
||||
r3c = _mm_xor_si128(r3c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r3d = _mm_xor_si128(r3d, _t0);\
|
||||
UNPACK_S0(r2c, r2a, _t3);\
|
||||
SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
|
||||
@@ -203,21 +201,21 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
|
||||
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
|
||||
r2c = _mm_xor_si128(r2c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r2d = _mm_xor_si128(r2d, _t0);\
|
||||
UNPACK_S0(r1c, r1a, _t3);\
|
||||
SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
|
||||
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
|
||||
r3c = _mm_xor_si128(r3c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r3d = _mm_xor_si128(r3d, _t0);\
|
||||
UNPACK_S0(r2c, r2a, _t3);\
|
||||
SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
|
||||
_t0 = _mm_shuffle_epi32(r3c, 0x39);\
|
||||
r4c = _mm_xor_si128(r4c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r4d = _mm_xor_si128(r4d, _t0);\
|
||||
UNPACK_S0(r3c, r3a, _t3);\
|
||||
SUBSTITUTE(r4c, _t1, _t2, _t3, _t0);\
|
||||
@@ -462,7 +460,7 @@ HashReturn fugue512_Init(hashState_fugue *ctx, int nHashSize)
|
||||
ctx->uBlockLength = 4;
|
||||
|
||||
for(i = 0; i < 6; i++)
|
||||
ctx->state[i] = _mm_setzero_si128();
|
||||
ctx->state[i] = m128_zero;
|
||||
|
||||
ctx->state[6] = _mm_load_si128((__m128i*)_IV512 + 0);
|
||||
ctx->state[7] = _mm_load_si128((__m128i*)_IV512 + 1);
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#if defined(__AES__)
|
||||
|
||||
#include "algo/sha/sha3_common.h"
|
||||
#include <x86intrin.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
|
||||
typedef struct
|
||||
|
@@ -15,7 +15,9 @@
|
||||
#include "miner.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__AVX2__) && defined(__VAES__)
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
|
||||
int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
|
||||
@@ -43,10 +45,10 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
|
||||
}
|
||||
|
||||
int groestl256_4way_full( groestl256_4way_context* ctx, void* output,
|
||||
const void* input, uint64_t databitlen )
|
||||
const void* input, uint64_t datalen )
|
||||
{
|
||||
const int len = (int)databitlen / 128;
|
||||
const int hashlen_m128i = 32 / 16; // bytes to __m128i
|
||||
const int len = (int)datalen >> 4;
|
||||
const int hashlen_m128i = 32 >> 4; // bytes to __m128i
|
||||
const int hash_offset = SIZE256 - hashlen_m128i;
|
||||
int rem = ctx->rem_ptr;
|
||||
int blocks = len / SIZE256;
|
||||
@@ -172,5 +174,161 @@ int groestl256_4way_update_close( groestl256_4way_context* ctx, void* output,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // VAES
|
||||
#endif // AVX512
|
||||
|
||||
// AVX2 + VAES
|
||||
|
||||
int groestl256_2way_init( groestl256_2way_context* ctx, uint64_t hashlen )
|
||||
{
|
||||
int i;
|
||||
|
||||
ctx->hashlen = hashlen;
|
||||
|
||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
||||
return 1;
|
||||
|
||||
for ( i = 0; i < SIZE256; i++ )
|
||||
{
|
||||
ctx->chaining[i] = m256_zero;
|
||||
ctx->buffer[i] = m256_zero;
|
||||
}
|
||||
|
||||
// The only non-zero in the IV is len. It can be hard coded.
|
||||
ctx->chaining[ 3 ] = m256_const2_64( 0, 0x0100000000000000 );
|
||||
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int groestl256_2way_full( groestl256_2way_context* ctx, void* output,
|
||||
const void* input, uint64_t datalen )
|
||||
{
|
||||
const int len = (int)datalen >> 4;
|
||||
const int hashlen_m128i = 32 >> 4; // bytes to __m128i
|
||||
const int hash_offset = SIZE256 - hashlen_m128i;
|
||||
int rem = ctx->rem_ptr;
|
||||
int blocks = len / SIZE256;
|
||||
__m256i* in = (__m256i*)input;
|
||||
int i;
|
||||
|
||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
||||
return 1;
|
||||
|
||||
for ( i = 0; i < SIZE256; i++ )
|
||||
{
|
||||
ctx->chaining[i] = m256_zero;
|
||||
ctx->buffer[i] = m256_zero;
|
||||
}
|
||||
|
||||
// The only non-zero in the IV is len. It can be hard coded.
|
||||
ctx->chaining[ 3 ] = m256_const2_64( 0, 0x0100000000000000 );
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
|
||||
// --- update ---
|
||||
|
||||
// digest any full blocks, process directly from input
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
TF512_2way( ctx->chaining, &in[ i * SIZE256 ] );
|
||||
ctx->buf_ptr = blocks * SIZE256;
|
||||
|
||||
// copy any remaining data to buffer, it may already contain data
|
||||
// from a previous update for a midstate precalc
|
||||
for ( i = 0; i < len % SIZE256; i++ )
|
||||
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
|
||||
i += rem; // use i as rem_ptr in final
|
||||
|
||||
//--- final ---
|
||||
|
||||
blocks++; // adjust for final block
|
||||
|
||||
if ( i == SIZE256 - 1 )
|
||||
{
|
||||
// only 1 vector left in buffer, all padding at once
|
||||
ctx->buffer[i] = m256_const2_64( (uint64_t)blocks << 56, 0x80 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// add first padding
|
||||
ctx->buffer[i] = m256_const2_64( 0, 0x80 );
|
||||
// add zero padding
|
||||
for ( i += 1; i < SIZE256 - 1; i++ )
|
||||
ctx->buffer[i] = m256_zero;
|
||||
|
||||
// add length padding, second last byte is zero unless blocks > 255
|
||||
ctx->buffer[i] = m256_const2_64( (uint64_t)blocks << 56, 0 );
|
||||
}
|
||||
|
||||
// digest final padding block and do output transform
|
||||
TF512_2way( ctx->chaining, ctx->buffer );
|
||||
|
||||
OF512_2way( ctx->chaining );
|
||||
|
||||
// store hash result in output
|
||||
for ( i = 0; i < hashlen_m128i; i++ )
|
||||
casti_m256i( output, i ) = ctx->chaining[ hash_offset + i ];
|
||||
|
||||
return 0;
|
||||
}
|
||||
int groestl256_2way_update_close( groestl256_2way_context* ctx, void* output,
|
||||
const void* input, uint64_t databitlen )
|
||||
{
|
||||
const int len = (int)databitlen / 128;
|
||||
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
|
||||
const int hash_offset = SIZE256 - hashlen_m128i;
|
||||
int rem = ctx->rem_ptr;
|
||||
int blocks = len / SIZE256;
|
||||
__m256i* in = (__m256i*)input;
|
||||
int i;
|
||||
|
||||
// --- update ---
|
||||
|
||||
// digest any full blocks, process directly from input
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
TF512_2way( ctx->chaining, &in[ i * SIZE256 ] );
|
||||
ctx->buf_ptr = blocks * SIZE256;
|
||||
|
||||
// copy any remaining data to buffer, it may already contain data
|
||||
// from a previous update for a midstate precalc
|
||||
for ( i = 0; i < len % SIZE256; i++ )
|
||||
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
|
||||
i += rem; // use i as rem_ptr in final
|
||||
|
||||
//--- final ---
|
||||
|
||||
blocks++; // adjust for final block
|
||||
|
||||
if ( i == SIZE256 - 1 )
|
||||
{
|
||||
// only 1 vector left in buffer, all padding at once
|
||||
ctx->buffer[i] = m256_const1_128( _mm_set_epi8(
|
||||
blocks, blocks>>8,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// add first padding
|
||||
ctx->buffer[i] = m256_const2_64( 0, 0x80 );
|
||||
// add zero padding
|
||||
for ( i += 1; i < SIZE256 - 1; i++ )
|
||||
ctx->buffer[i] = m256_zero;
|
||||
|
||||
// add length padding, second last byte is zero unless blocks > 255
|
||||
ctx->buffer[i] = m256_const1_128( _mm_set_epi8(
|
||||
blocks, blocks>>8, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 ) );
|
||||
}
|
||||
|
||||
// digest final padding block and do output transform
|
||||
TF512_2way( ctx->chaining, ctx->buffer );
|
||||
|
||||
OF512_2way( ctx->chaining );
|
||||
|
||||
// store hash result in output
|
||||
for ( i = 0; i < hashlen_m128i; i++ )
|
||||
casti_m256i( output, i ) = ctx->chaining[ hash_offset + i ];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // VAES
|
||||
|
@@ -18,8 +18,8 @@
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#if defined(__AVX2__) && defined(__VAES__)
|
||||
|
||||
#define LENGTH (256)
|
||||
|
||||
//#include "brg_endian.h"
|
||||
@@ -48,6 +48,8 @@
|
||||
|
||||
#define SIZE256 (SIZE_512/16)
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
typedef struct {
|
||||
__attribute__ ((aligned (128))) __m512i chaining[SIZE256];
|
||||
__attribute__ ((aligned (64))) __m512i buffer[SIZE256];
|
||||
@@ -55,7 +57,7 @@ typedef struct {
|
||||
int blk_count; // SIZE_m128i
|
||||
int buf_ptr; // __m128i offset
|
||||
int rem_ptr;
|
||||
int databitlen; // bits
|
||||
// int databitlen; // bits
|
||||
} groestl256_4way_context;
|
||||
|
||||
|
||||
@@ -74,5 +76,25 @@ int groestl256_4way_update_close( groestl256_4way_context*, void*,
|
||||
int groestl256_4way_full( groestl256_4way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif // AVX512
|
||||
|
||||
typedef struct {
|
||||
__attribute__ ((aligned (128))) __m256i chaining[SIZE256];
|
||||
__attribute__ ((aligned (64))) __m256i buffer[SIZE256];
|
||||
int hashlen; // byte
|
||||
int blk_count; // SIZE_m128i
|
||||
int buf_ptr; // __m128i offset
|
||||
int rem_ptr;
|
||||
// int databitlen; // bits
|
||||
} groestl256_2way_context;
|
||||
|
||||
int groestl256_2way_init( groestl256_2way_context*, uint64_t );
|
||||
|
||||
int groestl256_2way_update_close( groestl256_2way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
int groestl256_2way_full( groestl256_2way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
#endif // VAES
|
||||
#endif // GROESTL256_HASH_4WAY_H__
|
||||
|
@@ -7,13 +7,13 @@
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(GROESTL256_INTR_4WAY_H__)
|
||||
#define GROESTL256_INTR_4WAY_H__ 1
|
||||
|
||||
#include "groestl256-hash-4way.h"
|
||||
|
||||
#if defined(__VAES__)
|
||||
#if defined(__AVX2__) && defined(__VAES__)
|
||||
|
||||
static const __m128i round_const_l0[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x7060504030201000, 0xffffffffffffffff },
|
||||
@@ -42,6 +42,8 @@ static const __m128i round_const_l7[] __attribute__ ((aligned (64))) =
|
||||
{ 0x0000000000000000, 0x8696a6b6c6d6e6f6 }
|
||||
};
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02,
|
||||
0x1d1519111c141810, 0x1f171b131e161a12,
|
||||
0x2d2529212c242820, 0x2f272b232e262a22,
|
||||
@@ -499,5 +501,398 @@ void OF512_4way( __m512i* chaining )
|
||||
chaining[3] = xmm11;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
static const __m256i TRANSP_MASK_2WAY =
|
||||
{ 0x0d0509010c040800, 0x0f070b030e060a02,
|
||||
0x1d1519111c141810, 0x1f171b131e161a12 };
|
||||
|
||||
static const __m256i SUBSH_MASK0_2WAY =
|
||||
{ 0x0c0f0104070b0e00, 0x03060a0d08020509,
|
||||
0x1c1f1114171b1e10, 0x13161a1d18121519 };
|
||||
|
||||
static const __m256i SUBSH_MASK1_2WAY =
|
||||
{ 0x0e090205000d0801, 0x04070c0f0a03060b,
|
||||
0x1e191215101d1801, 0x14171c1f1a13161b };
|
||||
|
||||
static const __m256i SUBSH_MASK2_2WAY =
|
||||
{ 0x080b0306010f0a02, 0x05000e090c04070d,
|
||||
0x181b1316111f1a12, 0x15101e191c14171d };
|
||||
|
||||
static const __m256i SUBSH_MASK3_2WAY =
|
||||
{ 0x0a0d040702090c03, 0x0601080b0e05000f,
|
||||
0x1a1d141712191c13, 0x1611181b1e15101f };
|
||||
|
||||
static const __m256i SUBSH_MASK4_2WAY =
|
||||
{ 0x0b0e0500030a0d04, 0x0702090c0f060108,
|
||||
0x1b1e1510131a1d14, 0x1712191c1f161118 };
|
||||
|
||||
static const __m256i SUBSH_MASK5_2WAY =
|
||||
{ 0x0d080601040c0f05, 0x00030b0e0907020a,
|
||||
0x1d181611141c1f15, 0x10131b1e1917121a };
|
||||
|
||||
static const __m256i SUBSH_MASK6_2WAY =
|
||||
{ 0x0f0a0702050e0906, 0x01040d080b00030c,
|
||||
0x1f1a1712151e1916, 0x11141d181b10131c };
|
||||
|
||||
static const __m256i SUBSH_MASK7_2WAY =
|
||||
{ 0x090c000306080b07, 0x02050f0a0d01040e,
|
||||
0x191c101316181b17, 0x12151f1a1d11141e, };
|
||||
|
||||
#define tos(a) #a
|
||||
#define tostr(a) tos(a)
|
||||
|
||||
/* xmm[i] will be multiplied by 2
|
||||
* xmm[j] will be lost
|
||||
* xmm[k] has to be all 0x1b */
|
||||
#define MUL2_2WAY(i, j, k){\
|
||||
j = _mm256_xor_si256(j, j);\
|
||||
j = _mm256_cmpgt_epi8(j, i );\
|
||||
i = _mm256_add_epi8(i, i);\
|
||||
j = _mm256_and_si256(j, k);\
|
||||
i = _mm256_xor_si256(i, j);\
|
||||
}
|
||||
|
||||
#define MixBytes_2way(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
||||
/* t_i = a_i + a_{i+1} */\
|
||||
b6 = a0;\
|
||||
b7 = a1;\
|
||||
a0 = _mm256_xor_si256(a0, a1);\
|
||||
b0 = a2;\
|
||||
a1 = _mm256_xor_si256(a1, a2);\
|
||||
b1 = a3;\
|
||||
a2 = _mm256_xor_si256(a2, a3);\
|
||||
b2 = a4;\
|
||||
a3 = _mm256_xor_si256(a3, a4);\
|
||||
b3 = a5;\
|
||||
a4 = _mm256_xor_si256(a4, a5);\
|
||||
b4 = a6;\
|
||||
a5 = _mm256_xor_si256(a5, a6);\
|
||||
b5 = a7;\
|
||||
a6 = _mm256_xor_si256(a6, a7);\
|
||||
a7 = _mm256_xor_si256(a7, b6);\
|
||||
\
|
||||
/* build y4 y5 y6 ... in regs xmm8, xmm9, xmm10 by adding t_i*/\
|
||||
b0 = _mm256_xor_si256(b0, a4);\
|
||||
b6 = _mm256_xor_si256(b6, a4);\
|
||||
b1 = _mm256_xor_si256(b1, a5);\
|
||||
b7 = _mm256_xor_si256(b7, a5);\
|
||||
b2 = _mm256_xor_si256(b2, a6);\
|
||||
b0 = _mm256_xor_si256(b0, a6);\
|
||||
/* spill values y_4, y_5 to memory */\
|
||||
TEMP0 = b0;\
|
||||
b3 = _mm256_xor_si256(b3, a7);\
|
||||
b1 = _mm256_xor_si256(b1, a7);\
|
||||
TEMP1 = b1;\
|
||||
b4 = _mm256_xor_si256(b4, a0);\
|
||||
b2 = _mm256_xor_si256(b2, a0);\
|
||||
/* save values t0, t1, t2 to xmm8, xmm9 and memory */\
|
||||
b0 = a0;\
|
||||
b5 = _mm256_xor_si256(b5, a1);\
|
||||
b3 = _mm256_xor_si256(b3, a1);\
|
||||
b1 = a1;\
|
||||
b6 = _mm256_xor_si256(b6, a2);\
|
||||
b4 = _mm256_xor_si256(b4, a2);\
|
||||
TEMP2 = a2;\
|
||||
b7 = _mm256_xor_si256(b7, a3);\
|
||||
b5 = _mm256_xor_si256(b5, a3);\
|
||||
\
|
||||
/* compute x_i = t_i + t_{i+3} */\
|
||||
a0 = _mm256_xor_si256(a0, a3);\
|
||||
a1 = _mm256_xor_si256(a1, a4);\
|
||||
a2 = _mm256_xor_si256(a2, a5);\
|
||||
a3 = _mm256_xor_si256(a3, a6);\
|
||||
a4 = _mm256_xor_si256(a4, a7);\
|
||||
a5 = _mm256_xor_si256(a5, b0);\
|
||||
a6 = _mm256_xor_si256(a6, b1);\
|
||||
a7 = _mm256_xor_si256(a7, TEMP2);\
|
||||
\
|
||||
/* compute z_i : double x_i using temp xmm8 and 1B xmm9 */\
|
||||
/* compute w_i : add y_{i+4} */\
|
||||
b1 = m256_const1_64( 0x1b1b1b1b1b1b1b1b );\
|
||||
MUL2_2WAY(a0, b0, b1);\
|
||||
a0 = _mm256_xor_si256(a0, TEMP0);\
|
||||
MUL2_2WAY(a1, b0, b1);\
|
||||
a1 = _mm256_xor_si256(a1, TEMP1);\
|
||||
MUL2_2WAY(a2, b0, b1);\
|
||||
a2 = _mm256_xor_si256(a2, b2);\
|
||||
MUL2_2WAY(a3, b0, b1);\
|
||||
a3 = _mm256_xor_si256(a3, b3);\
|
||||
MUL2_2WAY(a4, b0, b1);\
|
||||
a4 = _mm256_xor_si256(a4, b4);\
|
||||
MUL2_2WAY(a5, b0, b1);\
|
||||
a5 = _mm256_xor_si256(a5, b5);\
|
||||
MUL2_2WAY(a6, b0, b1);\
|
||||
a6 = _mm256_xor_si256(a6, b6);\
|
||||
MUL2_2WAY(a7, b0, b1);\
|
||||
a7 = _mm256_xor_si256(a7, b7);\
|
||||
\
|
||||
/* compute v_i : double w_i */\
|
||||
/* add to y_4 y_5 .. v3, v4, ... */\
|
||||
MUL2_2WAY(a0, b0, b1);\
|
||||
b5 = _mm256_xor_si256(b5, a0);\
|
||||
MUL2_2WAY(a1, b0, b1);\
|
||||
b6 = _mm256_xor_si256(b6, a1);\
|
||||
MUL2_2WAY(a2, b0, b1);\
|
||||
b7 = _mm256_xor_si256(b7, a2);\
|
||||
MUL2_2WAY(a5, b0, b1);\
|
||||
b2 = _mm256_xor_si256(b2, a5);\
|
||||
MUL2_2WAY(a6, b0, b1);\
|
||||
b3 = _mm256_xor_si256(b3, a6);\
|
||||
MUL2_2WAY(a7, b0, b1);\
|
||||
b4 = _mm256_xor_si256(b4, a7);\
|
||||
MUL2_2WAY(a3, b0, b1);\
|
||||
MUL2_2WAY(a4, b0, b1);\
|
||||
b0 = TEMP0;\
|
||||
b1 = TEMP1;\
|
||||
b0 = _mm256_xor_si256(b0, a3);\
|
||||
b1 = _mm256_xor_si256(b1, a4);\
|
||||
}/*MixBytes*/
|
||||
|
||||
#define ROUND_2WAY(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
||||
/* AddRoundConstant */\
|
||||
b1 = m256_const2_64( 0xffffffffffffffff, 0 ); \
|
||||
a0 = _mm256_xor_si256( a0, m256_const1_128( round_const_l0[i] ) );\
|
||||
a1 = _mm256_xor_si256( a1, b1 );\
|
||||
a2 = _mm256_xor_si256( a2, b1 );\
|
||||
a3 = _mm256_xor_si256( a3, b1 );\
|
||||
a4 = _mm256_xor_si256( a4, b1 );\
|
||||
a5 = _mm256_xor_si256( a5, b1 );\
|
||||
a6 = _mm256_xor_si256( a6, b1 );\
|
||||
a7 = _mm256_xor_si256( a7, m256_const1_128( round_const_l7[i] ) );\
|
||||
\
|
||||
/* ShiftBytes + SubBytes (interleaved) */\
|
||||
b0 = _mm256_xor_si256( b0, b0 );\
|
||||
a0 = _mm256_shuffle_epi8( a0, SUBSH_MASK0_2WAY );\
|
||||
a0 = _mm256_aesenclast_epi128(a0, b0 );\
|
||||
a1 = _mm256_shuffle_epi8( a1, SUBSH_MASK1_2WAY );\
|
||||
a1 = _mm256_aesenclast_epi128(a1, b0 );\
|
||||
a2 = _mm256_shuffle_epi8( a2, SUBSH_MASK2_2WAY );\
|
||||
a2 = _mm256_aesenclast_epi128(a2, b0 );\
|
||||
a3 = _mm256_shuffle_epi8( a3, SUBSH_MASK3_2WAY );\
|
||||
a3 = _mm256_aesenclast_epi128(a3, b0 );\
|
||||
a4 = _mm256_shuffle_epi8( a4, SUBSH_MASK4_2WAY );\
|
||||
a4 = _mm256_aesenclast_epi128(a4, b0 );\
|
||||
a5 = _mm256_shuffle_epi8( a5, SUBSH_MASK5_2WAY );\
|
||||
a5 = _mm256_aesenclast_epi128(a5, b0 );\
|
||||
a6 = _mm256_shuffle_epi8( a6, SUBSH_MASK6_2WAY );\
|
||||
a6 = _mm256_aesenclast_epi128(a6, b0 );\
|
||||
a7 = _mm256_shuffle_epi8( a7, SUBSH_MASK7_2WAY );\
|
||||
a7 = _mm256_aesenclast_epi128( a7, b0 );\
|
||||
\
|
||||
/* MixBytes */\
|
||||
MixBytes_2way(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
|
||||
\
|
||||
}
|
||||
|
||||
/* 10 rounds, P and Q in parallel */
|
||||
#define ROUNDS_P_Q_2WAY(){\
|
||||
ROUND_2WAY(0, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
ROUND_2WAY(1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
||||
ROUND_2WAY(2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
ROUND_2WAY(3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
||||
ROUND_2WAY(4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
ROUND_2WAY(5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
||||
ROUND_2WAY(6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
ROUND_2WAY(7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
||||
ROUND_2WAY(8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
ROUND_2WAY(9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
||||
}
|
||||
|
||||
#define Matrix_Transpose_A_2way(i0, i1, i2, i3, o1, o2, o3, t0){\
|
||||
t0 = TRANSP_MASK_2WAY;\
|
||||
\
|
||||
i0 = _mm256_shuffle_epi8( i0, t0 );\
|
||||
i1 = _mm256_shuffle_epi8( i1, t0 );\
|
||||
i2 = _mm256_shuffle_epi8( i2, t0 );\
|
||||
i3 = _mm256_shuffle_epi8( i3, t0 );\
|
||||
\
|
||||
o1 = i0;\
|
||||
t0 = i2;\
|
||||
\
|
||||
i0 = _mm256_unpacklo_epi16( i0, i1 );\
|
||||
o1 = _mm256_unpackhi_epi16( o1, i1 );\
|
||||
i2 = _mm256_unpacklo_epi16( i2, i3 );\
|
||||
t0 = _mm256_unpackhi_epi16( t0, i3 );\
|
||||
\
|
||||
i0 = _mm256_shuffle_epi32( i0, 216 );\
|
||||
o1 = _mm256_shuffle_epi32( o1, 216 );\
|
||||
i2 = _mm256_shuffle_epi32( i2, 216 );\
|
||||
t0 = _mm256_shuffle_epi32( t0, 216 );\
|
||||
\
|
||||
o2 = i0;\
|
||||
o3 = o1;\
|
||||
\
|
||||
i0 = _mm256_unpacklo_epi32( i0, i2 );\
|
||||
o1 = _mm256_unpacklo_epi32( o1, t0 );\
|
||||
o2 = _mm256_unpackhi_epi32( o2, i2 );\
|
||||
o3 = _mm256_unpackhi_epi32( o3, t0 );\
|
||||
}/**/
|
||||
|
||||
/* Interleave the P rows (i0-i3) with the Q rows (i4-i7) so each register
 * holds one row of P in its low 64 bits and one row of Q in its high 64
 * bits, per 128-bit lane.  Outputs: i0, o1..o7. */
#define Matrix_Transpose_B_2way(i0, i1, i2, i3, i4, i5, i6, i7, o1, o2, o3, o4, o5, o6, o7){\
  o1 = i0;\
  o2 = i1;\
  i0 = _mm256_unpacklo_epi64( i0, i4 );\
  o1 = _mm256_unpackhi_epi64( o1, i4 );\
  o3 = i1;\
  o4 = i2;\
  o2 = _mm256_unpacklo_epi64( o2, i5 );\
  o3 = _mm256_unpackhi_epi64( o3, i5 );\
  o5 = i2;\
  o6 = i3;\
  o4 = _mm256_unpacklo_epi64( o4, i6 );\
  o5 = _mm256_unpackhi_epi64( o5, i6 );\
  o7 = i3;\
  o6 = _mm256_unpacklo_epi64( o6, i7 );\
  o7 = _mm256_unpackhi_epi64( o7, i7 );\
}/**/
|
||||
|
||||
/* Inverse of Matrix_Transpose_B_2way: de-interleave so that each register
 * again holds two rows of only P or only Q.  P rows land in i0, i2, i4, i6;
 * Q rows in o0..o3. */
#define Matrix_Transpose_B_INV_2way(i0, i1, i2, i3, i4, i5, i6, i7, o0, o1, o2, o3){\
  o0 = i0;\
  i0 = _mm256_unpacklo_epi64( i0, i1 );\
  o0 = _mm256_unpackhi_epi64( o0, i1 );\
  o1 = i2;\
  i2 = _mm256_unpacklo_epi64( i2, i3 );\
  o1 = _mm256_unpackhi_epi64( o1, i3 );\
  o2 = i4;\
  i4 = _mm256_unpacklo_epi64( i4, i5 );\
  o2 = _mm256_unpackhi_epi64( o2, i5 );\
  o3 = i6;\
  i6 = _mm256_unpacklo_epi64( i6, i7 );\
  o3 = _mm256_unpackhi_epi64( o3, i7 );\
}/**/
|
||||
|
||||
/* Spread 2 rows per register (i0, i2, i4, i6) into 1 row per register
 * (i0-i7) by zero-padding the upper 64 bits of each lane.  t0 is scratch.
 * Used by OF512_2way to set up the output-transform P permutation. */
#define Matrix_Transpose_O_B_2way(i0, i1, i2, i3, i4, i5, i6, i7, t0){\
  /* setzero instead of xor(t0,t0): t0 may be uninitialized on entry, and \
     setzero is the dependency-free zero idiom */\
  t0 = _mm256_setzero_si256();\
  i1 = i0;\
  i3 = i2;\
  i5 = i4;\
  i7 = i6;\
  i0 = _mm256_unpacklo_epi64( i0, t0 );\
  i1 = _mm256_unpackhi_epi64( i1, t0 );\
  i2 = _mm256_unpacklo_epi64( i2, t0 );\
  i3 = _mm256_unpackhi_epi64( i3, t0 );\
  i4 = _mm256_unpacklo_epi64( i4, t0 );\
  i5 = _mm256_unpackhi_epi64( i5, t0 );\
  i6 = _mm256_unpacklo_epi64( i6, t0 );\
  i7 = _mm256_unpackhi_epi64( i7, t0 );\
}/**/
|
||||
|
||||
/* Inverse of Matrix_Transpose_O_B_2way: pack the low 64 bits of each lane
 * of adjacent register pairs back into 2 rows per register (i0, i2, i4, i6).
 * The odd registers are consumed but not rewritten. */
#define Matrix_Transpose_O_B_INV_2way(i0, i1, i2, i3, i4, i5, i6, i7){\
  i0 = _mm256_unpacklo_epi64( i0, i1 );\
  i2 = _mm256_unpacklo_epi64( i2, i3 );\
  i4 = _mm256_unpacklo_epi64( i4, i5 );\
  i6 = _mm256_unpacklo_epi64( i6, i7 );\
}/**/
|
||||
|
||||
void TF512_2way( __m256i* chaining, __m256i* message )
|
||||
{
|
||||
static __m256i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||
static __m256i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
|
||||
static __m256i TEMP0;
|
||||
static __m256i TEMP1;
|
||||
static __m256i TEMP2;
|
||||
|
||||
/* load message into registers xmm12 - xmm15 */
|
||||
xmm12 = message[0];
|
||||
xmm13 = message[1];
|
||||
xmm14 = message[2];
|
||||
xmm15 = message[3];
|
||||
|
||||
/* transform message M from column ordering into row ordering */
|
||||
/* we first put two rows (64 bit) of the message into one 128-bit xmm register */
|
||||
Matrix_Transpose_A_2way(xmm12, xmm13, xmm14, xmm15, xmm2, xmm6, xmm7, xmm0);
|
||||
|
||||
/* load previous chaining value */
|
||||
/* we first put two rows (64 bit) of the CV into one 128-bit xmm register */
|
||||
xmm8 = chaining[0];
|
||||
xmm0 = chaining[1];
|
||||
xmm4 = chaining[2];
|
||||
xmm5 = chaining[3];
|
||||
|
||||
/* xor message to CV get input of P */
|
||||
/* result: CV+M in xmm8, xmm0, xmm4, xmm5 */
|
||||
xmm8 = _mm256_xor_si256( xmm8, xmm12 );
|
||||
xmm0 = _mm256_xor_si256( xmm0, xmm2 );
|
||||
xmm4 = _mm256_xor_si256( xmm4, xmm6 );
|
||||
xmm5 = _mm256_xor_si256( xmm5, xmm7 );
|
||||
|
||||
/* there are now 2 rows of the Groestl state (P and Q) in each xmm register */
|
||||
/* unpack to get 1 row of P (64 bit) and Q (64 bit) into one xmm register */
|
||||
/* result: the 8 rows of P and Q in xmm8 - xmm12 */
|
||||
Matrix_Transpose_B_2way(xmm8, xmm0, xmm4, xmm5, xmm12, xmm2, xmm6, xmm7, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);
|
||||
|
||||
/* compute the two permutations P and Q in parallel */
|
||||
ROUNDS_P_Q_2WAY();
|
||||
|
||||
/* unpack again to get two rows of P or two rows of Q in one xmm register */
|
||||
Matrix_Transpose_B_INV_2way(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3);
|
||||
|
||||
/* xor output of P and Q */
|
||||
/* result: P(CV+M)+Q(M) in xmm0...xmm3 */
|
||||
xmm0 = _mm256_xor_si256( xmm0, xmm8 );
|
||||
xmm1 = _mm256_xor_si256( xmm1, xmm10 );
|
||||
xmm2 = _mm256_xor_si256( xmm2, xmm12 );
|
||||
xmm3 = _mm256_xor_si256( xmm3, xmm14 );
|
||||
|
||||
/* xor CV (feed-forward) */
|
||||
/* result: P(CV+M)+Q(M)+CV in xmm0...xmm3 */
|
||||
xmm0 = _mm256_xor_si256( xmm0, (chaining[0]) );
|
||||
xmm1 = _mm256_xor_si256( xmm1, (chaining[1]) );
|
||||
xmm2 = _mm256_xor_si256( xmm2, (chaining[2]) );
|
||||
xmm3 = _mm256_xor_si256( xmm3, (chaining[3]) );
|
||||
|
||||
/* store CV */
|
||||
chaining[0] = xmm0;
|
||||
chaining[1] = xmm1;
|
||||
chaining[2] = xmm2;
|
||||
chaining[3] = xmm3;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Groestl-512 output transform, 2-way interleaved: CV <- trunc(P(CV) + CV).
 * Only the truncated half of the state (chaining[2], chaining[3]) is the
 * final hash; it is written back into the chaining array.
 *
 * Fix: locals were 'static', making the function non-reentrant and
 * thread-unsafe; plain automatics are correct (each is written before use
 * by the transpose/round macros). */
void OF512_2way( __m256i* chaining )
{
  __m256i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  __m256i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  __m256i TEMP0;   /* spill slots used inside MixBytes */
  __m256i TEMP1;
  __m256i TEMP2;

  /* load CV into registers xmm8, xmm10, xmm12, xmm14 */
  xmm8 = chaining[0];
  xmm10 = chaining[1];
  xmm12 = chaining[2];
  xmm14 = chaining[3];

  /* there are now 2 rows of the CV in one xmm register */
  /* unpack to get 1 row of P (64 bit) into one half of an xmm register */
  /* result: the 8 input rows of P in xmm8 - xmm15 */
  Matrix_Transpose_O_B_2way(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0);

  /* compute the permutation P */
  /* result: the output of P(CV) in xmm8 - xmm15 */
  ROUNDS_P_Q_2WAY();

  /* unpack again to get two rows of P in one xmm register */
  /* result: P(CV) in xmm8, xmm10, xmm12, xmm14 */
  Matrix_Transpose_O_B_INV_2way(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);

  /* xor CV to P output (feed-forward) */
  /* result: P(CV)+CV in xmm8, xmm10, xmm12, xmm14 */
  xmm8 = _mm256_xor_si256( xmm8, (chaining[0]) );
  xmm10 = _mm256_xor_si256( xmm10, (chaining[1]) );
  xmm12 = _mm256_xor_si256( xmm12, (chaining[2]) );
  xmm14 = _mm256_xor_si256( xmm14, (chaining[3]) );

  /* transform state back from row ordering into column ordering */
  /* result: final hash value in xmm9, xmm11 */
  Matrix_Transpose_A_2way(xmm8, xmm10, xmm12, xmm14, xmm4, xmm9, xmm11, xmm0);

  /* we only need to return the truncated half of the state */
  chaining[2] = xmm9;
  chaining[3] = xmm11;
}
|
||||
|
||||
#endif // VAES
|
||||
#endif // GROESTL512_INTR_4WAY_H__
|
||||
#endif // GROESTL256_INTR_4WAY_H__
|
||||
|
@@ -15,7 +15,9 @@
|
||||
#include "miner.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__AVX2__) && defined(__VAES__)
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen )
|
||||
{
|
||||
@@ -137,5 +139,130 @@ int groestl512_4way_full( groestl512_4way_context* ctx, void* output,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
|
||||
// AVX2 + VAES
|
||||
|
||||
/* Initialize a 2-way Groestl-512 context.
 * hashlen is accepted for interface compatibility but unused: the digest
 * length is fixed at 512 bits.  Returns 0 on success, 1 on bad context.
 *
 * Fix: the old guard tested ctx->chaining / ctx->buffer against NULL, but
 * both are array members whose addresses can never be NULL — and computing
 * them from a NULL ctx is itself undefined behavior.  Guard ctx instead. */
int groestl512_2way_init( groestl512_2way_context* ctx, uint64_t hashlen )
{
   if ( ctx == NULL )
      return 1;

   memset_zero_256( ctx->chaining, SIZE512 );
   memset_zero_256( ctx->buffer, SIZE512 );

   // The only non-zero in the IV is len. It can be hard coded.
   ctx->chaining[ 6 ] = m256_const2_64( 0x0200000000000000, 0 );

   ctx->buf_ptr = 0;
   ctx->rem_ptr = 0;

   return 0;
}
|
||||
|
||||
/* Absorb 'databitlen' bits of input into the 2-way Groestl-512 state, apply
 * the final padding block(s), run the output transform, and copy the digest
 * to 'output'.  Returns 0.
 * NOTE(review): despite the _m128i names, the buffers here are arrays of
 * __m256i (each element carries one 128-bit word for each of the two
 * interleaved lanes) — the units are inherited from the 1-way code. */
int groestl512_2way_update_close( groestl512_2way_context* ctx, void* output,
                const void* input, uint64_t databitlen )
{
   const int len = (int)databitlen / 128;      // input length in 128-bit words
   const int hashlen_m128i = 64 / 16;          // 64-byte digest in 128-bit words
   const int hash_offset = SIZE512 - hashlen_m128i;  // digest is the tail of the state
   int rem = ctx->rem_ptr;
   int blocks = len / SIZE512;
   __m256i* in = (__m256i*)input;
   int i;

   // --- update ---

   // compress all complete input blocks
   for ( i = 0; i < blocks; i++ )
      TF1024_2way( ctx->chaining, &in[ i * SIZE512 ] );
   ctx->buf_ptr = blocks * SIZE512;

   // buffer the remaining partial block after any previously buffered data
   for ( i = 0; i < len % SIZE512; i++ )
      ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
   i += rem;   // i = first free element of the buffer

   //--- final ---

   blocks++;      // adjust for final block

   if ( i == SIZE512 - 1 )
   {
      // only 1 vector left in buffer, all padding at once:
      // 0x80 start-of-padding byte plus the big-endian block count
      ctx->buffer[i] = m256_const1_128( _mm_set_epi8(
             blocks, blocks>>8,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
   }
   else
   {
      // 0x80 marker, zero fill, then block count in the last vector
      ctx->buffer[i] = m256_const2_64( 0, 0x80 );
      for ( i += 1; i < SIZE512 - 1; i++ )
         ctx->buffer[i] = m256_zero;
      ctx->buffer[i] = m256_const1_128( _mm_set_epi8(
             blocks, blocks>>8, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 ) );
   }

   TF1024_2way( ctx->chaining, ctx->buffer );
   OF1024_2way( ctx->chaining );

   // copy the truncated (final) portion of the state to the output
   for ( i = 0; i < hashlen_m128i; i++ )
      casti_m256i( output, i ) = ctx->chaining[ hash_offset + i ];

   return 0;
}
|
||||
|
||||
/* One-shot 2-way Groestl-512: init + absorb 'datalen' BYTES of input +
 * close, writing the 64-byte digest (per lane) to 'output'.  Returns 0.
 * Unlike update_close, the length argument here is in bytes, not bits. */
int groestl512_2way_full( groestl512_2way_context* ctx, void* output,
                const void* input, uint64_t datalen )
{
   const int len = (int)datalen >> 4;          // input length in 128-bit words
   const int hashlen_m128i = 64 >> 4;          // 64-byte digest in 128-bit words
   const int hash_offset = SIZE512 - hashlen_m128i;  // digest is the tail of the state
   uint64_t blocks = len / SIZE512;
   __m256i* in = (__m256i*)input;
   int i;

   // --- init ---

   memset_zero_256( ctx->chaining, SIZE512 );
   memset_zero_256( ctx->buffer, SIZE512 );
   // the only non-zero word of the IV is the hash length (512 bits)
   ctx->chaining[ 6 ] = m256_const2_64( 0x0200000000000000, 0 );
   ctx->buf_ptr = 0;
   ctx->rem_ptr = 0;

   // --- update ---

   // compress all complete input blocks
   for ( i = 0; i < blocks; i++ )
      TF1024_2way( ctx->chaining, &in[ i * SIZE512 ] );
   ctx->buf_ptr = blocks * SIZE512;

   // buffer the remaining partial block
   for ( i = 0; i < len % SIZE512; i++ )
      ctx->buffer[ ctx->rem_ptr + i ] = in[ ctx->buf_ptr + i ];
   i += ctx->rem_ptr;   // i = first free element of the buffer

   // --- close ---

   blocks++;   // adjust for final block

   if ( i == SIZE512 - 1 )
   {
      // only 1 vector left in buffer, all padding at once:
      // 0x80 marker in the low byte, block count in the top byte(s)
      ctx->buffer[i] = m256_const2_64( blocks << 56, 0x80 );
   }
   else
   {
      // 0x80 marker, zero fill, then block count in the last vector
      ctx->buffer[i] = m256_const2_64( 0, 0x80 );
      for ( i += 1; i < SIZE512 - 1; i++ )
         ctx->buffer[i] = m256_zero;
      ctx->buffer[i] = m256_const2_64( blocks << 56, 0 );
   }

   TF1024_2way( ctx->chaining, ctx->buffer );
   OF1024_2way( ctx->chaining );

   // copy the truncated (final) portion of the state to the output
   for ( i = 0; i < hashlen_m128i; i++ )
      casti_m256i( output, i ) = ctx->chaining[ hash_offset + i ];

   return 0;
}
|
||||
|
||||
#endif // VAES
|
||||
|
||||
|
@@ -10,7 +10,7 @@
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__AVX2__) && defined(__VAES__)
|
||||
|
||||
#define LENGTH (512)
|
||||
|
||||
@@ -36,20 +36,19 @@
|
||||
|
||||
#define SIZE512 (SIZE_1024/16)
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
/* 4-way (AVX512) Groestl-512 context: four independent 128-bit lanes per
 * __m512i element. */
typedef struct {
   __attribute__ ((aligned (128))) __m512i chaining[SIZE512];  // chaining value / state
   __attribute__ ((aligned (64))) __m512i buffer[SIZE512];     // partial input block
   int blk_count;              // SIZE_m128i
   int buf_ptr;                // __m128i offset
   int rem_ptr;                // buffered remainder offset
   int databitlen;             // bits
} groestl512_4way_context;
|
||||
|
||||
|
||||
int groestl512_4way_init( groestl512_4way_context*, uint64_t );
|
||||
|
||||
//int reinit_groestl( hashState_groestl* );
|
||||
|
||||
int groestl512_4way_update( groestl512_4way_context*, const void*,
|
||||
uint64_t );
|
||||
int groestl512_4way_close( groestl512_4way_context*, void* );
|
||||
@@ -58,5 +57,29 @@ int groestl512_4way_update_close( groestl512_4way_context*, void*,
|
||||
int groestl512_4way_full( groestl512_4way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
// AVX2 + VAES
|
||||
|
||||
/* 2-way (AVX2 + VAES) Groestl-512 context: two independent 128-bit lanes
 * per __m256i element. */
typedef struct {
   __attribute__ ((aligned (128))) __m256i chaining[SIZE512];  // chaining value / state
   __attribute__ ((aligned (64))) __m256i buffer[SIZE512];     // partial input block
   int blk_count;              // SIZE_m128i
   int buf_ptr;                // __m128i offset
   int rem_ptr;                // buffered remainder offset
} groestl512_2way_context;
|
||||
|
||||
|
||||
int groestl512_2way_init( groestl512_2way_context*, uint64_t );
|
||||
|
||||
int groestl512_2way_update( groestl512_2way_context*, const void*,
|
||||
uint64_t );
|
||||
int groestl512_2way_close( groestl512_2way_context*, void* );
|
||||
int groestl512_2way_update_close( groestl512_2way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
int groestl512_2way_full( groestl512_2way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
|
||||
#endif // VAES
|
||||
#endif // GROESTL512_HASH_4WAY_H__
|
||||
|
@@ -7,13 +7,12 @@
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(GROESTL512_INTR_4WAY_H__)
|
||||
#define GROESTL512_INTR_4WAY_H__ 1
|
||||
|
||||
#include "groestl512-hash-4way.h"
|
||||
|
||||
#if defined(__VAES__)
|
||||
#if defined(__AVX2__) && defined(__VAES__)
|
||||
|
||||
static const __m128i round_const_p[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
@@ -51,6 +50,8 @@ static const __m128i round_const_q[] __attribute__ ((aligned (64))) =
|
||||
{ 0x8292a2b2c2d2e2f2, 0x0212223242526272 }
|
||||
};
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02,
|
||||
0x1d1519111c141810, 0x1f171b131e161a12,
|
||||
0x2d2529212c242820, 0x2f272b232e262a22,
|
||||
@@ -661,5 +662,578 @@ void OF1024_4way( __m512i* chaining )
|
||||
return;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
// AVX2 + VAES
|
||||
|
||||
/* 2-way byte-shuffle constants: each __m256i repeats the corresponding
 * 128-bit mask of the 1-way code in both lanes (high lane bytes offset by
 * 0x10).  TRANSP_MASK_2WAY drives the matrix transposes; SUBSH_MASK0-7
 * implement the per-row ShiftBytes rotations of Groestl P/Q. */
static const __m256i TRANSP_MASK_2WAY =
                 { 0x0d0509010c040800, 0x0f070b030e060a02,
                   0x1d1519111c141810, 0x1f171b131e161a12 };

static const __m256i SUBSH_MASK0_2WAY =
                 { 0x0b0e0104070a0d00, 0x0306090c0f020508,
                   0x1b1e1114171a1d10, 0x1316191c1f121518 };

static const __m256i SUBSH_MASK1_2WAY =
                 { 0x0c0f0205080b0e01, 0x04070a0d00030609,
                   0x1c1f1215181b1e11, 0x14171a1d10131619 };

static const __m256i SUBSH_MASK2_2WAY =
                 { 0x0d000306090c0f02, 0x05080b0e0104070a,
                   0x1d101316191c1f12, 0x15181b1e1114171a };

static const __m256i SUBSH_MASK3_2WAY =
                 { 0x0e0104070a0d0003, 0x06090c0f0205080b,
                   0x1e1114171a1d1013, 0x16191c1f1215181b };

static const __m256i SUBSH_MASK4_2WAY =
                 { 0x0f0205080b0e0104, 0x070a0d000306090c,
                   0x1f1215181b1e1114, 0x171a1d101316191c };

static const __m256i SUBSH_MASK5_2WAY =
                 { 0x000306090c0f0205, 0x080b0e0104070a0d,
                   0x101316191c1f1215, 0x181b1e1114171a1d };

static const __m256i SUBSH_MASK6_2WAY =
                 { 0x0104070a0d000306, 0x090c0f0205080b0e,
                   0x1114171a1d101316, 0x191c1f1215181b1e };

static const __m256i SUBSH_MASK7_2WAY =
                 { 0x06090c0f0205080b, 0x0e0104070a0d0003,
                   0x16191c1f1215181b, 0x1e1114171a1d1013 };

/* stringizing helpers (legacy, kept for compatibility) */
#define tos(a)  #a
#define tostr(a) tos(a)
|
||||
|
||||
/* GF(2^8) doubling of every byte of i (xtime), 2-way.
 * xmm[i] will be multiplied by 2
 * xmm[j] will be lost (pure scratch, written before read)
 * xmm[k] has to be all 0x1b (the AES reduction polynomial) */
#define MUL2_2WAY(i, j, k){\
  j = _mm256_xor_si256(j, j);\
  /* j = 0xff where the top bit of i is set (bytes that will overflow) */\
  j = _mm256_cmpgt_epi8(j, i );\
  i = _mm256_add_epi8(i, i);\
  /* reduce overflowed bytes by xoring in 0x1b */\
  j = _mm256_and_si256(j, k);\
  i = _mm256_xor_si256(i, j);\
}
|
||||
|
||||
/* Groestl MixBytes: multiply the state columns by the circulant MDS matrix
 * B = circ(2,2,3,4,5,3,5,7) over GF(2^8), 2-way interleaved.
 * a0-a7 = input rows (clobbered), b0-b7 = output rows.
 * TEMP0-TEMP2 are file/function-scope __m256i spill slots.
 * The statement ordering is register-pressure tuned — do not reorder. */
#define MixBytes_2way(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
  /* t_i = a_i + a_{i+1} */\
  b6 = a0;\
  b7 = a1;\
  a0 = _mm256_xor_si256(a0, a1);\
  b0 = a2;\
  a1 = _mm256_xor_si256(a1, a2);\
  b1 = a3;\
  a2 = _mm256_xor_si256(a2, a3);\
  b2 = a4;\
  a3 = _mm256_xor_si256(a3, a4);\
  b3 = a5;\
  a4 = _mm256_xor_si256(a4, a5);\
  b4 = a6;\
  a5 = _mm256_xor_si256(a5, a6);\
  b5 = a7;\
  a6 = _mm256_xor_si256(a6, a7);\
  a7 = _mm256_xor_si256(a7, b6);\
  \
  /* build y4 y5 y6 ... in regs xmm8, xmm9, xmm10 by adding t_i*/\
  b0 = _mm256_xor_si256(b0, a4);\
  b6 = _mm256_xor_si256(b6, a4);\
  b1 = _mm256_xor_si256(b1, a5);\
  b7 = _mm256_xor_si256(b7, a5);\
  b2 = _mm256_xor_si256(b2, a6);\
  b0 = _mm256_xor_si256(b0, a6);\
  /* spill values y_4, y_5 to memory */\
  TEMP0 = b0;\
  b3 = _mm256_xor_si256(b3, a7);\
  b1 = _mm256_xor_si256(b1, a7);\
  TEMP1 = b1;\
  b4 = _mm256_xor_si256(b4, a0);\
  b2 = _mm256_xor_si256(b2, a0);\
  /* save values t0, t1, t2 to xmm8, xmm9 and memory */\
  b0 = a0;\
  b5 = _mm256_xor_si256(b5, a1);\
  b3 = _mm256_xor_si256(b3, a1);\
  b1 = a1;\
  b6 = _mm256_xor_si256(b6, a2);\
  b4 = _mm256_xor_si256(b4, a2);\
  TEMP2 = a2;\
  b7 = _mm256_xor_si256(b7, a3);\
  b5 = _mm256_xor_si256(b5, a3);\
  \
  /* compute x_i = t_i + t_{i+3} */\
  a0 = _mm256_xor_si256(a0, a3);\
  a1 = _mm256_xor_si256(a1, a4);\
  a2 = _mm256_xor_si256(a2, a5);\
  a3 = _mm256_xor_si256(a3, a6);\
  a4 = _mm256_xor_si256(a4, a7);\
  a5 = _mm256_xor_si256(a5, b0);\
  a6 = _mm256_xor_si256(a6, b1);\
  a7 = _mm256_xor_si256(a7, TEMP2);\
  \
  /* compute z_i : double x_i using temp xmm8 and 1B xmm9 */\
  /* compute w_i : add y_{i+4} */\
  b1 = m256_const1_64( 0x1b1b1b1b1b1b1b1b );\
  MUL2_2WAY(a0, b0, b1);\
  a0 = _mm256_xor_si256(a0, TEMP0);\
  MUL2_2WAY(a1, b0, b1);\
  a1 = _mm256_xor_si256(a1, TEMP1);\
  MUL2_2WAY(a2, b0, b1);\
  a2 = _mm256_xor_si256(a2, b2);\
  MUL2_2WAY(a3, b0, b1);\
  a3 = _mm256_xor_si256(a3, b3);\
  MUL2_2WAY(a4, b0, b1);\
  a4 = _mm256_xor_si256(a4, b4);\
  MUL2_2WAY(a5, b0, b1);\
  a5 = _mm256_xor_si256(a5, b5);\
  MUL2_2WAY(a6, b0, b1);\
  a6 = _mm256_xor_si256(a6, b6);\
  MUL2_2WAY(a7, b0, b1);\
  a7 = _mm256_xor_si256(a7, b7);\
  \
  /* compute v_i : double w_i */\
  /* add to y_4 y_5 .. v3, v4, ... */\
  MUL2_2WAY(a0, b0, b1);\
  b5 = _mm256_xor_si256(b5, a0);\
  MUL2_2WAY(a1, b0, b1);\
  b6 = _mm256_xor_si256(b6, a1);\
  MUL2_2WAY(a2, b0, b1);\
  b7 = _mm256_xor_si256(b7, a2);\
  MUL2_2WAY(a5, b0, b1);\
  b2 = _mm256_xor_si256(b2, a5);\
  MUL2_2WAY(a6, b0, b1);\
  b3 = _mm256_xor_si256(b3, a6);\
  MUL2_2WAY(a7, b0, b1);\
  b4 = _mm256_xor_si256(b4, a7);\
  MUL2_2WAY(a3, b0, b1);\
  MUL2_2WAY(a4, b0, b1);\
  b0 = TEMP0;\
  b1 = TEMP1;\
  b0 = _mm256_xor_si256(b0, a3);\
  b1 = _mm256_xor_si256(b1, a4);\
}/*MixBytes*/
|
||||
|
||||
/* one round: SubBytes followed by MixBytes
 * a0-a7 = input rows
 * b0-b7 = output rows
 * AESENCLAST with an all-zero round key performs exactly the AES S-box
 * (SubBytes) with no AddRoundKey effect; ShiftRows was pre-compensated by
 * the SUBSH masks applied before this macro. */
#define SUBMIX_2WAY(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
  /* SubBytes */\
  /* setzero instead of xor(b0,b0): b0 may be uninitialized on the first \
     round, and setzero is the dependency-free zero idiom */\
  b0 = _mm256_setzero_si256();\
  a0 = _mm256_aesenclast_epi128( a0, b0 );\
  a1 = _mm256_aesenclast_epi128( a1, b0 );\
  a2 = _mm256_aesenclast_epi128( a2, b0 );\
  a3 = _mm256_aesenclast_epi128( a3, b0 );\
  a4 = _mm256_aesenclast_epi128( a4, b0 );\
  a5 = _mm256_aesenclast_epi128( a5, b0 );\
  a6 = _mm256_aesenclast_epi128( a6, b0 );\
  a7 = _mm256_aesenclast_epi128( a7, b0 );\
  /* MixBytes */\
  MixBytes_2way(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
}
|
||||
|
||||
/* All 14 rounds of the Groestl-1024 P permutation, two rounds per loop
 * iteration, ping-ponging the state between xmm8-15 and xmm0-7.
 * Each round: AddRoundConstant, ShiftBytes (byte shuffle), then
 * SubBytes + MixBytes via SUBMIX_2WAY. */
#define ROUNDS_P_2WAY(){\
  uint8_t round_counter = 0;\
  for ( round_counter = 0; round_counter < 14; round_counter += 2 ) \
  { \
    /* AddRoundConstant P1024 */\
    xmm8 = _mm256_xor_si256( xmm8, m256_const1_128( \
               casti_m128i( round_const_p, round_counter ) ) ); \
    /* ShiftBytes P1024 + pre-AESENCLAST */\
    xmm8 = _mm256_shuffle_epi8( xmm8, SUBSH_MASK0_2WAY ); \
    xmm9 = _mm256_shuffle_epi8( xmm9, SUBSH_MASK1_2WAY );\
    xmm10 = _mm256_shuffle_epi8( xmm10, SUBSH_MASK2_2WAY );\
    xmm11 = _mm256_shuffle_epi8( xmm11, SUBSH_MASK3_2WAY );\
    xmm12 = _mm256_shuffle_epi8( xmm12, SUBSH_MASK4_2WAY );\
    xmm13 = _mm256_shuffle_epi8( xmm13, SUBSH_MASK5_2WAY );\
    xmm14 = _mm256_shuffle_epi8( xmm14, SUBSH_MASK6_2WAY );\
    xmm15 = _mm256_shuffle_epi8( xmm15, SUBSH_MASK7_2WAY );\
    /* SubBytes + MixBytes */\
    SUBMIX_2WAY(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
    \
    /* AddRoundConstant P1024 */\
    xmm0 = _mm256_xor_si256( xmm0, m256_const1_128( \
               casti_m128i( round_const_p, round_counter+1 ) ) ); \
    /* ShiftBytes P1024 + pre-AESENCLAST */\
    xmm0 = _mm256_shuffle_epi8( xmm0, SUBSH_MASK0_2WAY );\
    xmm1 = _mm256_shuffle_epi8( xmm1, SUBSH_MASK1_2WAY );\
    xmm2 = _mm256_shuffle_epi8( xmm2, SUBSH_MASK2_2WAY );\
    xmm3 = _mm256_shuffle_epi8( xmm3, SUBSH_MASK3_2WAY );\
    xmm4 = _mm256_shuffle_epi8( xmm4, SUBSH_MASK4_2WAY );\
    xmm5 = _mm256_shuffle_epi8( xmm5, SUBSH_MASK5_2WAY );\
    xmm6 = _mm256_shuffle_epi8( xmm6, SUBSH_MASK6_2WAY );\
    xmm7 = _mm256_shuffle_epi8( xmm7, SUBSH_MASK7_2WAY );\
    /* SubBytes + MixBytes */\
    SUBMIX_2WAY(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
  }\
}
|
||||
|
||||
/* All 14 rounds of the Groestl-1024 Q permutation.  Q differs from P in its
 * round constants (rows 0-6 xored with all-ones, row 7 with the q table)
 * and in its ShiftBytes offsets (odd SUBSH mask assignment). */
#define ROUNDS_Q_2WAY(){\
  uint8_t round_counter = 0;\
  for ( round_counter = 0; round_counter < 14; round_counter += 2) \
  { \
    /* AddRoundConstant Q1024 */\
    xmm1 = m256_neg1;\
    xmm8 = _mm256_xor_si256( xmm8, xmm1 );\
    xmm9 = _mm256_xor_si256( xmm9, xmm1 );\
    xmm10 = _mm256_xor_si256( xmm10, xmm1 );\
    xmm11 = _mm256_xor_si256( xmm11, xmm1 );\
    xmm12 = _mm256_xor_si256( xmm12, xmm1 );\
    xmm13 = _mm256_xor_si256( xmm13, xmm1 );\
    xmm14 = _mm256_xor_si256( xmm14, xmm1 );\
    xmm15 = _mm256_xor_si256( xmm15, m256_const1_128( \
               casti_m128i( round_const_q, round_counter ) ) ); \
    /* ShiftBytes Q1024 + pre-AESENCLAST */\
    xmm8 = _mm256_shuffle_epi8( xmm8, SUBSH_MASK1_2WAY );\
    xmm9 = _mm256_shuffle_epi8( xmm9, SUBSH_MASK3_2WAY );\
    xmm10 = _mm256_shuffle_epi8( xmm10, SUBSH_MASK5_2WAY );\
    xmm11 = _mm256_shuffle_epi8( xmm11, SUBSH_MASK7_2WAY );\
    xmm12 = _mm256_shuffle_epi8( xmm12, SUBSH_MASK0_2WAY );\
    xmm13 = _mm256_shuffle_epi8( xmm13, SUBSH_MASK2_2WAY );\
    xmm14 = _mm256_shuffle_epi8( xmm14, SUBSH_MASK4_2WAY );\
    xmm15 = _mm256_shuffle_epi8( xmm15, SUBSH_MASK6_2WAY );\
    /* SubBytes + MixBytes */\
    SUBMIX_2WAY(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
    \
    /* AddRoundConstant Q1024 */\
    xmm9 = m256_neg1;\
    xmm0 = _mm256_xor_si256( xmm0, xmm9 );\
    xmm1 = _mm256_xor_si256( xmm1, xmm9 );\
    xmm2 = _mm256_xor_si256( xmm2, xmm9 );\
    xmm3 = _mm256_xor_si256( xmm3, xmm9 );\
    xmm4 = _mm256_xor_si256( xmm4, xmm9 );\
    xmm5 = _mm256_xor_si256( xmm5, xmm9 );\
    xmm6 = _mm256_xor_si256( xmm6, xmm9 );\
    xmm7 = _mm256_xor_si256( xmm7, m256_const1_128( \
               casti_m128i( round_const_q, round_counter+1 ) ) ); \
    /* ShiftBytes Q1024 + pre-AESENCLAST */\
    xmm0 = _mm256_shuffle_epi8( xmm0, SUBSH_MASK1_2WAY );\
    xmm1 = _mm256_shuffle_epi8( xmm1, SUBSH_MASK3_2WAY );\
    xmm2 = _mm256_shuffle_epi8( xmm2, SUBSH_MASK5_2WAY );\
    xmm3 = _mm256_shuffle_epi8( xmm3, SUBSH_MASK7_2WAY );\
    xmm4 = _mm256_shuffle_epi8( xmm4, SUBSH_MASK0_2WAY );\
    xmm5 = _mm256_shuffle_epi8( xmm5, SUBSH_MASK2_2WAY );\
    xmm6 = _mm256_shuffle_epi8( xmm6, SUBSH_MASK4_2WAY );\
    xmm7 = _mm256_shuffle_epi8( xmm7, SUBSH_MASK6_2WAY );\
    /* SubBytes + MixBytes */\
    SUBMIX_2WAY(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
  }\
}
|
||||
|
||||
/* Full 8-row transpose from column ordering into row ordering for the
 * 1024-bit state, 2-way interleaved.  Inputs/outputs in i0-i7; t0-t7 are
 * scratch (all written before read).  Statement order is pressure-tuned. */
#define Matrix_Transpose_2way(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\
  t0 = TRANSP_MASK_2WAY;\
  \
  i6 = _mm256_shuffle_epi8(i6, t0);\
  i0 = _mm256_shuffle_epi8(i0, t0);\
  i1 = _mm256_shuffle_epi8(i1, t0);\
  i2 = _mm256_shuffle_epi8(i2, t0);\
  t1 = i2;\
  i4 = _mm256_shuffle_epi8(i4, t0);\
  i5 = _mm256_shuffle_epi8(i5, t0);\
  t2 = i4;\
  t3 = i6;\
  i7 = _mm256_shuffle_epi8(i7, t0);\
  \
  /* continue with unpack using 4 temp registers */\
  t0 = i0;\
  t2 = _mm256_unpackhi_epi16(t2, i5);\
  i4 = _mm256_unpacklo_epi16(i4, i5);\
  t3 = _mm256_unpackhi_epi16(t3, i7);\
  i6 = _mm256_unpacklo_epi16(i6, i7);\
  t0 = _mm256_unpackhi_epi16(t0, i1);\
  t1 = _mm256_unpackhi_epi16(t1, i3);\
  i2 = _mm256_unpacklo_epi16(i2, i3);\
  i0 = _mm256_unpacklo_epi16(i0, i1);\
  \
  /* shuffle with immediate (216 == 0xD8: swap middle dwords) */\
  t0 = _mm256_shuffle_epi32(t0, 216);\
  t1 = _mm256_shuffle_epi32(t1, 216);\
  t2 = _mm256_shuffle_epi32(t2, 216);\
  t3 = _mm256_shuffle_epi32(t3, 216);\
  i0 = _mm256_shuffle_epi32(i0, 216);\
  i2 = _mm256_shuffle_epi32(i2, 216);\
  i4 = _mm256_shuffle_epi32(i4, 216);\
  i6 = _mm256_shuffle_epi32(i6, 216);\
  \
  /* continue with unpack */\
  t4 = i0;\
  i0 = _mm256_unpacklo_epi32(i0, i2);\
  t4 = _mm256_unpackhi_epi32(t4, i2);\
  t5 = t0;\
  t0 = _mm256_unpacklo_epi32(t0, t1);\
  t5 = _mm256_unpackhi_epi32(t5, t1);\
  t6 = i4;\
  i4 = _mm256_unpacklo_epi32(i4, i6);\
  t7 = t2;\
  t6 = _mm256_unpackhi_epi32(t6, i6);\
  i2 = t0;\
  t2 = _mm256_unpacklo_epi32(t2, t3);\
  i3 = t0;\
  t7 = _mm256_unpackhi_epi32(t7, t3);\
  \
  /* there are now 2 rows in each xmm */\
  /* unpack to get 1 row of CV in each xmm */\
  i1 = i0;\
  i1 = _mm256_unpackhi_epi64(i1, i4);\
  i0 = _mm256_unpacklo_epi64(i0, i4);\
  i4 = t4;\
  i3 = _mm256_unpackhi_epi64(i3, t2);\
  i5 = t4;\
  i2 = _mm256_unpacklo_epi64(i2, t2);\
  i6 = t5;\
  i5 = _mm256_unpackhi_epi64(i5, t6);\
  i7 = t5;\
  i4 = _mm256_unpacklo_epi64(i4, t6);\
  i7 = _mm256_unpackhi_epi64(i7, t7);\
  i6 = _mm256_unpacklo_epi64(i6, t7);\
  /* transpose done */\
}/**/
|
||||
|
||||
/* Inverse transpose: convert the 1024-bit state from row ordering back into
 * column (output) ordering, 2-way interleaved.  Results land in i0-i7 with
 * o0-o2 as outputs for three rows; t0-t4 are scratch. */
#define Matrix_Transpose_INV_2way(i0, i1, i2, i3, i4, i5, i6, i7, o0, o1, o2, t0, t1, t2, t3, t4){\
  /* transpose matrix to get output format */\
  o1 = i0;\
  i0 = _mm256_unpacklo_epi64(i0, i1);\
  o1 = _mm256_unpackhi_epi64(o1, i1);\
  t0 = i2;\
  i2 = _mm256_unpacklo_epi64(i2, i3);\
  t0 = _mm256_unpackhi_epi64(t0, i3);\
  t1 = i4;\
  i4 = _mm256_unpacklo_epi64(i4, i5);\
  t1 = _mm256_unpackhi_epi64(t1, i5);\
  t2 = i6;\
  o0 = TRANSP_MASK_2WAY;\
  i6 = _mm256_unpacklo_epi64(i6, i7);\
  t2 = _mm256_unpackhi_epi64(t2, i7);\
  /* load transpose mask into a register, because it will be used 8 times */\
  i0 = _mm256_shuffle_epi8(i0, o0);\
  i2 = _mm256_shuffle_epi8(i2, o0);\
  i4 = _mm256_shuffle_epi8(i4, o0);\
  i6 = _mm256_shuffle_epi8(i6, o0);\
  o1 = _mm256_shuffle_epi8(o1, o0);\
  t0 = _mm256_shuffle_epi8(t0, o0);\
  t1 = _mm256_shuffle_epi8(t1, o0);\
  t2 = _mm256_shuffle_epi8(t2, o0);\
  /* continue with unpack using 4 temp registers */\
  t3 = i4;\
  o2 = o1;\
  o0 = i0;\
  t4 = t1;\
  \
  t3 = _mm256_unpackhi_epi16(t3, i6);\
  i4 = _mm256_unpacklo_epi16(i4, i6);\
  o0 = _mm256_unpackhi_epi16(o0, i2);\
  i0 = _mm256_unpacklo_epi16(i0, i2);\
  o2 = _mm256_unpackhi_epi16(o2, t0);\
  o1 = _mm256_unpacklo_epi16(o1, t0);\
  t4 = _mm256_unpackhi_epi16(t4, t2);\
  t1 = _mm256_unpacklo_epi16(t1, t2);\
  /* shuffle with immediate (216 == 0xD8: swap middle dwords) */\
  i4 = _mm256_shuffle_epi32(i4, 216);\
  t3 = _mm256_shuffle_epi32(t3, 216);\
  o1 = _mm256_shuffle_epi32(o1, 216);\
  o2 = _mm256_shuffle_epi32(o2, 216);\
  i0 = _mm256_shuffle_epi32(i0, 216);\
  o0 = _mm256_shuffle_epi32(o0, 216);\
  t1 = _mm256_shuffle_epi32(t1, 216);\
  t4 = _mm256_shuffle_epi32(t4, 216);\
  /* continue with unpack */\
  i1 = i0;\
  i3 = o0;\
  i5 = o1;\
  i7 = o2;\
  i0 = _mm256_unpacklo_epi32(i0, i4);\
  i1 = _mm256_unpackhi_epi32(i1, i4);\
  o0 = _mm256_unpacklo_epi32(o0, t3);\
  i3 = _mm256_unpackhi_epi32(i3, t3);\
  o1 = _mm256_unpacklo_epi32(o1, t1);\
  i5 = _mm256_unpackhi_epi32(i5, t1);\
  o2 = _mm256_unpacklo_epi32(o2, t4);\
  i7 = _mm256_unpackhi_epi32(i7, t4);\
  /* transpose done */\
}/**/
|
||||
|
||||
/* Transpose the 8 x __m256i IV/chaining value from column ordering into the
 * row ordering used internally by the round functions, in place.
 *
 * Fix: locals were 'static', making the function non-reentrant and
 * thread-unsafe; all are written before read inside the transpose macro,
 * so plain automatics are correct. */
void INIT_2way( __m256i *chaining )
{
  __m256i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  __m256i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;

  /* load IV into registers xmm8 - xmm15 */
  xmm8 = chaining[0];
  xmm9 = chaining[1];
  xmm10 = chaining[2];
  xmm11 = chaining[3];
  xmm12 = chaining[4];
  xmm13 = chaining[5];
  xmm14 = chaining[6];
  xmm15 = chaining[7];

  /* transform chaining value from column ordering into row ordering */
  Matrix_Transpose_2way(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);

  /* store transposed IV */
  chaining[0] = xmm8;
  chaining[1] = xmm9;
  chaining[2] = xmm10;
  chaining[3] = xmm11;
  chaining[4] = xmm12;
  chaining[5] = xmm13;
  chaining[6] = xmm14;
  chaining[7] = xmm15;
}
|
||||
|
||||
void TF1024_2way( __m256i *chaining, const __m256i *message )
|
||||
{
|
||||
static __m256i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||
static __m256i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
|
||||
static __m256i QTEMP[8];
|
||||
static __m256i TEMP0;
|
||||
static __m256i TEMP1;
|
||||
static __m256i TEMP2;
|
||||
|
||||
/* load message into registers xmm8 - xmm15 (Q = message) */
|
||||
xmm8 = message[0];
|
||||
xmm9 = message[1];
|
||||
xmm10 = message[2];
|
||||
xmm11 = message[3];
|
||||
xmm12 = message[4];
|
||||
xmm13 = message[5];
|
||||
xmm14 = message[6];
|
||||
xmm15 = message[7];
|
||||
|
||||
/* transform message M from column ordering into row ordering */
|
||||
Matrix_Transpose_2way(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
||||
|
||||
/* store message M (Q input) for later */
|
||||
QTEMP[0] = xmm8;
|
||||
QTEMP[1] = xmm9;
|
||||
QTEMP[2] = xmm10;
|
||||
QTEMP[3] = xmm11;
|
||||
QTEMP[4] = xmm12;
|
||||
QTEMP[5] = xmm13;
|
||||
QTEMP[6] = xmm14;
|
||||
QTEMP[7] = xmm15;
|
||||
|
||||
/* xor CV to message to get P input */
|
||||
/* result: CV+M in xmm8...xmm15 */
|
||||
xmm8 = _mm256_xor_si256( xmm8, (chaining[0]) );
|
||||
xmm9 = _mm256_xor_si256( xmm9, (chaining[1]) );
|
||||
xmm10 = _mm256_xor_si256( xmm10, (chaining[2]) );
|
||||
xmm11 = _mm256_xor_si256( xmm11, (chaining[3]) );
|
||||
xmm12 = _mm256_xor_si256( xmm12, (chaining[4]) );
|
||||
xmm13 = _mm256_xor_si256( xmm13, (chaining[5]) );
|
||||
xmm14 = _mm256_xor_si256( xmm14, (chaining[6]) );
|
||||
xmm15 = _mm256_xor_si256( xmm15, (chaining[7]) );
|
||||
|
||||
/* compute permutation P */
|
||||
/* result: P(CV+M) in xmm8...xmm15 */
|
||||
ROUNDS_P_2WAY();
|
||||
|
||||
/* xor CV to P output (feed-forward) */
|
||||
/* result: P(CV+M)+CV in xmm8...xmm15 */
|
||||
xmm8 = _mm256_xor_si256( xmm8, (chaining[0]) );
|
||||
xmm9 = _mm256_xor_si256( xmm9, (chaining[1]) );
|
||||
xmm10 = _mm256_xor_si256( xmm10, (chaining[2]) );
|
||||
xmm11 = _mm256_xor_si256( xmm11, (chaining[3]) );
|
||||
xmm12 = _mm256_xor_si256( xmm12, (chaining[4]) );
|
||||
xmm13 = _mm256_xor_si256( xmm13, (chaining[5]) );
|
||||
xmm14 = _mm256_xor_si256( xmm14, (chaining[6]) );
|
||||
xmm15 = _mm256_xor_si256( xmm15, (chaining[7]) );
|
||||
|
||||
/* store P(CV+M)+CV */
|
||||
chaining[0] = xmm8;
|
||||
chaining[1] = xmm9;
|
||||
chaining[2] = xmm10;
|
||||
chaining[3] = xmm11;
|
||||
chaining[4] = xmm12;
|
||||
chaining[5] = xmm13;
|
||||
chaining[6] = xmm14;
|
||||
chaining[7] = xmm15;
|
||||
|
||||
/* load message M (Q input) into xmm8-15 */
|
||||
xmm8 = QTEMP[0];
|
||||
xmm9 = QTEMP[1];
|
||||
xmm10 = QTEMP[2];
|
||||
xmm11 = QTEMP[3];
|
||||
xmm12 = QTEMP[4];
|
||||
xmm13 = QTEMP[5];
|
||||
xmm14 = QTEMP[6];
|
||||
xmm15 = QTEMP[7];
|
||||
|
||||
/* compute permutation Q */
|
||||
/* result: Q(M) in xmm8...xmm15 */
|
||||
ROUNDS_Q_2WAY();
|
||||
|
||||
/* xor Q output */
|
||||
/* result: P(CV+M)+CV+Q(M) in xmm8...xmm15 */
|
||||
xmm8 = _mm256_xor_si256( xmm8, (chaining[0]) );
|
||||
xmm9 = _mm256_xor_si256( xmm9, (chaining[1]) );
|
||||
xmm10 = _mm256_xor_si256( xmm10, (chaining[2]) );
|
||||
xmm11 = _mm256_xor_si256( xmm11, (chaining[3]) );
|
||||
xmm12 = _mm256_xor_si256( xmm12, (chaining[4]) );
|
||||
xmm13 = _mm256_xor_si256( xmm13, (chaining[5]) );
|
||||
xmm14 = _mm256_xor_si256( xmm14, (chaining[6]) );
|
||||
xmm15 = _mm256_xor_si256( xmm15, (chaining[7]) );
|
||||
|
||||
/* store CV */
|
||||
chaining[0] = xmm8;
|
||||
chaining[1] = xmm9;
|
||||
chaining[2] = xmm10;
|
||||
chaining[3] = xmm11;
|
||||
chaining[4] = xmm12;
|
||||
chaining[5] = xmm13;
|
||||
chaining[6] = xmm14;
|
||||
chaining[7] = xmm15;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void OF1024_2way( __m256i* chaining )
|
||||
{
|
||||
static __m256i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||
static __m256i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
|
||||
static __m256i TEMP0;
|
||||
static __m256i TEMP1;
|
||||
static __m256i TEMP2;
|
||||
|
||||
/* load CV into registers xmm8 - xmm15 */
|
||||
xmm8 = chaining[0];
|
||||
xmm9 = chaining[1];
|
||||
xmm10 = chaining[2];
|
||||
xmm11 = chaining[3];
|
||||
xmm12 = chaining[4];
|
||||
xmm13 = chaining[5];
|
||||
xmm14 = chaining[6];
|
||||
xmm15 = chaining[7];
|
||||
|
||||
/* compute permutation P */
|
||||
/* result: P(CV) in xmm8...xmm15 */
|
||||
ROUNDS_P_2WAY();
|
||||
|
||||
/* xor CV to P output (feed-forward) */
|
||||
/* result: P(CV)+CV in xmm8...xmm15 */
|
||||
xmm8 = _mm256_xor_si256( xmm8, (chaining[0]) );
|
||||
xmm9 = _mm256_xor_si256( xmm9, (chaining[1]) );
|
||||
xmm10 = _mm256_xor_si256( xmm10, (chaining[2]) );
|
||||
xmm11 = _mm256_xor_si256( xmm11, (chaining[3]) );
|
||||
xmm12 = _mm256_xor_si256( xmm12, (chaining[4]) );
|
||||
xmm13 = _mm256_xor_si256( xmm13, (chaining[5]) );
|
||||
xmm14 = _mm256_xor_si256( xmm14, (chaining[6]) );
|
||||
xmm15 = _mm256_xor_si256( xmm15, (chaining[7]) );
|
||||
|
||||
/* transpose CV back from row ordering to column ordering */
|
||||
/* result: final hash value in xmm0, xmm6, xmm13, xmm15 */
|
||||
Matrix_Transpose_INV_2way(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm4, xmm0, xmm6, xmm1, xmm2, xmm3, xmm5, xmm7);
|
||||
|
||||
/* we only need to return the truncated half of the state */
|
||||
chaining[4] = xmm0;
|
||||
chaining[5] = xmm6;
|
||||
chaining[6] = xmm13;
|
||||
chaining[7] = xmm15;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // VAES
|
||||
#endif // GROESTL512_INTR_4WAY_H__
|
||||
|
@@ -11,7 +11,7 @@
|
||||
#else
|
||||
#include "sph_groestl.h"
|
||||
#endif
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
typedef struct {
|
||||
#ifdef __AES__
|
||||
@@ -19,7 +19,7 @@ typedef struct {
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
#endif
|
||||
SHA256_CTX sha;
|
||||
sph_sha256_context sha;
|
||||
} myrgr_ctx_holder;
|
||||
|
||||
myrgr_ctx_holder myrgr_ctx;
|
||||
@@ -31,7 +31,7 @@ void init_myrgr_ctx()
|
||||
#else
|
||||
sph_groestl512_init( &myrgr_ctx.groestl );
|
||||
#endif
|
||||
SHA256_Init( &myrgr_ctx.sha );
|
||||
sph_sha256_init( &myrgr_ctx.sha );
|
||||
}
|
||||
|
||||
void myriad_hash(void *output, const void *input)
|
||||
@@ -39,54 +39,55 @@ void myriad_hash(void *output, const void *input)
|
||||
myrgr_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
|
||||
|
||||
uint32_t _ALIGN(32) hash[16];
|
||||
uint32_t _ALIGN(32) hash[16];
|
||||
|
||||
#ifdef __AES__
|
||||
update_groestl( &ctx.groestl, (char*)input, 640 );
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, input, 80);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512(&ctx.groestl, input, 80);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
|
||||
SHA256_Update( &ctx.sha, (unsigned char*)hash, 64 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx.sha );
|
||||
sph_sha256( &ctx.sha, hash, 64 );
|
||||
sph_sha256_close( &ctx.sha, hash );
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_myriad( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[8];
|
||||
be32enc(&endiandata[19], nonce);
|
||||
myriad_hash(hash, endiandata);
|
||||
do {
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[8];
|
||||
be32enc(&endiandata[19], nonce);
|
||||
myriad_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget))
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@@ -99,13 +99,13 @@ void hodl_build_block_header( struct work* g_work, uint32_t version,
|
||||
// called only by thread 0, saves a backup of g_work
|
||||
void hodl_get_new_work( struct work* work, struct work* g_work)
|
||||
{
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
// pthread_rwlock_rdlock( &g_work_lock );
|
||||
|
||||
work_free( &hodl_work );
|
||||
work_copy( &hodl_work, g_work );
|
||||
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
// pthread_rwlock_unlock( &g_work_lock );
|
||||
}
|
||||
|
||||
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
|
||||
@@ -125,7 +125,7 @@ json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
|
||||
}
|
||||
|
||||
// called by every thread, copies the backup to each thread's work.
|
||||
void hodl_resync_threads( struct work* work )
|
||||
void hodl_resync_threads( int thr_id, struct work* work )
|
||||
{
|
||||
int nonce_index = algo_gate.nonce_index;
|
||||
pthread_barrier_wait( &hodl_barrier );
|
||||
@@ -135,6 +135,7 @@ void hodl_resync_threads( struct work* work )
|
||||
work_copy( work, &hodl_work );
|
||||
}
|
||||
work->data[ nonce_index ] = swab32( hodl_work.data[ nonce_index ] );
|
||||
work_restart[thr_id].restart = 0;
|
||||
}
|
||||
|
||||
bool hodl_do_this_thread( int thr_id )
|
||||
|
@@ -201,6 +201,7 @@
|
||||
#define IOTA(r) XOR64_IOTA(a00, a00, r)
|
||||
|
||||
#ifdef P0
|
||||
#undef P0
|
||||
#undef P1
|
||||
#undef P2
|
||||
#undef P3
|
||||
|
@@ -174,24 +174,19 @@ void allium_16way_hash( void *state, const void *input )
|
||||
#if defined(__VAES__)
|
||||
|
||||
intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 32 );
|
||||
dintrlv_4x128( state, state+32, state+64, state+96, vhash, 256 );
|
||||
|
||||
intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 32 );
|
||||
dintrlv_4x128( state+128, state+160, state+192, state+224, vhash, 256 );
|
||||
|
||||
intrlv_4x128( vhash, hash8, hash9, hash10, hash11, 256 );
|
||||
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 32 );
|
||||
dintrlv_4x128( state+256, state+288, state+320, state+352, vhash, 256 );
|
||||
intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 );
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 32 );
|
||||
dintrlv_4x128( state+384, state+416, state+448, state+480, vhash, 256 );
|
||||
|
||||
#else
|
||||
@@ -262,8 +257,11 @@ typedef struct {
|
||||
keccak256_4way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
#if defined(__VAES__)
|
||||
groestl256_2way_context groestl;
|
||||
#else
|
||||
hashState_groestl256 groestl;
|
||||
|
||||
#endif
|
||||
} allium_8way_ctx_holder;
|
||||
|
||||
static __thread allium_8way_ctx_holder allium_8way_ctx;
|
||||
@@ -273,7 +271,11 @@ bool init_allium_8way_ctx()
|
||||
keccak256_4way_init( &allium_8way_ctx.keccak );
|
||||
cubehashInit( &allium_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &allium_8way_ctx.skein );
|
||||
#if defined(__VAES__)
|
||||
groestl256_2way_init( &allium_8way_ctx.groestl, 32 );
|
||||
#else
|
||||
init_groestl256( &allium_8way_ctx.groestl, 32 );
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -352,9 +354,28 @@ void allium_8way_hash( void *hash, const void *input )
|
||||
skein256_4way_update( &ctx.skein, vhashB, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhashB );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
uint64_t vhashC[4*2] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashD[4*2] __attribute__ ((aligned (64)));
|
||||
|
||||
rintrlv_4x64_2x128( vhashC, vhashD, vhashA, 256 );
|
||||
groestl256_2way_full( &ctx.groestl, vhashC, vhashC, 32 );
|
||||
groestl256_2way_full( &ctx.groestl, vhashD, vhashD, 32 );
|
||||
dintrlv_2x128( hash0, hash1, vhashC, 256 );
|
||||
dintrlv_2x128( hash2, hash3, vhashD, 256 );
|
||||
|
||||
rintrlv_4x64_2x128( vhashC, vhashD, vhashB, 256 );
|
||||
groestl256_2way_full( &ctx.groestl, vhashC, vhashC, 32 );
|
||||
groestl256_2way_full( &ctx.groestl, vhashD, vhashD, 32 );
|
||||
dintrlv_2x128( hash4, hash5, vhashC, 256 );
|
||||
dintrlv_2x128( hash6, hash7, vhashD, 256 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
|
||||
groestl256_full( &ctx.groestl, hash0, hash0, 256 );
|
||||
groestl256_full( &ctx.groestl, hash1, hash1, 256 );
|
||||
groestl256_full( &ctx.groestl, hash2, hash2, 256 );
|
||||
@@ -363,6 +384,8 @@ void allium_8way_hash( void *hash, const void *input )
|
||||
groestl256_full( &ctx.groestl, hash5, hash5, 256 );
|
||||
groestl256_full( &ctx.groestl, hash6, hash6, 256 );
|
||||
groestl256_full( &ctx.groestl, hash7, hash7, 256 );
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
|
@@ -187,7 +187,8 @@ bool register_allium_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT
|
||||
| VAES_OPT | VAES256_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "lyra2.h"
|
||||
#if defined(__VAES__)
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#elif defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
|
@@ -12,8 +12,7 @@
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/ripemd/sph_ripemd.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
#define EPSa DBL_EPSILON
|
||||
#define EPS1 DBL_EPSILON
|
||||
@@ -105,8 +104,8 @@ uint32_t sw2_( int nnounce )
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
SHA256_CTX sha256;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha256_context sha256;
|
||||
sph_sha512_context sha512;
|
||||
sph_keccak512_context keccak;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sph_haval256_5_context haval;
|
||||
@@ -118,8 +117,8 @@ m7m_ctx_holder m7m_ctx;
|
||||
|
||||
void init_m7m_ctx()
|
||||
{
|
||||
SHA256_Init( &m7m_ctx.sha256 );
|
||||
SHA512_Init( &m7m_ctx.sha512 );
|
||||
sph_sha256_init( &m7m_ctx );
|
||||
sph_sha512_init( &m7m_ctx.sha512 );
|
||||
sph_keccak512_init( &m7m_ctx.keccak );
|
||||
sph_whirlpool_init( &m7m_ctx.whirlpool );
|
||||
sph_haval256_5_init( &m7m_ctx.haval );
|
||||
@@ -143,11 +142,10 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint8_t bhash[7][64] __attribute__((aligned(64)));
|
||||
uint32_t n = pdata[19] - 1;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
uint32_t usw_, mpzscale;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
char data_str[161], hash_str[65], target_str[65];
|
||||
//uint8_t *bdata = 0;
|
||||
uint8_t bdata[8192] __attribute__ ((aligned (64)));
|
||||
int i, digits;
|
||||
int bytes;
|
||||
@@ -155,12 +153,12 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
|
||||
m7m_ctx_holder ctx1, ctx2 __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx1, &m7m_ctx, sizeof(m7m_ctx) );
|
||||
SHA256_CTX ctxf_sha256;
|
||||
sph_sha256_context ctxf_sha256;
|
||||
|
||||
memcpy(data, pdata, 80);
|
||||
|
||||
SHA256_Update( &ctx1.sha256, data, M7_MIDSTATE_LEN );
|
||||
SHA512_Update( &ctx1.sha512, data, M7_MIDSTATE_LEN );
|
||||
sph_sha256( &ctx1.sha256, data, M7_MIDSTATE_LEN );
|
||||
sph_sha512( &ctx1.sha512, data, M7_MIDSTATE_LEN );
|
||||
sph_keccak512( &ctx1.keccak, data, M7_MIDSTATE_LEN );
|
||||
sph_whirlpool( &ctx1.whirlpool, data, M7_MIDSTATE_LEN );
|
||||
sph_haval256_5( &ctx1.haval, data, M7_MIDSTATE_LEN );
|
||||
@@ -191,11 +189,11 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
|
||||
memcpy( &ctx2, &ctx1, sizeof(m7m_ctx) );
|
||||
|
||||
SHA256_Update( &ctx2.sha256, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
SHA256_Final( (unsigned char*) (bhash[0]), &ctx2.sha256 );
|
||||
sph_sha256( &ctx2.sha256, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
sph_sha256_close( &ctx2.sha256, bhash[0] );
|
||||
|
||||
SHA512_Update( &ctx2.sha512, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
SHA512_Final( (unsigned char*) (bhash[1]), &ctx2.sha512 );
|
||||
sph_sha512( &ctx2.sha512, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
sph_sha512_close( &ctx2.sha512, bhash[1] );
|
||||
|
||||
sph_keccak512( &ctx2.keccak, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
sph_keccak512_close( &ctx2.keccak, (void*)(bhash[2]) );
|
||||
@@ -227,9 +225,9 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
bytes = mpz_sizeinbase(product, 256);
|
||||
mpz_export((void *)bdata, NULL, -1, 1, 0, 0, product);
|
||||
|
||||
SHA256_Init( &ctxf_sha256 );
|
||||
SHA256_Update( &ctxf_sha256, bdata, bytes );
|
||||
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
|
||||
sph_sha256_init( &ctxf_sha256 );
|
||||
sph_sha256( &ctxf_sha256, bdata, bytes );
|
||||
sph_sha256_close( &ctxf_sha256, hash );
|
||||
|
||||
digits=(int)((sqrt((double)(n/2))*(1.+EPS))/9000+75);
|
||||
mp_bitcnt_t prec = (long int)(digits*BITS_PER_DIGIT+16);
|
||||
@@ -262,18 +260,13 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
mpzscale=bytes;
|
||||
mpz_export(bdata, NULL, -1, 1, 0, 0, product);
|
||||
|
||||
SHA256_Init( &ctxf_sha256 );
|
||||
SHA256_Update( &ctxf_sha256, bdata, bytes );
|
||||
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
|
||||
}
|
||||
|
||||
sph_sha256_init( &ctxf_sha256 );
|
||||
sph_sha256( &ctxf_sha256, bdata, bytes );
|
||||
sph_sha256_close( &ctxf_sha256, hash );
|
||||
}
|
||||
|
||||
if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget )
|
||||
&& !opt_benchmark ) )
|
||||
|
||||
|
||||
// if ( unlikely( hash[7] <= ptarget[7] ) )
|
||||
// if ( likely( fulltest( hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
if ( opt_debug )
|
||||
{
|
||||
|
@@ -156,7 +156,7 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
|
||||
void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr )
|
||||
{
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
// pthread_rwlock_rdlock( &g_work_lock );
|
||||
|
||||
// ignore POK in first word
|
||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||
@@ -174,7 +174,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
else
|
||||
++(*nonceptr);
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
// pthread_rwlock_unlock( &g_work_lock );
|
||||
}
|
||||
|
||||
void zr5_display_pok( struct work* work )
|
||||
|
@@ -912,7 +912,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
|
||||
|
||||
// A = fugue serial, B = sha512 prarallel
|
||||
// A = fugue serial, B = sha512 parallel
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -44,7 +44,7 @@ typedef struct {
|
||||
sph_hamsi512_context hamsi1;
|
||||
sph_shabal512_context shabal1;
|
||||
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
|
||||
SHA512_CTX sha1, sha2;
|
||||
sph_sha512_context sha1, sha2;
|
||||
sph_haval256_5_context haval1, haval2;
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo1, echo2;
|
||||
@@ -106,8 +106,8 @@ void init_hmq1725_ctx()
|
||||
sph_whirlpool_init(&hmq1725_ctx.whirlpool3);
|
||||
sph_whirlpool_init(&hmq1725_ctx.whirlpool4);
|
||||
|
||||
SHA512_Init( &hmq1725_ctx.sha1 );
|
||||
SHA512_Init( &hmq1725_ctx.sha2 );
|
||||
sph_sha512_init( &hmq1725_ctx.sha1 );
|
||||
sph_sha512_init( &hmq1725_ctx.sha2 );
|
||||
|
||||
sph_haval256_5_init(&hmq1725_ctx.haval1);
|
||||
sph_haval256_5_init(&hmq1725_ctx.haval2);
|
||||
@@ -285,8 +285,8 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
}
|
||||
else
|
||||
{
|
||||
SHA512_Update( &h_ctx.sha1, hashB, 64 );
|
||||
SHA512_Final( (unsigned char*) hashA, &h_ctx.sha1 );
|
||||
sph_sha512( &h_ctx.sha1, hashB, 64 );
|
||||
sph_sha512_close( &h_ctx.sha1, hashA );
|
||||
}
|
||||
|
||||
#if defined(__AES__)
|
||||
@@ -297,8 +297,8 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
sph_groestl512_close(&h_ctx.groestl2, hashB); //4
|
||||
#endif
|
||||
|
||||
SHA512_Update( &h_ctx.sha2, hashB, 64 );
|
||||
SHA512_Final( (unsigned char*) hashA, &h_ctx.sha2 );
|
||||
sph_sha512( &h_ctx.sha2, hashB, 64 );
|
||||
sph_sha512_close( &h_ctx.sha2, hashA );
|
||||
|
||||
if ( hashA[0] & mask ) //4
|
||||
{
|
||||
|
@@ -7,28 +7,28 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "sph_ripemd.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
void lbry_hash(void* output, const void* input)
|
||||
{
|
||||
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64)));
|
||||
SHA512_CTX ctx_sha512 __attribute__ ((aligned (64)));
|
||||
sph_ripemd160_context ctx_ripemd __attribute__ ((aligned (64)));
|
||||
sph_sha256_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sph_sha512_context ctx_sha512 __attribute__ ((aligned (64)));
|
||||
sph_ripemd160_context ctx_ripemd __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) hashA[16];
|
||||
uint32_t _ALIGN(64) hashB[16];
|
||||
uint32_t _ALIGN(64) hashC[16];
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, input, 112 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, input, 112 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashA, 32 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hashA, 32 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
SHA512_Init( &ctx_sha512 );
|
||||
SHA512_Update( &ctx_sha512, hashA, 32 );
|
||||
SHA512_Final( (unsigned char*) hashA, &ctx_sha512 );
|
||||
sph_sha512_init( &ctx_sha512 );
|
||||
sph_sha512( &ctx_sha512, hashA, 32 );
|
||||
sph_sha512_close( &ctx_sha512, hashA );
|
||||
|
||||
sph_ripemd160_init( &ctx_ripemd );
|
||||
sph_ripemd160 ( &ctx_ripemd, hashA, 32 );
|
||||
@@ -38,14 +38,14 @@ void lbry_hash(void* output, const void* input)
|
||||
sph_ripemd160 ( &ctx_ripemd, hashA+8, 32 );
|
||||
sph_ripemd160_close( &ctx_ripemd, hashC );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashB, 20 );
|
||||
SHA256_Update( &ctx_sha256, hashC, 20 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hashB, 20 );
|
||||
sph_sha256( &ctx_sha256, hashC, 20 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashA, 32 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hashA, 32 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
memcpy( output, hashA, 32 );
|
||||
}
|
||||
|
@@ -39,10 +39,17 @@
|
||||
void
|
||||
SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_context ctx;
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, in, len );
|
||||
sph_sha256_close( &ctx, digest );
|
||||
#else
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, in, len );
|
||||
SHA256_Final( digest, &ctx );
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -64,35 +71,59 @@ HMAC_SHA256_Buf( const void *K, size_t Klen, const void *in, size_t len,
|
||||
void
|
||||
HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
|
||||
{
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
const unsigned char * K = _K;
|
||||
size_t i;
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
const unsigned char * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
{
|
||||
SHA256_Init( &ctx->ictx );
|
||||
SHA256_Update( &ctx->ictx, K, Klen );
|
||||
SHA256_Final( khash, &ctx->ictx );
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_init( &ctx->ictx );
|
||||
sph_sha256( &ctx->ictx, K, Klen );
|
||||
sph_sha256_close( &ctx->ictx, khash );
|
||||
#else
|
||||
SHA256_Init( &ctx->ictx );
|
||||
SHA256_Update( &ctx->ictx, K, Klen );
|
||||
SHA256_Final( khash, &ctx->ictx );
|
||||
#endif
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_init( &ctx->ictx );
|
||||
#else
|
||||
SHA256_Init( &ctx->ictx );
|
||||
|
||||
#endif
|
||||
|
||||
for ( i = 0; i < Klen; i++ ) pad[i] = K[i] ^ 0x36;
|
||||
memset( pad + Klen, 0x36, 64 - Klen );
|
||||
SHA256_Update( &ctx->ictx, pad, 64 );
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
SHA256_Init( &ctx->octx );
|
||||
memset( pad + Klen, 0x36, 64 - Klen );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256( &ctx->ictx, pad, 64 );
|
||||
#else
|
||||
SHA256_Update( &ctx->ictx, pad, 64 );
|
||||
#endif
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_init( &ctx->octx );
|
||||
#else
|
||||
SHA256_Init( &ctx->octx );
|
||||
#endif
|
||||
|
||||
for ( i = 0; i < Klen; i++ ) pad[i] = K[i] ^ 0x5c;
|
||||
|
||||
memset( pad + Klen, 0x5c, 64 - Klen );
|
||||
SHA256_Update( &ctx->octx, pad, 64 );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256( &ctx->octx, pad, 64 );
|
||||
#else
|
||||
SHA256_Update( &ctx->octx, pad, 64 );
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
@@ -100,23 +131,33 @@ void
|
||||
HMAC_SHA256_Update( HMAC_SHA256_CTX *ctx, const void *in, size_t len )
|
||||
{
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
SHA256_Update( &ctx->ictx, in, len );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256( &ctx->ictx, in, len );
|
||||
#else
|
||||
SHA256_Update( &ctx->ictx, in, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
|
||||
{
|
||||
unsigned char ihash[32];
|
||||
unsigned char ihash[32];
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final( ihash, &ctx->ictx );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_close( &ctx->ictx, ihash );
|
||||
sph_sha256( &ctx->octx, ihash, 32 );
|
||||
sph_sha256_close( &ctx->octx, digest );
|
||||
#else
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final( ihash, &ctx->ictx );
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update( &ctx->octx, ihash, 32 );
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update( &ctx->octx, ihash, 32 );
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final( digest, &ctx->octx );
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final( digest, &ctx->octx );
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -29,14 +29,24 @@
|
||||
#ifndef HMAC_SHA256_H__
|
||||
#define HMAC_SHA256_H__
|
||||
|
||||
//#define HMAC_SSL_SHA 1
|
||||
#define HMAC_SPH_SHA 1
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_sha2.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
|
||||
typedef struct HMAC_SHA256Context
|
||||
{
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_context ictx;
|
||||
sph_sha256_context octx;
|
||||
#else
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
#endif
|
||||
} HMAC_SHA256_CTX;
|
||||
|
||||
void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||
|
@@ -12,7 +12,6 @@
|
||||
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
|
||||
#define EXTERN_SHA256
|
||||
@@ -198,16 +197,6 @@ static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
||||
|
||||
extern void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
||||
{
|
||||
#if defined(__SHA__)
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, data, len );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
#else
|
||||
|
||||
uint32_t S[16], T[16];
|
||||
int i, r;
|
||||
|
||||
@@ -229,7 +218,6 @@ extern void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
||||
sha256_transform(T, S, 0);
|
||||
for (i = 0; i < 8; i++)
|
||||
be32enc((uint32_t *)hash + i, T[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void sha256d_preextend(uint32_t *W)
|
||||
@@ -676,14 +664,9 @@ int scanhash_SHA256d( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
bool register_sha256d_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(__SHA__)
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_SHA256d;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256d;
|
||||
#endif
|
||||
gate->hash = (void*)&sha256d;
|
||||
return true;
|
||||
gate->hash = (void*)&sha256d;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
200
algo/sha/sha256-hash-opt.c
Normal file
200
algo/sha/sha256-hash-opt.c
Normal file
@@ -0,0 +1,200 @@
|
||||
/* Intel SHA extensions using C intrinsics */
|
||||
/* Written and place in public domain by Jeffrey Walton */
|
||||
/* Based on code from Intel, and by Sean Gulley for */
|
||||
/* the miTLS project. */
|
||||
|
||||
// A drop in replacement for the function of the same name in sph_sha2.c.
|
||||
|
||||
#if defined(__SHA__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
|
||||
static void sha2_round( const uint8_t input[], uint32_t state[8] )
|
||||
{
|
||||
__m128i STATE0, STATE1;
|
||||
__m128i MSG, TMP, MASK;
|
||||
__m128i TMSG0, TMSG1, TMSG2, TMSG3;
|
||||
__m128i ABEF_SAVE, CDGH_SAVE;
|
||||
|
||||
// Load initial values
|
||||
TMP = _mm_load_si128((__m128i*) &state[0]);
|
||||
STATE1 = _mm_load_si128((__m128i*) &state[4]);
|
||||
MASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL);
|
||||
|
||||
TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB
|
||||
STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH
|
||||
STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF
|
||||
STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH
|
||||
|
||||
// Save current hash
|
||||
ABEF_SAVE = STATE0;
|
||||
CDGH_SAVE = STATE1;
|
||||
|
||||
// Rounds 0-3
|
||||
MSG = _mm_load_si128((const __m128i*) (input+0));
|
||||
TMSG0 = _mm_shuffle_epi8(MSG, MASK);
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0xE9B5DBA5B5C0FBCFULL, 0x71374491428A2F98ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Rounds 4-7
|
||||
TMSG1 = _mm_load_si128((const __m128i*) (input+16));
|
||||
TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0xAB1C5ED5923F82A4ULL, 0x59F111F13956C25BULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
||||
|
||||
// Rounds 8-11
|
||||
TMSG2 = _mm_load_si128((const __m128i*) (input+32));
|
||||
TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0x550C7DC3243185BEULL, 0x12835B01D807AA98ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
||||
|
||||
// Rounds 12-15
|
||||
TMSG3 = _mm_load_si128((const __m128i*) (input+48));
|
||||
TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0xC19BF1749BDC06A7ULL, 0x80DEB1FE72BE5D74ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
|
||||
TMSG0 = _mm_add_epi32(TMSG0, TMP);
|
||||
TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
|
||||
|
||||
// Rounds 16-19
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0x240CA1CC0FC19DC6ULL, 0xEFBE4786E49B69C1ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
|
||||
TMSG1 = _mm_add_epi32(TMSG1, TMP);
|
||||
TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
|
||||
|
||||
// Rounds 20-23
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0x76F988DA5CB0A9DCULL, 0x4A7484AA2DE92C6FULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
|
||||
TMSG2 = _mm_add_epi32(TMSG2, TMP);
|
||||
TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
||||
|
||||
// Rounds 24-27
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0xBF597FC7B00327C8ULL, 0xA831C66D983E5152ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
|
||||
TMSG3 = _mm_add_epi32(TMSG3, TMP);
|
||||
TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
||||
|
||||
// Rounds 28-31
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0x1429296706CA6351ULL, 0xD5A79147C6E00BF3ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
|
||||
TMSG0 = _mm_add_epi32(TMSG0, TMP);
|
||||
TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
|
||||
|
||||
// Rounds 32-35
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0x53380D134D2C6DFCULL, 0x2E1B213827B70A85ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
|
||||
TMSG1 = _mm_add_epi32(TMSG1, TMP);
|
||||
TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
|
||||
|
||||
// Rounds 36-39
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0x92722C8581C2C92EULL, 0x766A0ABB650A7354ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
|
||||
TMSG2 = _mm_add_epi32(TMSG2, TMP);
|
||||
TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
||||
|
||||
// Rounds 40-43
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0xC76C51A3C24B8B70ULL, 0xA81A664BA2BFE8A1ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
|
||||
TMSG3 = _mm_add_epi32(TMSG3, TMP);
|
||||
TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
||||
|
||||
// Rounds 44-47
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0x106AA070F40E3585ULL, 0xD6990624D192E819ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
|
||||
TMSG0 = _mm_add_epi32(TMSG0, TMP);
|
||||
TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
|
||||
|
||||
// Rounds 48-51
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0x34B0BCB52748774CULL, 0x1E376C0819A4C116ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
|
||||
TMSG1 = _mm_add_epi32(TMSG1, TMP);
|
||||
TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
|
||||
|
||||
// Rounds 52-55
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0x682E6FF35B9CCA4FULL, 0x4ED8AA4A391C0CB3ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
|
||||
TMSG2 = _mm_add_epi32(TMSG2, TMP);
|
||||
TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Rounds 56-59
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0x8CC7020884C87814ULL, 0x78A5636F748F82EEULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
|
||||
TMSG3 = _mm_add_epi32(TMSG3, TMP);
|
||||
TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Rounds 60-63
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0xC67178F2BEF9A3F7ULL, 0xA4506CEB90BEFFFAULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Add values back to state
|
||||
STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
|
||||
STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
|
||||
|
||||
TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
|
||||
STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG
|
||||
STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); // DCBA
|
||||
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
|
||||
|
||||
// Save state
|
||||
_mm_store_si128((__m128i*) &state[0], STATE0);
|
||||
_mm_store_si128((__m128i*) &state[4], STATE1);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
@@ -5,6 +5,79 @@
|
||||
#include <stdio.h>
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
#if defined(SHA256T_16WAY)
|
||||
|
||||
static __thread sha256_16way_context sha256_ctx16 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_16way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (64)));
|
||||
sha256_16way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx16, sizeof ctx );
|
||||
|
||||
sha256_16way_update( &ctx, input + (64<<4), 16 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_16way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*16] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = &(hash32[7<<4]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
sha256_16way_init( &sha256_ctx16 );
|
||||
sha256_16way_update( &sha256_ctx16, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = n;
|
||||
sha256q_16way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
@@ -31,68 +104,47 @@ void sha256q_8way_hash( void* output, const void* input )
|
||||
sha256_8way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_sha256q_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = &(hash32[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way_update( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
do
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
|
||||
pdata[19] = n;
|
||||
sha256q_8way_hash( hash, vdata );
|
||||
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
sha256q_8way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1,108 +1,74 @@
|
||||
#include "sha256t-gate.h"
|
||||
|
||||
#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
static __thread SHA256_CTX sha256q_ctx __attribute__ ((aligned (64)));
|
||||
static __thread sph_sha256_context sha256q_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_midstate( const void* input )
|
||||
{
|
||||
SHA256_Init( &sha256q_ctx );
|
||||
SHA256_Update( &sha256q_ctx, input, 64 );
|
||||
sph_sha256_init( &sha256q_ctx );
|
||||
sph_sha256( &sha256q_ctx, input, 64 );
|
||||
}
|
||||
|
||||
void sha256q_hash( void* output, const void* input )
|
||||
int sha256q_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
SHA256_CTX ctx __attribute__ ((aligned (64)));
|
||||
sph_sha256_context ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &sha256q_ctx, sizeof sha256q_ctx );
|
||||
|
||||
SHA256_Update( &ctx, input + midlen, tail );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256( &ctx, input + midlen, tail );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, output );
|
||||
|
||||
memcpy( output, hash, 32 );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_sha256q( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
#ifdef _MSC_VER
|
||||
uint32_t __declspec(align(32)) hash64[8];
|
||||
#else
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
#endif
|
||||
uint32_t endiandata[32];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t last_nonce = max_nonce - 1;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sha256q_midstate( edata );
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
sha256q_midstate( endiandata );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
do
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
edata[19] = n;
|
||||
if ( likely( sha256q_hash( hash, edata ) ) )
|
||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sha256q_hash( hash64, endiandata );
|
||||
if ( !( hash64[7] & mask ) )
|
||||
if ( fulltest( hash64, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash64, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -5,6 +5,75 @@
|
||||
#include <stdio.h>
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
#if defined(SHA256T_16WAY)
|
||||
|
||||
static __thread sha256_16way_context sha256_ctx16 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256t_16way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (64)));
|
||||
sha256_16way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx16, sizeof ctx );
|
||||
|
||||
sha256_16way_update( &ctx, input + (64<<4), 16 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*16] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = &(hash32[7<<4]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
sha256_16way_init( &sha256_ctx16 );
|
||||
sha256_16way_update( &sha256_ctx16, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = n;
|
||||
sha256t_16way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
@@ -31,61 +100,43 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash32[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hash32_d7 = &(hash32[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way_update( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
do
|
||||
{
|
||||
const uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32(
|
||||
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
|
||||
pdata[19] = n;
|
||||
sha256t_8way_hash( hash, vdata );
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
sha256t_8way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -2,38 +2,41 @@
|
||||
|
||||
bool register_sha256t_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(SHA256T_8WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||
gate->hash = (void*)&sha256t_8way_hash;
|
||||
#elif defined(SHA256T_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||
gate->hash = (void*)&sha256t_4way_hash;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
#if defined(SHA256T_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256t_16way;
|
||||
gate->hash = (void*)&sha256t_16way_hash;
|
||||
#elif defined(__SHA__)
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
#elif defined(SHA256T_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||
gate->hash = (void*)&sha256t_8way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||
gate->hash = (void*)&sha256t_4way_hash;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool register_sha256q_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(SHA256T_8WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256q_8way;
|
||||
gate->hash = (void*)&sha256q_8way_hash;
|
||||
#elif defined(SHA256T_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256q_4way;
|
||||
gate->hash = (void*)&sha256q_4way_hash;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
#if defined(SHA256T_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256q_16way;
|
||||
gate->hash = (void*)&sha256q_16way_hash;
|
||||
#elif defined(__SHA__)
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256q;
|
||||
gate->hash = (void*)&sha256q_hash;
|
||||
#elif defined(SHA256T_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256q_8way;
|
||||
gate->hash = (void*)&sha256q_8way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha256q_4way;
|
||||
gate->hash = (void*)&sha256q_4way_hash;
|
||||
#endif
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
|
@@ -4,18 +4,27 @@
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
// Override multi way on ryzen, SHA is better.
|
||||
#if !defined(__SHA__)
|
||||
#if defined(__AVX2__)
|
||||
#define SHA256T_8WAY
|
||||
#elif defined(__SSE2__)
|
||||
#define SHA256T_4WAY
|
||||
#endif
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define SHA256T_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define SHA256T_8WAY 1
|
||||
#else
|
||||
#define SHA256T_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_sha256t_algo( algo_gate_t* gate );
|
||||
bool register_sha256q_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(SHA256T_16WAY)
|
||||
|
||||
void sha256t_16way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void sha256q_16way_hash( void *output, const void *input );
|
||||
int scanhash_sha256q_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
void sha256t_8way_hash( void *output, const void *input );
|
||||
@@ -36,10 +45,11 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void sha256t_hash( void *output, const void *input );
|
||||
|
||||
int sha256t_hash( void *output, const void *input );
|
||||
int scanhash_sha256t( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void sha256q_hash( void *output, const void *input );
|
||||
int sha256q_hash( void *output, const void *input );
|
||||
int scanhash_sha256q( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
|
@@ -1,105 +1,73 @@
|
||||
#include "sha256t-gate.h"
|
||||
|
||||
#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
||||
// Only used on CPUs with SHA
|
||||
|
||||
static __thread sph_sha256_context sha256t_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256t_midstate( const void* input )
|
||||
{
|
||||
SHA256_Init( &sha256t_ctx );
|
||||
SHA256_Update( &sha256t_ctx, input, 64 );
|
||||
sph_sha256_init( &sha256t_ctx );
|
||||
sph_sha256( &sha256t_ctx, input, 64 );
|
||||
}
|
||||
|
||||
void sha256t_hash( void* output, const void* input )
|
||||
int sha256t_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
SHA256_CTX ctx __attribute__ ((aligned (64)));
|
||||
sph_sha256_context ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &sha256t_ctx, sizeof sha256t_ctx );
|
||||
|
||||
SHA256_Update( &ctx, input + midlen, tail );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256( &ctx, input + midlen, tail );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, output );
|
||||
|
||||
memcpy( output, hash, 32 );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_sha256t( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
#ifdef _MSC_VER
|
||||
uint32_t __declspec(align(32)) hash64[8];
|
||||
#else
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
#endif
|
||||
uint32_t endiandata[32];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t last_nonce = max_nonce - 1;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sha256t_midstate( edata );
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
sha256t_midstate( endiandata );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
do
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
edata[19] = n;
|
||||
if ( likely( sha256t_hash( hash, edata ) ) )
|
||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sha256t_hash( hash64, endiandata );
|
||||
if ( !(hash64[7] & mask) )
|
||||
if ( fulltest( hash64, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash64, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
@@ -71,7 +71,11 @@ static const sph_u32 H256[8] = {
|
||||
* of the compression function.
|
||||
*/
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_SHA2
|
||||
#if defined(__SHA__)
|
||||
|
||||
#include "sha256-hash-opt.c"
|
||||
|
||||
#else // no SHA
|
||||
|
||||
static const sph_u32 K[64] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
@@ -108,6 +112,8 @@ static const sph_u32 K[64] = {
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
|
||||
};
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_SHA2
|
||||
|
||||
#define SHA2_MEXP1(in, pc) do { \
|
||||
W[pc] = in(pc); \
|
||||
} while (0)
|
||||
@@ -191,7 +197,7 @@ static const sph_u32 K[64] = {
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
#else // large footprint (default)
|
||||
|
||||
#define SHA2_ROUND_BODY(in, r) do { \
|
||||
sph_u32 A, B, C, D, E, F, G, H, T1, T2; \
|
||||
@@ -600,7 +606,7 @@ static const sph_u32 K[64] = {
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
#endif // small footprint else
|
||||
|
||||
/*
|
||||
* One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
|
||||
@@ -613,6 +619,9 @@ sha2_round(const unsigned char *data, sph_u32 r[8])
|
||||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
#endif // SHA else
|
||||
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_init(void *cc)
|
||||
@@ -653,7 +662,7 @@ void
|
||||
sph_sha224_close(void *cc, void *dst)
|
||||
{
|
||||
sha224_close(cc, dst, 7);
|
||||
sph_sha224_init(cc);
|
||||
// sph_sha224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
@@ -661,7 +670,7 @@ void
|
||||
sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha224_addbits_and_close(cc, ub, n, dst, 7);
|
||||
sph_sha224_init(cc);
|
||||
// sph_sha224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
@@ -677,14 +686,14 @@ void
|
||||
sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha224_addbits_and_close(cc, ub, n, dst, 8);
|
||||
sph_sha256_init(cc);
|
||||
// sph_sha256_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
|
||||
{
|
||||
#define SHA2_IN(x) msg[x]
|
||||
SHA2_ROUND_BODY(SHA2_IN, val);
|
||||
#undef SHA2_IN
|
||||
}
|
||||
//void
|
||||
//sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
|
||||
//{
|
||||
//#define SHA2_IN(x) msg[x]
|
||||
// SHA2_ROUND_BODY(SHA2_IN, val);
|
||||
//#undef SHA2_IN
|
||||
//}
|
||||
|
@@ -73,7 +73,7 @@ typedef struct {
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_sha224_context;
|
||||
} sph_sha224_context __attribute__((aligned(64)));
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-256 computations. It is identical
|
||||
|
@@ -26,7 +26,11 @@ static const uint32_t IV512[] =
|
||||
static void
|
||||
c512_2way( shavite512_2way_context *ctx, const void *msg )
|
||||
{
|
||||
#if defined(__VAES__)
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
#else
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
#endif
|
||||
__m256i p0, p1, p2, p3, x;
|
||||
__m256i k00, k01, k02, k03, k10, k11, k12, k13;
|
||||
__m256i *m = (__m256i*)msg;
|
||||
|
@@ -2,14 +2,8 @@
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "skein-hash-4way.h"
|
||||
|
||||
// 8 way is faster than SHA on Icelake
|
||||
// SHA is faster than 4 way on Ryzen
|
||||
//
|
||||
#if defined(__SHA__)
|
||||
#include <openssl/sha.h>
|
||||
#endif
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
#if defined (SKEIN_8WAY)
|
||||
|
||||
@@ -93,7 +87,7 @@ void skeinhash_4way( void *state, const void *input )
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
SHA256_CTX ctx_sha256;
|
||||
sph_sha256_context ctx_sha256;
|
||||
#else
|
||||
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx_sha256;
|
||||
@@ -102,31 +96,29 @@ void skeinhash_4way( void *state, const void *input )
|
||||
skein512_4way_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||
|
||||
#if defined(__SHA__)
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash0, 64 );
|
||||
SHA256_Final( (unsigned char*)hash0, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash1, 64 );
|
||||
SHA256_Final( (unsigned char*)hash1, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash2, 64 );
|
||||
SHA256_Final( (unsigned char*)hash2, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
|
||||
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
|
||||
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hash0, 64 );
|
||||
sph_sha256_close( &ctx_sha256, hash0 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hash1, 64 );
|
||||
sph_sha256_close( &ctx_sha256, hash1 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hash2, 64 );
|
||||
sph_sha256_close( &ctx_sha256, hash2 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hash3, 64 );
|
||||
sph_sha256_close( &ctx_sha256, hash3 );
|
||||
intrlv_4x32( state, hash0, hash1, hash2, hash3, 256 );
|
||||
#else
|
||||
rintrlv_4x64_4x32( vhash32, vhash64, 512 );
|
||||
|
||||
#else
|
||||
|
||||
rintrlv_4x64_4x32( vhash32, vhash64, 512 );
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way_update( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4way_close( &ctx_sha256, state );
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@@ -5,21 +5,21 @@
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_skein.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
void skeinhash(void *state, const void *input)
|
||||
{
|
||||
uint32_t hash[16] __attribute__ ((aligned (64)));
|
||||
sph_skein512_context ctx_skein;
|
||||
SHA256_CTX ctx_sha256;
|
||||
sph_sha256_context ctx_sha256;
|
||||
|
||||
sph_skein512_init( &ctx_skein );
|
||||
sph_skein512( &ctx_skein, input, 80 );
|
||||
sph_skein512_close( &ctx_skein, hash );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash, 64 );
|
||||
SHA256_Final( (unsigned char*) hash, &ctx_sha256 );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hash, 64 );
|
||||
sph_sha256_close( &ctx_sha256, hash );
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
@@ -42,7 +42,6 @@ void init_phi1612_ctx()
|
||||
sph_skein512_init( &phi_ctx.skein );
|
||||
sph_jh512_init( &phi_ctx.jh );
|
||||
cubehashInit( &phi_ctx.cube, 512, 16, 32 );
|
||||
sph_fugue512_init( &phi_ctx.fugue );
|
||||
sph_gost512_init( &phi_ctx.gost );
|
||||
#ifdef __AES__
|
||||
init_echo( &phi_ctx.echo, 512 );
|
||||
|
@@ -161,9 +161,9 @@ int hex_hash( void* output, const void* input, int thrid )
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, in, size );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, in, size );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
@@ -58,7 +58,7 @@ struct TortureGarden
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
|
||||
struct TortureNode {
|
||||
unsigned int algo;
|
||||
@@ -120,9 +120,9 @@ static void get_hash( void *output, const void *input, TortureGarden *garden,
|
||||
sph_hamsi512_close(&garden->hamsi, hash);
|
||||
break;
|
||||
case 7:
|
||||
SHA512_Init( &garden->sha512 );
|
||||
SHA512_Update( &garden->sha512, input, 64 );
|
||||
SHA512_Final( (unsigned char*)hash, &garden->sha512 );
|
||||
sph_sha512_init( &garden->sha512 );
|
||||
sph_sha512( &garden->sha512, input, 64 );
|
||||
sph_sha512_close( &garden->sha512, hash );
|
||||
break;
|
||||
case 8:
|
||||
sph_jh512_init(&garden->jh);
|
||||
@@ -229,9 +229,9 @@ int minotaur_hash( void *output, const void *input, int thr_id )
|
||||
unsigned char hash[64] __attribute__ ((aligned (64)));
|
||||
|
||||
// Find initial sha512 hash
|
||||
SHA512_Init( &garden.sha512 );
|
||||
SHA512_Update( &garden.sha512, input, 80 );
|
||||
SHA512_Final( (unsigned char*) hash, &garden.sha512 );
|
||||
sph_sha512_init( &garden.sha512 );
|
||||
sph_sha512( &garden.sha512, input, 80 );
|
||||
sph_sha512_close( &garden.sha512, hash );
|
||||
|
||||
// algo 6 (Hamsi) is very slow. It's faster to skip hashing this nonce
|
||||
// if Hamsi is needed but only the first and last functions are
|
||||
|
@@ -16,8 +16,7 @@
|
||||
|
||||
#if defined (X16R_8WAY)
|
||||
|
||||
// Perform midstate prehash of hash functions with block size <= 64 bytes
|
||||
// and interleave 4x64 before nonce insertion for final hash.
|
||||
// Perform midstate prehash of hash functions with block size <= 72 bytes.
|
||||
|
||||
void x16r_8way_prehash( void *vdata, void *pdata )
|
||||
{
|
||||
@@ -34,6 +33,11 @@ void x16r_8way_prehash( void *vdata, void *pdata )
|
||||
jh512_8way_init( &x16r_ctx.jh );
|
||||
jh512_8way_update( &x16r_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
keccak512_8way_init( &x16r_ctx.keccak );
|
||||
keccak512_8way_update( &x16r_ctx.keccak, vdata, 72 );
|
||||
break;
|
||||
case SKEIN:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
skein512_8way_init( &x16r_ctx.skein );
|
||||
@@ -173,13 +177,13 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
||||
hash7, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_8way_update( &ctx.keccak, input, size );
|
||||
if ( i == 0 )
|
||||
keccak512_8way_update( &ctx.keccak, input + (72<<3), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||
size<<3 );
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
@@ -490,6 +494,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
@@ -533,6 +538,11 @@ void x16r_4way_prehash( void *vdata, void *pdata )
|
||||
jh512_4way_init( &x16r_ctx.jh );
|
||||
jh512_4way_update( &x16r_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
keccak512_4way_init( &x16r_ctx.keccak );
|
||||
keccak512_4way_update( &x16r_ctx.keccak, vdata, 72 );
|
||||
break;
|
||||
case SKEIN:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||
@@ -619,11 +629,20 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__VAES__)
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
groestl512_2way_full( &ctx.groestl, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
groestl512_2way_full( &ctx.groestl, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
#else
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
|
||||
break;
|
||||
#endif
|
||||
break;
|
||||
case JH:
|
||||
if ( i == 0 )
|
||||
jh512_4way_update( &ctx.jh, input + (64<<2), 16 );
|
||||
@@ -637,12 +656,12 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
if ( i == 0 )
|
||||
keccak512_4way_update( &ctx.keccak, input, size );
|
||||
if ( i == 0 )
|
||||
keccak512_4way_update( &ctx.keccak, input + (72<<2), 8 );
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, size );
|
||||
}
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
@@ -711,11 +730,20 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
||||
}
|
||||
break;
|
||||
case SHAVITE:
|
||||
#if defined(__VAES__)
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
shavite512_2way_full( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
shavite512_2way_full( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
#else
|
||||
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||
break;
|
||||
#endif
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
simd512_2way_full( &ctx.simd, vhash, vhash, size );
|
||||
@@ -725,6 +753,14 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__VAES__)
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
echo_2way_full( &ctx.echo, vhash, 512, vhash, size );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
echo_2way_full( &ctx.echo, vhash, 512, vhash, size );
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
#else
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)in0, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
@@ -733,7 +769,8 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
||||
(const BitSequence *)in2, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)in3, size );
|
||||
break;
|
||||
#endif
|
||||
break;
|
||||
case HAMSI:
|
||||
if ( i == 0 )
|
||||
hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
|
||||
@@ -856,7 +893,7 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
x16r_4way_prehash( vdata, pdata );
|
||||
|
@@ -61,7 +61,8 @@ bool register_x16r_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
||||
VAES_OPT | VAES256_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -79,7 +80,8 @@ bool register_x16rv2_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_x16rv2;
|
||||
gate->hash = (void*)&x16rv2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
||||
VAES_OPT | VAES256_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -97,7 +99,8 @@ bool register_x16s_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
||||
VAES_OPT | VAES256_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -230,7 +233,8 @@ bool register_x16rt_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_x16rt;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
||||
VAES_OPT | VAES256_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -247,7 +251,8 @@ bool register_x16rt_veil_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_x16rt;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
||||
VAES_OPT | VAES256_OPT;
|
||||
gate->build_extraheader = (void*)&veil_build_extraheader;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -277,22 +282,17 @@ bool register_x21s_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_x21s_8way;
|
||||
gate->hash = (void*)&x21s_8way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT
|
||||
| VAES_OPT;
|
||||
#elif defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x21s_4way;
|
||||
gate->hash = (void*)&x21s_4way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x21s;
|
||||
gate->hash = (void*)&x21s_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
#endif
|
||||
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
||||
VAES_OPT | VAES256_OPT;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
|
@@ -20,13 +20,16 @@
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/fugue/fugue-aesni.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/fugue/fugue-aesni.h"
|
||||
#endif
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
@@ -39,11 +42,14 @@
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
#include "algo/sha/sha-hash-4way.h"
|
||||
|
||||
#if defined(__VAES__)
|
||||
#include "algo/groestl/groestl512-hash-4way.h"
|
||||
#include "algo/shavite/shavite-hash-4way.h"
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#include "algo/groestl/groestl512-hash-4way.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/shavite/shavite-hash-4way.h"
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#endif
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
@@ -145,15 +151,21 @@ union _x16r_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_echo echo;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
shavite512_2way_context shavite;
|
||||
echo_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
hashState_luffa luffa1;
|
||||
cubehashParam cube;
|
||||
shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
@@ -199,7 +211,7 @@ union _x16r_context_overlay
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
typedef union _x16r_context_overlay x16r_context_overlay;
|
||||
|
@@ -177,9 +177,9 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
||||
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, in, size );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, in, size );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -672,14 +672,20 @@ union _x16rv2_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_echo echo;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
shavite512_2way_context shavite;
|
||||
echo_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
@@ -745,10 +751,19 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__VAES__)
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
groestl512_2way_full( &ctx.groestl, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
groestl512_2way_full( &ctx.groestl, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
#else
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
|
||||
#endif
|
||||
break;
|
||||
case JH:
|
||||
if ( i == 0 )
|
||||
@@ -887,10 +902,19 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
}
|
||||
break;
|
||||
case SHAVITE:
|
||||
#if defined(__VAES__)
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
shavite512_2way_full( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
shavite512_2way_full( &ctx.shavite, vhash, vhash, size );
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
#else
|
||||
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||
#endif
|
||||
break;
|
||||
case SIMD:
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
@@ -901,6 +925,14 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__VAES__)
|
||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||
echo_2way_full( &ctx.echo, vhash, 512, vhash, size );
|
||||
dintrlv_2x128_512( hash0, hash1, vhash );
|
||||
intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||
echo_2way_full( &ctx.echo, vhash, 512, vhash, size );
|
||||
dintrlv_2x128_512( hash2, hash3, vhash );
|
||||
#else
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)in0, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
@@ -909,6 +941,7 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
(const BitSequence *)in2, size );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)in3, size );
|
||||
#endif
|
||||
break;
|
||||
case HAMSI:
|
||||
if ( i == 0 )
|
||||
|
@@ -33,7 +33,7 @@ union _x16rv2_context_overlay
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
};
|
||||
typedef union _x16rv2_context_overlay x16rv2_context_overlay;
|
||||
@@ -155,9 +155,9 @@ int x16rv2_hash( void* output, const void* input, int thrid )
|
||||
sph_tiger( &ctx.tiger, in, size );
|
||||
sph_tiger_close( &ctx.tiger, hash );
|
||||
padtiger512( hash );
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, 64 );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, hash, 64 );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -13,7 +13,7 @@
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
#if defined(__SHA__)
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#endif
|
||||
|
||||
#if defined (X21S_8WAY)
|
||||
@@ -209,7 +209,7 @@ union _x21s_4way_context_overlay
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(__SHA__)
|
||||
SHA256_CTX sha256;
|
||||
sph_sha256_context sha256;
|
||||
#else
|
||||
sha256_4way_context sha256;
|
||||
#endif
|
||||
@@ -275,23 +275,18 @@ int x21s_4way_hash( void* output, const void* input, int thrid )
|
||||
|
||||
#if defined(__SHA__)
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash0, 64 );
|
||||
SHA256_Final( (unsigned char*)hash0, &ctx.sha256 );
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash1, 64 );
|
||||
SHA256_Final( (unsigned char*)hash1, &ctx.sha256 );
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash2, 64 );
|
||||
SHA256_Final( (unsigned char*)hash2, &ctx.sha256 );
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash3, 64 );
|
||||
SHA256_Final( (unsigned char*)hash3, &ctx.sha256 );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash0, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash1, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+32 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash2, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+64 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash3, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+96 );
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -8,7 +8,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
@@ -23,7 +23,7 @@ union _x21s_context_overlay
|
||||
sph_haval256_5_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
SHA256_CTX sha256;
|
||||
sph_sha256_context sha256;
|
||||
};
|
||||
typedef union _x21s_context_overlay x21s_context_overlay;
|
||||
|
||||
@@ -50,9 +50,9 @@ int x21s_hash( void* output, const void* input, int thrid )
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash );
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, hash, 64 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx.sha256 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash, 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash );
|
||||
|
||||
memcpy( output, hash, 32 );
|
||||
|
||||
|
@@ -1124,7 +1124,13 @@ union _sonoa_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo512_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
@@ -1132,7 +1138,6 @@ union _sonoa_4way_context_overlay
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hashState_echo echo;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
@@ -1162,6 +1167,17 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -1171,6 +1187,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -1195,6 +1213,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -1206,16 +1233,29 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
if ( work_restart[thr_id].restart ) return 0;
|
||||
// 2
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -1225,6 +1265,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -1249,6 +1291,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -1263,6 +1314,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
@@ -1274,6 +1327,17 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -1283,6 +1347,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -1307,6 +1373,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -1321,6 +1396,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
@@ -1340,6 +1417,17 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -1349,6 +1437,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -1373,6 +1463,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -1387,6 +1486,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
@@ -1410,6 +1511,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
hamsi512_4way_update( &ctx.hamsi, vhashB, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
@@ -1424,6 +1534,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
intrlv_2x128_512( vhashA, hash0, hash1 );
|
||||
intrlv_2x128_512( vhashB, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
@@ -1443,6 +1555,20 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
shabal512_4way_update( &ctx.shabal, vhashB, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
// rintrlv_4x32_2x128( vhashA, vhashB, vhash, 512 );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
intrlv_2x128_512( vhashA, hash0, hash1 );
|
||||
intrlv_2x128_512( vhashB, hash2, hash3 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -1452,6 +1578,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -1476,6 +1604,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -1490,6 +1627,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
@@ -1523,6 +1662,17 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -1532,6 +1682,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -1556,6 +1708,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -1570,6 +1731,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
@@ -1616,6 +1779,17 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -1625,6 +1799,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -1649,6 +1825,15 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -1663,6 +1848,8 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
@@ -12,7 +12,7 @@ bool register_sonoa_algo( algo_gate_t* gate )
|
||||
init_sonoa_ctx();
|
||||
gate->hash = (void*)&sonoa_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -20,7 +20,7 @@
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
@@ -53,7 +53,7 @@ typedef struct {
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
sph_haval256_5_context haval;
|
||||
} sonoa_ctx_holder;
|
||||
|
||||
@@ -82,7 +82,7 @@ void init_sonoa_ctx()
|
||||
sph_hamsi512_init( &sonoa_ctx.hamsi );
|
||||
sph_shabal512_init( &sonoa_ctx.shabal );
|
||||
sph_whirlpool_init( &sonoa_ctx.whirlpool );
|
||||
SHA512_Init( &sonoa_ctx.sha512 );
|
||||
sph_sha512_init( &sonoa_ctx.sha512 );
|
||||
sph_haval256_5_init(&sonoa_ctx.haval);
|
||||
};
|
||||
|
||||
@@ -494,8 +494,8 @@ int sonoa_hash( void *state, const void *input, int thr_id )
|
||||
sph_whirlpool(&ctx.whirlpool, hash, 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Update( &ctx.sha512, hash, 64 );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, hash, 64 );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool(&ctx.whirlpool, hash, 64);
|
||||
@@ -574,9 +574,9 @@ int sonoa_hash( void *state, const void *input, int thr_id )
|
||||
sph_whirlpool(&ctx.whirlpool, hash, 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, 64 );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, hash, 64 );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, 64);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
|
@@ -240,7 +240,13 @@ union _x17_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo512_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
@@ -248,7 +254,6 @@ union _x17_4way_context_overlay
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hashState_echo echo;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
@@ -275,6 +280,17 @@ int x17_4way_hash( void *state, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -284,6 +300,8 @@ int x17_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -308,6 +326,15 @@ int x17_4way_hash( void *state, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -322,6 +349,8 @@ int x17_4way_hash( void *state, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
@@ -11,7 +11,7 @@ bool register_x17_algo( algo_gate_t* gate )
|
||||
#else
|
||||
gate->hash = (void*)&x17_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -19,7 +19,7 @@
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/fugue/fugue-aesni.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -53,7 +53,7 @@ union _x17_context_overlay
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
sph_haval256_5_context haval;
|
||||
};
|
||||
typedef union _x17_context_overlay x17_context_overlay;
|
||||
@@ -140,9 +140,9 @@ int x17_hash(void *output, const void *input, int thr_id )
|
||||
sph_whirlpool( &ctx.whirlpool, hash, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, 64 );
|
||||
SHA512_Final( (unsigned char*)hash, &ctx.sha512 );
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, hash, 64 );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
sph_haval256_5( &ctx.haval, (const void*)hash, 64 );
|
||||
|
@@ -405,15 +405,20 @@ union _xevan_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hashState_echo echo;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
@@ -442,7 +447,17 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, dataLen );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, dataLen );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
|
||||
@@ -450,9 +465,10 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, dataLen<<3 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, dataLen<<3 );
|
||||
|
||||
// Parallel 4way
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -477,6 +493,15 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, dataLen );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, dataLen );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
||||
@@ -489,9 +514,10 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, dataLen );
|
||||
|
||||
// Parallel
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
@@ -542,6 +568,17 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
bmw512_4way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, dataLen );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, dataLen );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, dataLen<<3 );
|
||||
@@ -551,6 +588,8 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -575,6 +614,15 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, dataLen );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, dataLen );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, dataLen<<3 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
||||
@@ -589,6 +637,8 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
#endif
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
@@ -12,7 +12,7 @@ bool register_xevan_algo( algo_gate_t* gate )
|
||||
init_xevan_ctx();
|
||||
gate->hash = (void*)&xevan_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
200
algo/x17/xevan.c
200
algo/x17/xevan.c
@@ -20,7 +20,7 @@
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -44,7 +44,7 @@ typedef struct {
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
sph_haval256_5_context haval;
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
@@ -73,7 +73,7 @@ void init_xevan_ctx()
|
||||
sph_hamsi512_init( &xevan_ctx.hamsi );
|
||||
sph_shabal512_init( &xevan_ctx.shabal );
|
||||
sph_whirlpool_init( &xevan_ctx.whirlpool );
|
||||
SHA512_Init( &xevan_ctx.sha512 );
|
||||
sph_sha512_init( &xevan_ctx.sha512 );
|
||||
sph_haval256_5_init(&xevan_ctx.haval);
|
||||
#if defined(__AES__)
|
||||
init_groestl( &xevan_ctx.groestl, 64 );
|
||||
@@ -95,97 +95,27 @@ int xevan_hash(void *output, const void *input, int thr_id )
|
||||
|
||||
sph_blake512( &ctx.blake, input, 80 );
|
||||
sph_blake512_close( &ctx.blake, hash );
|
||||
memset(&hash[16], 0, 64);
|
||||
memset(&hash[16], 0, 64);
|
||||
|
||||
sph_bmw512(&ctx.bmw, hash, dataLen);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
sph_bmw512(&ctx.bmw, hash, dataLen);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, dataLen*8 );
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
|
||||
sph_skein512(&ctx.skein, hash, dataLen);
|
||||
sph_skein512_close(&ctx.skein, hash);
|
||||
|
||||
sph_jh512(&ctx.jh, hash, dataLen);
|
||||
sph_jh512_close(&ctx.jh, hash);
|
||||
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, dataLen*8 );
|
||||
|
||||
#if defined(__AES__)
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
(const BitSequence *) hash, dataLen*8 );
|
||||
#else
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
#endif
|
||||
|
||||
sph_hamsi512(&ctx.hamsi, hash, dataLen);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
fugue512_Update( &ctx.fugue, hash, dataLen*8 );
|
||||
fugue512_Final( &ctx.fugue, hash );
|
||||
#else
|
||||
sph_fugue512(&ctx.fugue, hash, dataLen);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
#endif
|
||||
|
||||
sph_shabal512(&ctx.shabal, hash, dataLen);
|
||||
sph_shabal512_close(&ctx.shabal, hash);
|
||||
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
|
||||
memset(&hash[8], 0, dataLen - 32);
|
||||
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
sph_blake512(&ctx.blake, hash, dataLen);
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
|
||||
sph_bmw512(&ctx.bmw, hash, dataLen);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const BitSequence*)hash, dataLen*8 );
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
|
||||
sph_skein512(&ctx.skein, hash, dataLen);
|
||||
sph_skein512_close(&ctx.skein, hash);
|
||||
sph_skein512(&ctx.skein, hash, dataLen);
|
||||
sph_skein512_close(&ctx.skein, hash);
|
||||
|
||||
sph_jh512(&ctx.jh, hash, dataLen);
|
||||
sph_jh512_close(&ctx.jh, hash);
|
||||
sph_jh512(&ctx.jh, hash, dataLen);
|
||||
sph_jh512_close(&ctx.jh, hash);
|
||||
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
@@ -193,8 +123,8 @@ int xevan_hash(void *output, const void *input, int thr_id )
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, dataLen*8 );
|
||||
@@ -207,30 +137,100 @@ int xevan_hash(void *output, const void *input, int thr_id )
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
#endif
|
||||
|
||||
sph_hamsi512(&ctx.hamsi, hash, dataLen);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
sph_hamsi512(&ctx.hamsi, hash, dataLen);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
fugue512_Update( &ctx.fugue, hash, dataLen*8 );
|
||||
fugue512_Final( &ctx.fugue, hash );
|
||||
fugue512_Update( &ctx.fugue, hash, dataLen*8 );
|
||||
fugue512_Final( &ctx.fugue, hash );
|
||||
#else
|
||||
sph_fugue512(&ctx.fugue, hash, dataLen);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
sph_fugue512(&ctx.fugue, hash, dataLen);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
#endif
|
||||
|
||||
sph_shabal512(&ctx.shabal, hash, dataLen);
|
||||
sph_shabal512_close(&ctx.shabal, hash);
|
||||
sph_shabal512(&ctx.shabal, hash, dataLen);
|
||||
sph_shabal512_close(&ctx.shabal, hash);
|
||||
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, hash, dataLen );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
memset(&hash[8], 0, dataLen - 32);
|
||||
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
sph_blake512(&ctx.blake, hash, dataLen);
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
|
||||
sph_bmw512(&ctx.bmw, hash, dataLen);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const BitSequence*)hash, dataLen*8 );
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
|
||||
sph_skein512(&ctx.skein, hash, dataLen);
|
||||
sph_skein512_close(&ctx.skein, hash);
|
||||
|
||||
sph_jh512(&ctx.jh, hash, dataLen);
|
||||
sph_jh512_close(&ctx.jh, hash);
|
||||
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, dataLen*8 );
|
||||
|
||||
#if defined(__AES__)
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
(const BitSequence *) hash, dataLen*8 );
|
||||
#else
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
#endif
|
||||
|
||||
sph_hamsi512(&ctx.hamsi, hash, dataLen);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
fugue512_Update( &ctx.fugue, hash, dataLen*8 );
|
||||
fugue512_Final( &ctx.fugue, hash );
|
||||
#else
|
||||
sph_fugue512(&ctx.fugue, hash, dataLen);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
#endif
|
||||
|
||||
sph_shabal512(&ctx.shabal, hash, dataLen);
|
||||
sph_shabal512_close(&ctx.shabal, hash);
|
||||
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
sph_sha512( &ctx.sha512, hash, dataLen );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@@ -18,7 +18,7 @@
|
||||
#include "algo/radiogatun/sph_radiogatun.h"
|
||||
#include "algo/panama/sph_panama.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
@@ -56,7 +56,7 @@ union _x20r_context_overlay
|
||||
sph_fugue512_context fugue;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
sph_haval256_5_context haval;
|
||||
sph_gost512_context gost;
|
||||
sph_radiogatun64_context radiogatun;
|
||||
@@ -68,28 +68,6 @@ void x20r_hash(void* output, const void* input)
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[64/4];
|
||||
x20r_context_overlay ctx;
|
||||
/*
|
||||
sph_blake512_context ctx_blake;
|
||||
sph_bmw512_context ctx_bmw;
|
||||
sph_groestl512_context ctx_groestl;
|
||||
sph_skein512_context ctx_skein;
|
||||
sph_jh512_context ctx_jh;
|
||||
sph_keccak512_context ctx_keccak;
|
||||
sph_luffa512_context ctx_luffa;
|
||||
sph_cubehash512_context ctx_cubehash;
|
||||
sph_shavite512_context ctx_shavite;
|
||||
sph_simd512_context ctx_simd;
|
||||
sph_echo512_context ctx_echo;
|
||||
sph_hamsi512_context ctx_hamsi;
|
||||
sph_fugue512_context ctx_fugue;
|
||||
sph_shabal512_context ctx_shabal;
|
||||
sph_whirlpool_context ctx_whirlpool;
|
||||
sph_sha512_context ctx_sha512;
|
||||
sph_haval256_5_context ctx_haval;
|
||||
sph_gost512_context ctx_gost;
|
||||
sph_radiogatun64_context ctx_radiogatun;
|
||||
sph_panama_context ctx_panama;
|
||||
*/
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
|
||||
@@ -194,9 +172,9 @@ void x20r_hash(void* output, const void* input)
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
break;
|
||||
case SHA_512:
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, in, size );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
sph_sha512_Init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, in, size );
|
||||
sph_sha512_close( &ctx.sha512, hash );
|
||||
break;
|
||||
case HAVAL:
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
|
@@ -11,7 +11,7 @@
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
#include "algo/fugue/fugue-aesni.h"
|
||||
#include "algo/shabal/shabal-hash-4way.h"
|
||||
@@ -27,7 +27,9 @@
|
||||
#include "algo/shavite/shavite-hash-4way.h"
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#endif
|
||||
|
||||
#if defined(__SHA__)
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#endif
|
||||
|
||||
#if defined(X22I_8WAY)
|
||||
|
||||
@@ -49,7 +51,11 @@ union _x22i_8way_ctx_overlay
|
||||
haval256_5_8way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(X22I_8WAY_SHA)
|
||||
sph_sha256_context sha256;
|
||||
#else
|
||||
sha256_8way_context sha256;
|
||||
#endif
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
@@ -383,6 +389,35 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
sph_gost512 ( &ctx.gost, (const void*) hash7, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*) hash7 );
|
||||
|
||||
#if defined(X22I_8WAY_SHA)
|
||||
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash0, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash1, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+32 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash2, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+64 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash3, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+96 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash4, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+128 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash5, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+160 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash6, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+192 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash7, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+224 );
|
||||
|
||||
#else
|
||||
|
||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
@@ -390,9 +425,55 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
|
||||
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if defined(X22I_8WAY_SHA)
|
||||
|
||||
int scanhash_x22i_8way_sha( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( x22i_8way_hash( hash, vdata, thr_id ) )
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
@@ -440,53 +521,7 @@ int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x22i_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
#elif defined(X22I_4WAY)
|
||||
|
||||
@@ -494,14 +529,19 @@ union _x22i_4way_ctx_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
shavite512_2way_context shavite;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
@@ -511,7 +551,11 @@ union _x22i_4way_ctx_overlay
|
||||
haval256_5_4way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(X22I_4WAY_SHA)
|
||||
sph_sha256_context sha256;
|
||||
#else
|
||||
sha256_4way_context sha256;
|
||||
#endif
|
||||
};
|
||||
typedef union _x22i_4way_ctx_overlay x22i_ctx_overlay;
|
||||
|
||||
@@ -535,15 +579,29 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (const char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (const char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (const char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (const char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -570,6 +628,15 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
@@ -584,6 +651,8 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
#endif
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
@@ -625,7 +694,7 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2);
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash3, (unsigned char*)hashA3);
|
||||
@@ -639,7 +708,7 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
memset( hashA0, 0, 64 );
|
||||
memset( hashA0, 0, 64 );
|
||||
memset( hashA1, 0, 64 );
|
||||
memset( hashA2, 0, 64 );
|
||||
memset( hashA3, 0, 64 );
|
||||
@@ -654,8 +723,8 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
sph_tiger (&ctx.tiger, (const void*) hash2, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA2);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash3, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA3);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash3, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hashA3);
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
@@ -682,9 +751,26 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash2, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash2);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash3, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash3);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash3, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash3);
|
||||
|
||||
#if defined(X22I_4WAY_SHA)
|
||||
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash0, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash1, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+32 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash2, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+64 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash3, 64 );
|
||||
sph_sha256_close( &ctx.sha256, output+96 );
|
||||
|
||||
#else
|
||||
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
@@ -692,11 +778,56 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
|
||||
sha256_4way_update( &ctx.sha256, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha256, output );
|
||||
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if defined(X22I_4WAY_SHA)
|
||||
|
||||
int scanhash_x22i_4way_sha( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( x22i_4way_hash( hash, vdata, thr_id ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -741,4 +872,6 @@ int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // X22I_4WAY
|
||||
|
@@ -7,21 +7,32 @@
|
||||
bool register_x22i_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X22I_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x22i_8way;
|
||||
gate->hash = (void*)&x22i_8way_hash;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
#elif defined (X22I_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x22i_4way;
|
||||
gate->hash = (void*)&x22i_4way_hash;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
|
||||
#if defined(X22I_8WAY_SHA)
|
||||
gate->scanhash = (void*)&scanhash_x22i_8way_sha;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x22i_8way;
|
||||
#endif
|
||||
gate->hash = (void*)&x22i_8way_hash;
|
||||
|
||||
#elif defined (X22I_4WAY)
|
||||
|
||||
#if defined(X22I_4WAY_SHA)
|
||||
gate->scanhash = (void*)&scanhash_x22i_4way_sha;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x22i_4way;
|
||||
#endif
|
||||
gate->hash = (void*)&x22i_4way_hash;
|
||||
|
||||
#else
|
||||
|
||||
gate->scanhash = (void*)&scanhash_x22i;
|
||||
gate->hash = (void*)&x22i_hash;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
|
||||
#endif
|
||||
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||
| AVX512_OPT | VAES_OPT | VAES256_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -30,21 +41,15 @@ bool register_x25x_algo( algo_gate_t* gate )
|
||||
#if defined (X25X_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x25x_8way;
|
||||
gate->hash = (void*)&x25x_8way_hash;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
#elif defined (X25X_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x25x_4way;
|
||||
gate->hash = (void*)&x25x_4way_hash;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x25x;
|
||||
gate->hash = (void*)&x25x_hash;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||
| AVX512_OPT | VAES_OPT;
|
||||
#endif
|
||||
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
|
||||
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT |
|
||||
AVX512_OPT | VAES_OPT | VAES256_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -12,19 +12,34 @@
|
||||
#define X22I_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__SHA__)
|
||||
// #define X22I_8WAY_SHA 1
|
||||
#define X22I_4WAY_SHA 1
|
||||
#endif
|
||||
|
||||
bool register_x22i_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X22I_8WAY)
|
||||
|
||||
int x22i_8way_hash( void *state, const void *input, int thrid );
|
||||
#if defined(X22I_8WAY_SHA)
|
||||
int scanhash_x22i_8way_sha( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#else
|
||||
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
#elif defined(X22I_4WAY)
|
||||
|
||||
int x22i_4way_hash( void *state, const void *input, int thrid );
|
||||
#if defined(X22I_4WAY_SHA)
|
||||
int scanhash_x22i_4way_sha( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#else
|
||||
int scanhash_x22i_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
@@ -40,6 +55,11 @@ int scanhash_x22i( struct work *work, uint32_t max_nonce,
|
||||
#define X25X_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(__SHA__)
|
||||
// #define X25X_8WAY_SHA 1
|
||||
#define X25X_4WAY_SHA 1
|
||||
#endif
|
||||
|
||||
bool register_x25i_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X25X_8WAY)
|
||||
|
112
algo/x22/x22i.c
112
algo/x22/x22i.c
@@ -23,7 +23,7 @@
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
@@ -53,11 +53,11 @@ union _x22i_context_overlay
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
sph_haval256_5_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
SHA256_CTX sha256;
|
||||
sph_sha256_context sha256;
|
||||
};
|
||||
typedef union _x22i_context_overlay x22i_context_overlay;
|
||||
|
||||
@@ -67,13 +67,13 @@ int x22i_hash( void *output, const void *input, int thrid )
|
||||
unsigned char hash2[65] __attribute__((aligned(64))) = {0};
|
||||
x22i_context_overlay ctx;
|
||||
|
||||
sph_blake512_init(&ctx.blake);
|
||||
sph_blake512(&ctx.blake, input, 80);
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
sph_blake512_init(&ctx.blake);
|
||||
sph_blake512(&ctx.blake, input, 80);
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
|
||||
sph_bmw512_init(&ctx.bmw);
|
||||
sph_bmw512(&ctx.bmw, (const void*) hash, 64);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
sph_bmw512_init(&ctx.bmw);
|
||||
sph_bmw512(&ctx.bmw, (const void*) hash, 64);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
@@ -85,17 +85,17 @@ int x22i_hash( void *output, const void *input, int thrid )
|
||||
sph_groestl512_close( &ctx.groestl, hash );
|
||||
#endif
|
||||
|
||||
sph_skein512_init(&ctx.skein);
|
||||
sph_skein512(&ctx.skein, (const void*) hash, 64);
|
||||
sph_skein512_close(&ctx.skein, hash);
|
||||
sph_skein512_init(&ctx.skein);
|
||||
sph_skein512(&ctx.skein, (const void*) hash, 64);
|
||||
sph_skein512_close(&ctx.skein, hash);
|
||||
|
||||
sph_jh512_init(&ctx.jh);
|
||||
sph_jh512(&ctx.jh, (const void*) hash, 64);
|
||||
sph_jh512_close(&ctx.jh, hash);
|
||||
sph_jh512_init(&ctx.jh);
|
||||
sph_jh512(&ctx.jh, (const void*) hash, 64);
|
||||
sph_jh512_close(&ctx.jh, hash);
|
||||
|
||||
sph_keccak512_init(&ctx.keccak);
|
||||
sph_keccak512(&ctx.keccak, (const void*) hash, 64);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
sph_keccak512_init(&ctx.keccak);
|
||||
sph_keccak512(&ctx.keccak, (const void*) hash, 64);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
@@ -107,9 +107,9 @@ int x22i_hash( void *output, const void *input, int thrid )
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||
(const byte*)hash, 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash, 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash, 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash,
|
||||
@@ -127,56 +127,56 @@ int x22i_hash( void *output, const void *input, int thrid )
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
sph_hamsi512_init(&ctx.hamsi);
|
||||
sph_hamsi512(&ctx.hamsi, (const void*) hash, 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
sph_hamsi512_init(&ctx.hamsi);
|
||||
sph_hamsi512(&ctx.hamsi, (const void*) hash, 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
fugue512_full( &ctx.fugue, hash, hash, 64 );
|
||||
fugue512_full( &ctx.fugue, hash, hash, 64 );
|
||||
#else
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) hash, 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) hash, 64);
|
||||
sph_fugue512_close(&ctx.fugue, hash);
|
||||
#endif
|
||||
|
||||
sph_shabal512_init(&ctx.shabal);
|
||||
sph_shabal512(&ctx.shabal, (const void*) hash, 64);
|
||||
sph_shabal512_close(&ctx.shabal, &hash[64]);
|
||||
sph_shabal512_init(&ctx.shabal);
|
||||
sph_shabal512(&ctx.shabal, (const void*) hash, 64);
|
||||
sph_shabal512_close(&ctx.shabal, &hash[64]);
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[64], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash[128]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[64], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash[128]);
|
||||
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, (const void*) &hash[128], 64);
|
||||
SHA512_Final( (void*) &hash[192], &ctx.sha512 );
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash, (unsigned char*)hash2);
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, &hash[128], 64 );
|
||||
sph_sha512_close( &ctx.sha512, &hash[192] );
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)hash, (unsigned char*)hash2);
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
memset(hash, 0, 64);
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash2, 64);
|
||||
sph_haval256_5_close(&ctx.haval,hash);
|
||||
memset(hash, 0, 64);
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash2, 64);
|
||||
sph_haval256_5_close(&ctx.haval,hash);
|
||||
|
||||
memset(hash2, 0, 64);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash2);
|
||||
memset(hash2, 0, 64);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash, 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash2);
|
||||
|
||||
memset(hash, 0, 64);
|
||||
LYRA2RE((void*) hash, 32, (const void*) hash2, 32, (const void*) hash2, 32, 1, 4, 4);
|
||||
memset(hash, 0, 64);
|
||||
LYRA2RE((void*) hash, 32, (const void*) hash2, 32, (const void*) hash2, 32, 1, 4, 4);
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash, 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash);
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, (const void*) hash, 64 );
|
||||
SHA256_Final( (unsigned char*) hash, &ctx.sha256 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash, 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash );
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
memcpy(output, hash, 32);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@@ -15,6 +15,7 @@
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
@@ -31,6 +32,9 @@
|
||||
#include "algo/shavite/shavite-hash-4way.h"
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#endif
|
||||
#if defined(__SHA__)
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#endif
|
||||
|
||||
void x25x_shuffle( void *hash )
|
||||
{
|
||||
@@ -79,7 +83,11 @@ union _x25x_8way_ctx_overlay
|
||||
haval256_5_8way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(X25X_8WAY_SHA)
|
||||
sph_sha256_context sha256;
|
||||
#else
|
||||
sha256_8way_context sha256;
|
||||
#endif
|
||||
panama_8way_context panama;
|
||||
blake2s_8way_state blake2s;
|
||||
#if defined(__VAES__)
|
||||
@@ -215,9 +223,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
#else
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash0[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash0[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash1[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash1[8]);
|
||||
@@ -321,9 +329,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
dintrlv_8x32_512( hash0[13], hash1[13], hash2[13], hash3[13],
|
||||
hash4[13], hash5[13], hash6[13], hash7[13], vhash );
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) hash0[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash0[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) hash0[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash0[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) hash1[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash1[14]);
|
||||
@@ -372,9 +380,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
dintrlv_8x32_512( hash0[17], hash1[17], hash2[17], hash3[17],
|
||||
hash4[17], hash5[17], hash6[17], hash7[17], vhash );
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash0[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash0[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash0[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash0[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) hash1[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) hash1[18]);
|
||||
@@ -412,9 +420,9 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
LYRA2X_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6[19], hash7[19], vhash, 256 );
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash0[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash0[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash0[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash0[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash1[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash1[20]);
|
||||
@@ -436,6 +444,39 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) hash7[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash7[20]);
|
||||
|
||||
#if defined(X25X_8WAY_SHA)
|
||||
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash0[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash0[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash1[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash1[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash2[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash2[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash3[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash3[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash4[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash4[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash5[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash5[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash6[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash6[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash7[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash7[21] );
|
||||
|
||||
intrlv_8x32_512( vhash, hash0[21], hash1[21], hash2[21], hash3[21],
|
||||
hash4[21], hash5[21], hash6[21], hash7[21] );
|
||||
|
||||
#else
|
||||
|
||||
intrlv_8x32_512( vhashA, hash0[20], hash1[20], hash2[20], hash3[20],
|
||||
hash4[20], hash5[20], hash6[20], hash7[20] );
|
||||
|
||||
@@ -445,6 +486,8 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
|
||||
dintrlv_8x32_512( hash0[21], hash1[21], hash2[21], hash3[21],
|
||||
hash4[21], hash5[21], hash6[21], hash7[21], vhash );
|
||||
|
||||
#endif
|
||||
|
||||
panama_8way_init( &ctx.panama );
|
||||
panama_8way_update( &ctx.panama, vhash, 64 );
|
||||
panama_8way_close( &ctx.panama, vhash );
|
||||
@@ -574,68 +617,26 @@ int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x25x_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#elif defined(X25X_4WAY)
|
||||
|
||||
union _x25x_4way_ctx_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
echo_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
luffa_2way_context luffa;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
@@ -644,7 +645,11 @@ union _x25x_4way_ctx_overlay
|
||||
haval256_5_4way_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
#if defined(X25X_4WAY_SHA)
|
||||
sph_sha256_context sha256;
|
||||
#else
|
||||
sha256_4way_context sha256;
|
||||
#endif
|
||||
panama_4way_context panama;
|
||||
blake2s_4way_state blake2s;
|
||||
};
|
||||
@@ -658,6 +663,8 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
unsigned char hash2[25][64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char hash3[25][64] __attribute__((aligned(64))) = {0};
|
||||
unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
@@ -668,11 +675,25 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_2way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_2way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
dintrlv_2x128_512( hash0[2], hash1[2], vhashA );
|
||||
dintrlv_2x128_512( hash2[2], hash3[2], vhashB );
|
||||
|
||||
#else
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0[2], (const char*)hash0[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1[2], (const char*)hash1[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2[2], (const char*)hash2[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3[2], (const char*)hash3[1], 512 );
|
||||
|
||||
#endif
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
|
||||
@@ -689,41 +710,38 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
|
||||
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash0[6], 512,
|
||||
(const BitSequence*)hash0[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash1[6], 512,
|
||||
(const BitSequence*)hash1[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash2[6], 512,
|
||||
(const BitSequence*)hash2[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash3[6], 512,
|
||||
(const BitSequence*)hash3[5], 64 );
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*)hash0[7], 512, (const byte*)hash0[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1[7], 512, (const byte*)hash1[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2[7], 512, (const byte*)hash2[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3[7], 512, (const byte*)hash3[6], 64 );
|
||||
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
dintrlv_2x128_512( hash0[6], hash1[6], vhashA );
|
||||
dintrlv_2x128_512( hash2[6], hash3[6], vhashB );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash0[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash1[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash1[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash2[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash2[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash3[8]);
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
|
||||
dintrlv_2x128_512( hash0[7], hash1[7], vhashA );
|
||||
dintrlv_2x128_512( hash2[7], hash3[7], vhashB );
|
||||
|
||||
simd_full( &ctx.simd, (BitSequence*)hash0[9],
|
||||
(const BitSequence*)hash0[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash1[9],
|
||||
(const BitSequence*)hash1[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash2[9],
|
||||
(const BitSequence*)hash2[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash3[9],
|
||||
(const BitSequence*)hash3[8], 512 );
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
dintrlv_2x128_512( hash0[8], hash1[8], vhashA );
|
||||
dintrlv_2x128_512( hash2[8], hash3[8], vhashB );
|
||||
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
dintrlv_2x128_512( hash0[9], hash1[9], vhashA );
|
||||
dintrlv_2x128_512( hash2[9], hash3[9], vhashB );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_2way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_2way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
dintrlv_2x128_512( hash0[10], hash1[10], vhashA );
|
||||
dintrlv_2x128_512( hash2[10], hash3[10], vhashB );
|
||||
|
||||
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0[10], 512,
|
||||
(const BitSequence *)hash0[ 9], 64 );
|
||||
@@ -736,6 +754,8 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );
|
||||
|
||||
#endif
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
@@ -826,6 +846,25 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
sph_gost512 (&ctx.gost, (const void*) hash3[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) hash3[20]);
|
||||
|
||||
#if defined(X25X_4WAY_SHA)
|
||||
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash0[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash0[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash1[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash1[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash2[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash2[21] );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, hash3[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, hash3[21] );
|
||||
|
||||
intrlv_4x32_512( vhash, hash0[21], hash1[21], hash2[21], hash3[21] );
|
||||
|
||||
#else
|
||||
|
||||
intrlv_4x32_512( vhashX[0], hash0[20], hash1[20], hash2[20], hash3[20] );
|
||||
memset( vhash, 0, 64*4 );
|
||||
|
||||
@@ -834,6 +873,8 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
|
||||
sha256_4way_close( &ctx.sha256, vhash );
|
||||
dintrlv_4x32_512( hash0[21], hash1[21], hash2[21], hash3[21], vhash );
|
||||
|
||||
#endif
|
||||
|
||||
panama_4way_init( &ctx.panama );
|
||||
panama_4way_update( &ctx.panama, vhash, 64 );
|
||||
panama_4way_close( &ctx.panama, vhash );
|
||||
|
@@ -23,7 +23,7 @@
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
#include "algo/lyra2/lyra2.h"
|
||||
@@ -56,11 +56,11 @@ union _x25x_context_overlay
|
||||
sph_hamsi512_context hamsi;
|
||||
sph_shabal512_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
SHA512_CTX sha512;
|
||||
sph_sha512_context sha512;
|
||||
sph_haval256_5_context haval;
|
||||
sph_tiger_context tiger;
|
||||
sph_gost512_context gost;
|
||||
SHA256_CTX sha256;
|
||||
sph_sha256_context sha256;
|
||||
sph_panama_context panama;
|
||||
blake2s_state blake2s;
|
||||
};
|
||||
@@ -71,13 +71,13 @@ int x25x_hash( void *output, const void *input, int thrid )
|
||||
unsigned char hash[25][64] __attribute__((aligned(64))) = {0};
|
||||
x25x_context_overlay ctx;
|
||||
|
||||
sph_blake512_init(&ctx.blake);
|
||||
sph_blake512(&ctx.blake, input, 80);
|
||||
sph_blake512_close(&ctx.blake, &hash[0] );
|
||||
sph_blake512_init(&ctx.blake);
|
||||
sph_blake512(&ctx.blake, input, 80);
|
||||
sph_blake512_close(&ctx.blake, &hash[0] );
|
||||
|
||||
sph_bmw512_init(&ctx.bmw);
|
||||
sph_bmw512(&ctx.bmw, (const void*) &hash[0], 64);
|
||||
sph_bmw512_close(&ctx.bmw, &hash[1]);
|
||||
sph_bmw512_init(&ctx.bmw);
|
||||
sph_bmw512(&ctx.bmw, (const void*) &hash[0], 64);
|
||||
sph_bmw512_close(&ctx.bmw, &hash[1]);
|
||||
|
||||
#if defined(__AES__)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
@@ -89,17 +89,17 @@ int x25x_hash( void *output, const void *input, int thrid )
|
||||
sph_groestl512_close( &ctx.groestl, &hash[2] );
|
||||
#endif
|
||||
|
||||
sph_skein512_init(&ctx.skein);
|
||||
sph_skein512(&ctx.skein, (const void*) &hash[2], 64);
|
||||
sph_skein512_close(&ctx.skein, &hash[3]);
|
||||
sph_skein512_init(&ctx.skein);
|
||||
sph_skein512(&ctx.skein, (const void*) &hash[2], 64);
|
||||
sph_skein512_close(&ctx.skein, &hash[3]);
|
||||
|
||||
sph_jh512_init(&ctx.jh);
|
||||
sph_jh512(&ctx.jh, (const void*) &hash[3], 64);
|
||||
sph_jh512_close(&ctx.jh, &hash[4]);
|
||||
sph_jh512_init(&ctx.jh);
|
||||
sph_jh512(&ctx.jh, (const void*) &hash[3], 64);
|
||||
sph_jh512_close(&ctx.jh, &hash[4]);
|
||||
|
||||
sph_keccak512_init(&ctx.keccak);
|
||||
sph_keccak512(&ctx.keccak, (const void*) &hash[4], 64);
|
||||
sph_keccak512_close(&ctx.keccak, &hash[5]);
|
||||
sph_keccak512_init(&ctx.keccak);
|
||||
sph_keccak512(&ctx.keccak, (const void*) &hash[4], 64);
|
||||
sph_keccak512_close(&ctx.keccak, &hash[5]);
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
@@ -111,9 +111,9 @@ int x25x_hash( void *output, const void *input, int thrid )
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) &hash[7],
|
||||
(const byte*)&hash[6], 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash[8]);
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) &hash[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, &hash[8]);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)&hash[9],
|
||||
@@ -132,51 +132,51 @@ int x25x_hash( void *output, const void *input, int thrid )
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
sph_hamsi512_init(&ctx.hamsi);
|
||||
sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, &hash[11]);
|
||||
sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, &hash[11]);
|
||||
|
||||
#if defined(__AES__)
|
||||
fugue512_full( &ctx.fugue, &hash[12], &hash[11], 64 );
|
||||
fugue512_full( &ctx.fugue, &hash[12], &hash[11], 64 );
|
||||
#else
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash[12]);
|
||||
sph_fugue512_init(&ctx.fugue);
|
||||
sph_fugue512(&ctx.fugue, (const void*) &hash[11], 64);
|
||||
sph_fugue512_close(&ctx.fugue, &hash[12]);
|
||||
#endif
|
||||
|
||||
sph_shabal512_init(&ctx.shabal);
|
||||
sph_shabal512(&ctx.shabal, (const void*) &hash[12], 64);
|
||||
sph_shabal512_close(&ctx.shabal, &hash[13]);
|
||||
sph_shabal512_init(&ctx.shabal);
|
||||
sph_shabal512(&ctx.shabal, (const void*) &hash[12], 64);
|
||||
sph_shabal512_close(&ctx.shabal, &hash[13]);
|
||||
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash[14]);
|
||||
sph_whirlpool_init(&ctx.whirlpool);
|
||||
sph_whirlpool (&ctx.whirlpool, (const void*) &hash[13], 64);
|
||||
sph_whirlpool_close(&ctx.whirlpool, &hash[14]);
|
||||
|
||||
SHA512_Init( &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, (const void*) &hash[14], 64);
|
||||
SHA512_Final( (void*) &hash[15], &ctx.sha512 );
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_sha512( &ctx.sha512, &hash[14], 64 );
|
||||
sph_sha512_close( &ctx.sha512, &hash[15] );
|
||||
|
||||
ComputeSingleSWIFFTX((unsigned char*)&hash[12], (unsigned char*)&hash[16]);
|
||||
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
sph_haval256_5(&ctx.haval,(const void*) &hash[16], 64);
|
||||
sph_haval256_5_close(&ctx.haval,&hash[17]);
|
||||
sph_haval256_5_init(&ctx.haval);
|
||||
sph_haval256_5(&ctx.haval,(const void*) &hash[16], 64);
|
||||
sph_haval256_5_close(&ctx.haval,&hash[17]);
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash[18]);
|
||||
sph_tiger_init(&ctx.tiger);
|
||||
sph_tiger (&ctx.tiger, (const void*) &hash[17], 64);
|
||||
sph_tiger_close(&ctx.tiger, (void*) &hash[18]);
|
||||
|
||||
LYRA2RE( (void*)&hash[19], 32, (const void*)&hash[18], 32,
|
||||
LYRA2RE( (void*)&hash[19], 32, (const void*)&hash[18], 32,
|
||||
(const void*)&hash[18], 32, 1, 4, 4 );
|
||||
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash[20]);
|
||||
sph_gost512_init(&ctx.gost);
|
||||
sph_gost512 (&ctx.gost, (const void*) &hash[19], 64);
|
||||
sph_gost512_close(&ctx.gost, (void*) &hash[20]);
|
||||
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
SHA256_Update( &ctx.sha256, (const void*) &hash[20], 64 );
|
||||
SHA256_Final( (unsigned char*) &hash[21], &ctx.sha256 );
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
sph_sha256( &ctx.sha256, &hash[20], 64 );
|
||||
sph_sha256_close( &ctx.sha256, &hash[21] );
|
||||
|
||||
sph_panama_init(&ctx.panama);
|
||||
sph_panama (&ctx.panama, (const void*) &hash[21], 64 );
|
||||
|
@@ -1302,10 +1302,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
||||
S = (uint8_t *)XY + XY_size;
|
||||
|
||||
if (t || flags) {
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init(&ctx);
|
||||
SHA256_Update(&ctx, passwd, passwdlen);
|
||||
SHA256_Final(sha256, &ctx);
|
||||
SHA256_Buf( passwd, passwdlen, sha256 );
|
||||
passwd = sha256;
|
||||
passwdlen = sizeof(sha256);
|
||||
}
|
||||
@@ -1382,10 +1379,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
||||
}
|
||||
/* Compute StoredKey */
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init(&ctx);
|
||||
SHA256_Update(&ctx, sha256, sizeof(sha256));
|
||||
SHA256_Final(buf, &ctx);
|
||||
SHA256_Buf( sha256, sizeof(sha256), buf );
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -52,8 +52,8 @@ int scanhash_yespower_r8g( struct work *work, uint32_t max_nonce,
|
||||
endiandata[19] = n;
|
||||
|
||||
// do sha256 prehash
|
||||
SHA256_Init( &sha256_prehash_ctx );
|
||||
SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
|
||||
sph_sha256_init( &sha256_prehash_ctx );
|
||||
sph_sha256( &sha256_prehash_ctx, endiandata, 64 );
|
||||
|
||||
do {
|
||||
yespower_tls( (unsigned char *)endiandata, params.perslen,
|
||||
|
@@ -259,15 +259,24 @@ static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin,
|
||||
#define WRITE_X(out) \
|
||||
(out).q[0] = X0; (out).q[1] = X1; (out).q[2] = X2; (out).q[3] = X3;
|
||||
|
||||
#ifdef __XOP__
|
||||
#if defined(__AVX512VL__)
|
||||
|
||||
#define ARX(out, in1, in2, s) \
|
||||
out = _mm_xor_si128(out, _mm_rol_epi32(_mm_add_epi32(in1, in2), s));
|
||||
|
||||
#elif defined(__XOP__)
|
||||
|
||||
#define ARX(out, in1, in2, s) \
|
||||
out = _mm_xor_si128(out, _mm_roti_epi32(_mm_add_epi32(in1, in2), s));
|
||||
|
||||
#else
|
||||
|
||||
#define ARX(out, in1, in2, s) { \
|
||||
__m128i tmp = _mm_add_epi32(in1, in2); \
|
||||
out = _mm_xor_si128(out, _mm_slli_epi32(tmp, s)); \
|
||||
out = _mm_xor_si128(out, _mm_srli_epi32(tmp, 32 - s)); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define SALSA20_2ROUNDS \
|
||||
|
@@ -33,7 +33,8 @@
|
||||
yespower_params_t yespower_params;
|
||||
|
||||
//SHA256_CTX sha256_prehash_ctx;
|
||||
__thread SHA256_CTX sha256_prehash_ctx;
|
||||
__thread sph_sha256_context sha256_prehash_ctx;
|
||||
//__thread SHA256_CTX sha256_prehash_ctx;
|
||||
|
||||
// YESPOWER
|
||||
|
||||
@@ -59,9 +60,9 @@ int scanhash_yespower( struct work *work, uint32_t max_nonce,
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
endiandata[19] = n;
|
||||
|
||||
// do sha256 prehash
|
||||
SHA256_Init( &sha256_prehash_ctx );
|
||||
SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
|
||||
// do sha256 prehash
|
||||
sph_sha256_init( &sha256_prehash_ctx );
|
||||
sph_sha256( &sha256_prehash_ctx, endiandata, 64 );
|
||||
|
||||
do {
|
||||
if ( yespower_hash( (char*)endiandata, (char*)vhash, 80, thr_id ) )
|
||||
@@ -100,9 +101,9 @@ int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce,
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
endiandata[19] = n;
|
||||
|
||||
// do sha256 prehash
|
||||
SHA256_Init( &sha256_prehash_ctx );
|
||||
SHA256_Update( &sha256_prehash_ctx, endiandata, 64 );
|
||||
// do sha256 prehash
|
||||
sph_sha256_init( &sha256_prehash_ctx );
|
||||
sph_sha256( &sha256_prehash_ctx, endiandata, 64 );
|
||||
|
||||
do {
|
||||
if (yespower_b2b_hash( (char*) endiandata, (char*) vhash, 80, thr_id ) )
|
||||
@@ -165,25 +166,14 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
/* not used, doesn't work
|
||||
bool register_yescrypt_05_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
yespower_params.version = YESPOWER_0_5;
|
||||
yespower_params.N = 2048;
|
||||
yespower_params.r = 8;
|
||||
yespower_params.pers = NULL;
|
||||
yespower_params.perslen = 0;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
// Legacy Yescrypt (yespower v0.5)
|
||||
|
||||
bool register_yescrypt_05_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
yespower_params.version = YESPOWER_0_5;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
if ( opt_param_n ) yespower_params.N = opt_param_n;
|
||||
else yespower_params.N = 2048;
|
||||
@@ -202,8 +192,6 @@ bool register_yescrypt_05_algo( algo_gate_t* gate )
|
||||
yespower_params.perslen = 0;
|
||||
}
|
||||
|
||||
// YESCRYPT_P = 1;
|
||||
|
||||
applog( LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.",
|
||||
yespower_params.N, yespower_params.r );
|
||||
if ( yespower_params.pers )
|
||||
@@ -233,8 +221,8 @@ bool register_yescryptr16_05_algo( algo_gate_t* gate )
|
||||
yespower_params.version = YESPOWER_0_5;
|
||||
yespower_params.N = 4096;
|
||||
yespower_params.r = 16;
|
||||
yespower_params.pers = NULL;
|
||||
yespower_params.perslen = 0;
|
||||
yespower_params.pers = "Client Key";
|
||||
yespower_params.perslen = 10;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
@@ -251,7 +239,6 @@ bool register_yescryptr32_05_algo( algo_gate_t* gate )
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
// POWER2B
|
||||
|
||||
|
@@ -165,6 +165,7 @@ static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin,
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
#define DECL_X \
|
||||
__m128i X0, X1, X2, X3;
|
||||
#define DECL_Y \
|
||||
@@ -174,15 +175,24 @@ static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin,
|
||||
#define WRITE_X(out) \
|
||||
(out).q[0] = X0; (out).q[1] = X1; (out).q[2] = X2; (out).q[3] = X3;
|
||||
|
||||
#ifdef __XOP__
|
||||
#if defined(__AVX512VL__)
|
||||
|
||||
#define ARX(out, in1, in2, s) \
|
||||
out = _mm_xor_si128(out, _mm_rol_epi32(_mm_add_epi32(in1, in2), s));
|
||||
|
||||
#elif defined(__XOP__)
|
||||
|
||||
#define ARX(out, in1, in2, s) \
|
||||
out = _mm_xor_si128(out, _mm_roti_epi32(_mm_add_epi32(in1, in2), s));
|
||||
|
||||
#else
|
||||
|
||||
#define ARX(out, in1, in2, s) { \
|
||||
__m128i tmp = _mm_add_epi32(in1, in2); \
|
||||
out = _mm_xor_si128(out, _mm_slli_epi32(tmp, s)); \
|
||||
out = _mm_xor_si128(out, _mm_srli_epi32(tmp, 32 - s)); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define SALSA20_2ROUNDS \
|
||||
@@ -1029,72 +1039,72 @@ int yespower(yespower_local_t *local,
|
||||
const yespower_params_t *params,
|
||||
yespower_binary_t *dst, int thrid )
|
||||
{
|
||||
yespower_version_t version = params->version;
|
||||
uint32_t N = params->N;
|
||||
uint32_t r = params->r;
|
||||
const uint8_t *pers = params->pers;
|
||||
size_t perslen = params->perslen;
|
||||
uint32_t Swidth;
|
||||
size_t B_size, V_size, XY_size, need;
|
||||
uint8_t *B, *S;
|
||||
salsa20_blk_t *V, *XY;
|
||||
pwxform_ctx_t ctx;
|
||||
uint8_t sha256[32];
|
||||
SHA256_CTX sha256_ctx;
|
||||
yespower_version_t version = params->version;
|
||||
uint32_t N = params->N;
|
||||
uint32_t r = params->r;
|
||||
const uint8_t *pers = params->pers;
|
||||
size_t perslen = params->perslen;
|
||||
uint32_t Swidth;
|
||||
size_t B_size, V_size, XY_size, need;
|
||||
uint8_t *B, *S;
|
||||
salsa20_blk_t *V, *XY;
|
||||
pwxform_ctx_t ctx;
|
||||
uint8_t sha256[32];
|
||||
sph_sha256_context sha256_ctx;
|
||||
|
||||
/* Sanity-check parameters */
|
||||
if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0)
|
||||
/* Sanity-check parameters */
|
||||
if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0)
|
||||
|| N < 1024 || N > 512 * 1024 || r < 8 || r > 32
|
||||
|| (N & (N - 1)) != 0 || ( !pers && perslen ) )
|
||||
{
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Allocate memory */
|
||||
B_size = (size_t)128 * r;
|
||||
V_size = B_size * N;
|
||||
if ( version == YESPOWER_0_5 )
|
||||
/* Allocate memory */
|
||||
B_size = (size_t)128 * r;
|
||||
V_size = B_size * N;
|
||||
if ( version == YESPOWER_0_5 )
|
||||
{
|
||||
XY_size = B_size * 2;
|
||||
Swidth = Swidth_0_5;
|
||||
ctx.Sbytes = 2 * Swidth_to_Sbytes1( Swidth );
|
||||
} else {
|
||||
XY_size = B_size + 64;
|
||||
Swidth = Swidth_1_0;
|
||||
ctx.Sbytes = 3 * Swidth_to_Sbytes1( Swidth );
|
||||
}
|
||||
need = B_size + V_size + XY_size + ctx.Sbytes;
|
||||
if ( local->aligned_size < need )
|
||||
XY_size = B_size * 2;
|
||||
Swidth = Swidth_0_5;
|
||||
ctx.Sbytes = 2 * Swidth_to_Sbytes1( Swidth );
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( free_region( local ) )
|
||||
return -1;
|
||||
if ( !alloc_region( local, need ) )
|
||||
return -1;
|
||||
}
|
||||
B = (uint8_t *)local->aligned;
|
||||
V = (salsa20_blk_t *)((uint8_t *)B + B_size);
|
||||
XY = (salsa20_blk_t *)((uint8_t *)V + V_size);
|
||||
S = (uint8_t *)XY + XY_size;
|
||||
ctx.S0 = S;
|
||||
ctx.S1 = S + Swidth_to_Sbytes1( Swidth );
|
||||
XY_size = B_size + 64;
|
||||
Swidth = Swidth_1_0;
|
||||
ctx.Sbytes = 3 * Swidth_to_Sbytes1( Swidth );
|
||||
}
|
||||
need = B_size + V_size + XY_size + ctx.Sbytes;
|
||||
if ( local->aligned_size < need )
|
||||
{
|
||||
if ( free_region( local ) )
|
||||
return -1;
|
||||
if ( !alloc_region( local, need ) )
|
||||
return -1;
|
||||
}
|
||||
B = (uint8_t *)local->aligned;
|
||||
V = (salsa20_blk_t *)((uint8_t *)B + B_size);
|
||||
XY = (salsa20_blk_t *)((uint8_t *)V + V_size);
|
||||
S = (uint8_t *)XY + XY_size;
|
||||
ctx.S0 = S;
|
||||
ctx.S1 = S + Swidth_to_Sbytes1( Swidth );
|
||||
|
||||
|
||||
// copy prehash, do tail
|
||||
// copy prehash, do tail
|
||||
memcpy( &sha256_ctx, &sha256_prehash_ctx, sizeof sha256_ctx );
|
||||
SHA256_Update( &sha256_ctx, src+64, srclen-64 );
|
||||
SHA256_Final( sha256, &sha256_ctx );
|
||||
|
||||
// SHA256_Buf(src, srclen, sha256);
|
||||
sph_sha256( &sha256_ctx, src+64, srclen-64 );
|
||||
sph_sha256_close( &sha256_ctx, sha256 );
|
||||
|
||||
if ( version == YESPOWER_0_5 )
|
||||
if ( version == YESPOWER_0_5 )
|
||||
{
|
||||
PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, B_size );
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
memcpy( sha256, B, sizeof(sha256) );
|
||||
smix( B, r, N, V, XY, &ctx );
|
||||
smix( B, r, N, V, XY, &ctx );
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
@@ -1108,54 +1118,36 @@ int yespower(yespower_local_t *local,
|
||||
src = pers;
|
||||
srclen = perslen;
|
||||
}
|
||||
else
|
||||
srclen = 0;
|
||||
|
||||
HMAC_SHA256_Buf( dst, sizeof(*dst), src, srclen, sha256 );
|
||||
SHA256_Buf( sha256, sizeof(sha256), (uint8_t *)dst );
|
||||
|
||||
HMAC_SHA256_CTX ctx;
|
||||
HMAC_SHA256_Init( &ctx, dst, sizeof(*dst) );
|
||||
HMAC_SHA256_Update( &ctx, src, srclen );
|
||||
HMAC_SHA256_Final( sha256, &ctx );
|
||||
|
||||
// SHA256_CTX ctx;
|
||||
SHA256_Init( &sha256_ctx );
|
||||
SHA256_Update( &sha256_ctx, sha256, sizeof(sha256) );
|
||||
SHA256_Final( (unsigned char*)dst, &sha256_ctx );
|
||||
|
||||
|
||||
/*
|
||||
if ( pers )
|
||||
{
|
||||
HMAC_SHA256_Buf( dst, sizeof(*dst), pers, perslen, sha256 );
|
||||
SHA256_Buf( sha256, sizeof(sha256), (uint8_t *)dst );
|
||||
}
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx.S2 = S + 2 * Swidth_to_Sbytes1( Swidth );
|
||||
ctx.w = 0;
|
||||
|
||||
if ( pers )
|
||||
ctx.S2 = S + 2 * Swidth_to_Sbytes1( Swidth );
|
||||
ctx.w = 0;
|
||||
if ( pers )
|
||||
{
|
||||
src = pers;
|
||||
srclen = perslen;
|
||||
}
|
||||
src = pers;
|
||||
srclen = perslen;
|
||||
}
|
||||
else
|
||||
srclen = 0;
|
||||
srclen = 0;
|
||||
|
||||
PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, 128 );
|
||||
memcpy( sha256, B, sizeof(sha256) );
|
||||
PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, 128 );
|
||||
memcpy( sha256, B, sizeof(sha256) );
|
||||
|
||||
if ( work_restart[thrid].restart ) return 0;
|
||||
|
||||
smix_1_0( B, r, N, V, XY, &ctx );
|
||||
|
||||
|
||||
HMAC_SHA256_Buf( B + B_size - 64, 64, sha256, sizeof(sha256),
|
||||
(uint8_t *)dst );
|
||||
}
|
||||
}
|
||||
|
||||
/* Success! */
|
||||
return 1;
|
||||
/* Success! */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -34,6 +34,7 @@
|
||||
#include <stdlib.h> /* for size_t */
|
||||
#include "miner.h"
|
||||
#include "simd-utils.h"
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -79,7 +80,8 @@ typedef struct {
|
||||
extern yespower_params_t yespower_params;
|
||||
|
||||
//SHA256_CTX sha256_prehash_ctx;
|
||||
extern __thread SHA256_CTX sha256_prehash_ctx;
|
||||
extern __thread sph_sha256_context sha256_prehash_ctx;
|
||||
//extern __thread SHA256_CTX sha256_prehash_ctx;
|
||||
|
||||
/**
|
||||
* yespower_init_local(local):
|
||||
|
@@ -4,8 +4,9 @@
|
||||
# during develpment. However the information contained may provide compilation
|
||||
# tips to users.
|
||||
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512-sha cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 > /dev/null
|
||||
|
||||
# Icelake AVX512 SHA VAES
|
||||
make distclean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
@@ -16,6 +17,21 @@ mv cpuminer.exe cpuminer-avx512-sha-vaes.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx512-sha-vaes
|
||||
|
||||
# Rocketlake AVX512 SHA AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=cascadelake -msha -Wall -fno-common" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=skylake-avx512 -msha -Wall -fno-common" ./configure --with-curl
|
||||
# CFLAGS="-O3 -march=rocketlake -Wall -fno-common" ./configure --with-curl
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-avx512-sha.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx512-sha
|
||||
|
||||
# Slylake-X AVX512 AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-common" ./configure --with-curl
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
@@ -23,6 +39,7 @@ mv cpuminer.exe cpuminer-avx512.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx512
|
||||
|
||||
# Haswell AVX2 AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
# GCC 9 doesn't include AES with core-avx2
|
||||
@@ -33,6 +50,7 @@ mv cpuminer.exe cpuminer-avx2.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx2
|
||||
|
||||
# Sandybridge AVX AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=corei7-avx -maes -Wall -fno-common" ./configure --with-curl
|
||||
@@ -42,15 +60,17 @@ mv cpuminer.exe cpuminer-avx.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx
|
||||
|
||||
# Westmere SSE4.2 AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -maes -msse4.2 -Wall -fno-common" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=westmere -Wall -fno-common" ./configure --with-curl
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-sse42.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-aes-sse42
|
||||
|
||||
# Nehalem SSE4.2
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl
|
||||
@@ -60,6 +80,7 @@ mv cpuminer.exe cpuminer-sse42.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-sse42
|
||||
|
||||
# Core2 SSSE3
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl
|
||||
@@ -69,6 +90,7 @@ mv cpuminer.exe cpuminer-ssse3.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-ssse3
|
||||
|
||||
# Generic SSE2
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -msse2 -Wall -fno-common" ./configure --with-curl
|
||||
@@ -78,6 +100,7 @@ mv cpuminer.exe cpuminer-sse2.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-sse2
|
||||
|
||||
# AMD Zen1 AVX2 SHA
|
||||
make clean || echo done
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=znver1 -Wall -fno-common" ./configure --with-curl
|
||||
@@ -87,6 +110,18 @@ mv cpuminer.exe cpuminer-zen.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-zen
|
||||
|
||||
# AMD Zen3 AVX2 SHA VAES
|
||||
make clean || echo done
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=znver2 -mvaes -Wall -fno-common" ./configure --with-curl
|
||||
# CFLAGS="-O3 -march=znver3 -Wall -fno-common" ./configure --with-curl
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-zen3.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-zen3
|
||||
|
||||
# Native to current CPU
|
||||
make clean || echo done
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
|
||||
|
@@ -1,27 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
#if [ "$OS" = "Windows_NT" ]; then
|
||||
# ./mingw64.sh
|
||||
# exit 0
|
||||
#fi
|
||||
|
||||
# Linux build
|
||||
|
||||
make distclean || echo clean
|
||||
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
|
||||
# Ubuntu 10.04 (gcc 4.4)
|
||||
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
|
||||
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
strip -s cpuminer
|
7
build.sh
7
build.sh
@@ -12,15 +12,8 @@ make distclean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
|
||||
# Ubuntu 10.04 (gcc 4.4)
|
||||
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
|
27
buildjdd.sh
27
buildjdd.sh
@@ -1,27 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
#if [ "$OS" = "Windows_NT" ]; then
|
||||
# ./mingw64.sh
|
||||
# exit 0
|
||||
#fi
|
||||
|
||||
# Linux build
|
||||
|
||||
make distclean || echo clean
|
||||
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
|
||||
# Ubuntu 10.04 (gcc 4.4)
|
||||
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
CFLAGS="-O3 -march=corei7-avx -msha -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
strip -s cpuminer
|
@@ -1,10 +1,9 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# imake clean and rm all the targetted executables.
|
||||
# tips to users.
|
||||
# make clean and rm all the targetted executables.
|
||||
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 > /dev/null
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512-sha cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 cpuminer-zen3 > /dev/null
|
||||
|
||||
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42 cpuminer-ssse3 > /dev/null
|
||||
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-zen3.exe > /dev/null
|
||||
|
||||
make distclean > /dev/null
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.0.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.6.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.15.0'
|
||||
PACKAGE_STRING='cpuminer-opt 3.15.0'
|
||||
PACKAGE_VERSION='3.15.6'
|
||||
PACKAGE_STRING='cpuminer-opt 3.15.6'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.15.0 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.15.6 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.15.0:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.15.6:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.15.0
|
||||
cpuminer-opt configure 3.15.6
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.15.0, which was
|
||||
It was created by cpuminer-opt $as_me 3.15.6, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.15.0'
|
||||
VERSION='3.15.6'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.15.0, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.15.6, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.15.0
|
||||
cpuminer-opt config.status 3.15.6
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.15.0])
|
||||
AC_INIT([cpuminer-opt], [3.15.6])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
166
cpu-miner.c
166
cpu-miner.c
@@ -490,8 +490,13 @@ static bool get_mininginfo( CURL *curl, struct work *work )
|
||||
}
|
||||
|
||||
key = json_object_get( res, "networkhashps" );
|
||||
if ( key && json_is_integer( key ) )
|
||||
net_hashrate = (double) json_integer_value( key );
|
||||
if ( key )
|
||||
{
|
||||
if ( json_is_integer( key ) )
|
||||
net_hashrate = (double) json_integer_value( key );
|
||||
else if ( json_is_real( key ) )
|
||||
net_hashrate = (double) json_real_value( key );
|
||||
}
|
||||
|
||||
key = json_object_get( res, "blocks" );
|
||||
if ( key && json_is_integer( key ) )
|
||||
@@ -506,26 +511,7 @@ static bool get_mininginfo( CURL *curl, struct work *work )
|
||||
// complete missing data from getwork
|
||||
work->height = (uint32_t) net_blocks + 1;
|
||||
if ( work->height > g_work.height )
|
||||
{
|
||||
restart_threads();
|
||||
|
||||
/* redundant with new block log
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
char srate[32] = { 0 };
|
||||
sprintf( netinfo, "diff %.2f", net_diff );
|
||||
if ( net_hashrate )
|
||||
{
|
||||
format_hashrate( net_hashrate, srate );
|
||||
strcat( netinfo, ", net " );
|
||||
strcat( netinfo, srate );
|
||||
}
|
||||
applog( LOG_BLUE, "%s block %d, %s",
|
||||
algo_names[opt_algo], work->height, netinfo );
|
||||
}
|
||||
*/
|
||||
}
|
||||
} // res
|
||||
}
|
||||
json_decref( val );
|
||||
@@ -920,12 +906,12 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
tmp = json_object_get( val, "workid" );
|
||||
if ( tmp )
|
||||
{
|
||||
if ( !json_is_string( tmp ) )
|
||||
{
|
||||
applog( LOG_ERR, "JSON invalid workid" );
|
||||
goto out;
|
||||
}
|
||||
work->workid = strdup( json_string_value( tmp ) );
|
||||
if ( !json_is_string( tmp ) )
|
||||
{
|
||||
applog( LOG_ERR, "JSON invalid workid" );
|
||||
goto out;
|
||||
}
|
||||
work->workid = strdup( json_string_value( tmp ) );
|
||||
}
|
||||
|
||||
rc = true;
|
||||
@@ -1078,13 +1064,12 @@ void report_summary_log( bool force )
|
||||
|
||||
if ( accepted_share_count < submitted_share_count )
|
||||
{
|
||||
double ltd = exp32 * last_targetdiff;
|
||||
double lost_ghrate = uptime.tv_sec == 0 ? 0.
|
||||
: exp32 * last_targetdiff
|
||||
* (double)(submitted_share_count - accepted_share_count )
|
||||
/ (double)uptime.tv_sec;
|
||||
: ltd * (double)(submitted_share_count - accepted_share_count )
|
||||
/ (double)uptime.tv_sec;
|
||||
double lost_shrate = share_time == 0. ? 0.
|
||||
: exp32 * last_targetdiff * (double)(submits - accepts )
|
||||
/ share_time;
|
||||
: ltd * (double)(submits - accepts ) / share_time;
|
||||
char lshr_units[4] = {0};
|
||||
char lghr_units[4] = {0};
|
||||
scale_hash_for_display( &lost_shrate, lshr_units );
|
||||
@@ -1190,9 +1175,11 @@ static int share_result( int result, struct work *work,
|
||||
{
|
||||
sprintf( ares, "A%d", accepted_share_count );
|
||||
sprintf( bres, "B%d", solved_block_count );
|
||||
stale = work ? work->data[ algo_gate.ntime_index ]
|
||||
!= g_work.data[ algo_gate.ntime_index ] : false;
|
||||
if ( reason ) stale = stale || strstr( reason, "Invalid job id" );
|
||||
if ( reason )
|
||||
stale = strstr( reason, "job" );
|
||||
else if ( work )
|
||||
stale = work->data[ algo_gate.ntime_index ]
|
||||
!= g_work.data[ algo_gate.ntime_index ];
|
||||
if ( stale )
|
||||
{
|
||||
stale_share_count++;
|
||||
@@ -1260,14 +1247,13 @@ static int share_result( int result, struct work *work,
|
||||
if ( unlikely( !( opt_quiet || result || stale ) ) )
|
||||
{
|
||||
uint32_t str[8];
|
||||
uint32_t *targ;
|
||||
|
||||
if ( reason )
|
||||
applog( LOG_WARNING, "Reject reason: %s", reason );
|
||||
if ( reason ) applog( LOG_WARNING, "Reject reason: %s", reason );
|
||||
|
||||
// display share hash and target for troubleshooting
|
||||
diff_to_hash( str, my_stats.share_diff );
|
||||
applog2( LOG_INFO, "Hash: %08x%08x%08x...", str[7], str[6], str[5] );
|
||||
uint32_t *targ;
|
||||
|
||||
if ( work )
|
||||
targ = work->target;
|
||||
else
|
||||
@@ -1580,6 +1566,7 @@ start:
|
||||
{
|
||||
double miner_hr = 0.;
|
||||
double net_hr = net_hashrate;
|
||||
double nd = net_diff * exp32;
|
||||
char net_hr_units[4] = {0};
|
||||
char miner_hr_units[4] = {0};
|
||||
char net_ttf[32];
|
||||
@@ -1594,11 +1581,11 @@ start:
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
if ( net_hr > 0. )
|
||||
sprintf_et( net_ttf, ( net_diff * exp32 ) / net_hr );
|
||||
sprintf_et( net_ttf, nd / net_hr );
|
||||
else
|
||||
sprintf( net_ttf, "NA" );
|
||||
if ( miner_hr > 0. )
|
||||
sprintf_et( miner_ttf, ( net_diff * exp32 ) / miner_hr );
|
||||
sprintf_et( miner_ttf, nd / miner_hr );
|
||||
else
|
||||
sprintf( miner_ttf, "NA" );
|
||||
|
||||
@@ -1829,10 +1816,11 @@ bool submit_solution( struct work *work, const void *hash,
|
||||
update_submit_stats( work, hash );
|
||||
|
||||
if unlikely( !have_stratum && !have_longpoll )
|
||||
{ // block solved, force getwork
|
||||
{ // solo, block solved, force getwork
|
||||
pthread_rwlock_wrlock( &g_work_lock );
|
||||
g_work_time = 0;
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
restart_threads();
|
||||
}
|
||||
|
||||
if ( !opt_quiet )
|
||||
@@ -1847,10 +1835,19 @@ bool submit_solution( struct work *work, const void *hash,
|
||||
work->data[ algo_gate.ntime_index ] );
|
||||
}
|
||||
|
||||
if ( unlikely( lowdiff_debug ) )
|
||||
if ( opt_debug )
|
||||
{
|
||||
uint32_t* h = (uint32_t*)hash;
|
||||
uint32_t* t = (uint32_t*)work->target;
|
||||
uint32_t* d = (uint32_t*)work->data;
|
||||
|
||||
unsigned char *xnonce2str = abin2hex( work->xnonce2,
|
||||
work->xnonce2_len );
|
||||
applog(LOG_INFO,"Thread %d, Nonce %08x, Xnonce2 %s", thr->id,
|
||||
work->data[ algo_gate.nonce_index ], xnonce2str );
|
||||
free( xnonce2str );
|
||||
applog(LOG_INFO,"Data[0:19]: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7],d[8],d[9] );
|
||||
applog(LOG_INFO," : %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", d[10],d[11],d[12],d[13],d[14],d[15],d[16],d[17],d[18],d[19]);
|
||||
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
|
||||
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
@@ -1868,25 +1865,25 @@ static bool wanna_mine(int thr_id)
|
||||
bool state = true;
|
||||
|
||||
if (opt_max_temp > 0.0)
|
||||
{
|
||||
{
|
||||
float temp = cpu_temp(0);
|
||||
if (temp > opt_max_temp)
|
||||
{
|
||||
{
|
||||
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
|
||||
applog(LOG_INFO, "temperature too high (%.0fC), waiting...", temp);
|
||||
state = false;
|
||||
}
|
||||
}
|
||||
if (opt_max_diff > 0.0 && net_diff > opt_max_diff)
|
||||
{
|
||||
{
|
||||
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
|
||||
applog(LOG_INFO, "network diff too high, waiting...");
|
||||
state = false;
|
||||
}
|
||||
if (opt_max_rate > 0.0 && net_hashrate > opt_max_rate)
|
||||
{
|
||||
{
|
||||
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
|
||||
{
|
||||
{
|
||||
char rate[32];
|
||||
format_hashrate(opt_max_rate, rate);
|
||||
applog(LOG_INFO, "network hashrate too high, waiting %s...", rate);
|
||||
@@ -1903,7 +1900,7 @@ static bool wanna_mine(int thr_id)
|
||||
// default
|
||||
void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
{
|
||||
sha256d(merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size);
|
||||
sha256d( merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size );
|
||||
for ( int i = 0; i < sctx->job.merkle_count; i++ )
|
||||
{
|
||||
memcpy( merkle_root + 32, sctx->job.merkle[i], 32 );
|
||||
@@ -1955,8 +1952,6 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
|
||||
bool force_new_work = false;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
|
||||
if ( have_stratum )
|
||||
force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 )
|
||||
!= strtoul( g_work->job_id, NULL, 16 )
|
||||
@@ -1972,8 +1967,6 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
}
|
||||
else
|
||||
++(*nonceptr);
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
}
|
||||
|
||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
@@ -1989,13 +1982,14 @@ bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
|
||||
static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
{
|
||||
// Safer than testing the job id
|
||||
bool new_job = *get_stratum_job_ntime()
|
||||
!= g_work->data[ algo_gate.ntime_index ];
|
||||
bool new_job;
|
||||
|
||||
pthread_rwlock_wrlock( &g_work_lock );
|
||||
pthread_mutex_lock( &sctx->work_lock );
|
||||
|
||||
new_job = sctx->new_job;
|
||||
sctx->new_job = false;
|
||||
|
||||
free( g_work->job_id );
|
||||
g_work->job_id = strdup( sctx->job.job_id );
|
||||
g_work->xnonce2_len = sctx->xnonce2_size;
|
||||
@@ -2008,8 +2002,12 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
g_work->targetdiff = sctx->job.diff
|
||||
/ ( opt_target_factor * opt_diff_factor );
|
||||
diff_to_hash( g_work->target, g_work->targetdiff );
|
||||
|
||||
// Increment extranonce2
|
||||
for ( int t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
for ( int t = 0;
|
||||
t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] );
|
||||
t++ );
|
||||
|
||||
g_work_time = time(NULL);
|
||||
restart_threads();
|
||||
|
||||
@@ -2031,14 +2029,14 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
else if ( last_block_height != sctx->block_height )
|
||||
applog( LOG_BLUE, "New Block %d, Job %s",
|
||||
sctx->block_height, g_work->job_id );
|
||||
else if ( new_job && g_work->job_id )
|
||||
else if ( g_work->job_id && new_job )
|
||||
applog( LOG_BLUE, "New Work: Block %d, Net diff %.5g, Job %s",
|
||||
sctx->block_height, net_diff, g_work->job_id );
|
||||
else if ( !opt_quiet )
|
||||
{
|
||||
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
|
||||
g_work->xnonce2_len );
|
||||
applog( LOG_INFO, "Extranonce %s, Block %d, Net Diff %.5g",
|
||||
applog( LOG_INFO, "Extranonce2 %s, Block %d, Net Diff %.5g",
|
||||
xnonce2str, sctx->block_height, net_diff );
|
||||
free( xnonce2str );
|
||||
}
|
||||
@@ -2064,11 +2062,12 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
|
||||
if ( likely( hr > 0. ) )
|
||||
{
|
||||
double nd = net_diff * exp32;
|
||||
char hr_units[4] = {0};
|
||||
char block_ttf[32];
|
||||
char share_ttf[32];
|
||||
|
||||
sprintf_et( block_ttf, ( net_diff * exp32 ) / hr );
|
||||
sprintf_et( block_ttf, nd / hr );
|
||||
sprintf_et( share_ttf, ( g_work->targetdiff * exp32 ) / hr );
|
||||
scale_hash_for_display ( &hr, hr_units );
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: Block %s, Share %s",
|
||||
@@ -2084,7 +2083,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
: et.tv_sec / ( last_block_height - session_first_block );
|
||||
if ( net_diff && net_ttf )
|
||||
{
|
||||
double net_hr = net_diff * exp32 / net_ttf;
|
||||
double net_hr = nd / net_ttf;
|
||||
char net_hr_units[4] = {0};
|
||||
|
||||
scale_hash_for_display ( &net_hr, net_hr_units );
|
||||
@@ -2239,19 +2238,18 @@ static void *miner_thread( void *userdata )
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
}
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
|
||||
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
|
||||
work_restart[thr_id].restart = 0;
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
} // do_this_thread
|
||||
algo_gate.resync_threads( &work );
|
||||
algo_gate.resync_threads( thr_id, &work );
|
||||
|
||||
if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) )
|
||||
continue;
|
||||
// conditional mining
|
||||
if ( unlikely( !wanna_mine( thr_id ) ) )
|
||||
{
|
||||
sleep(5);
|
||||
continue;
|
||||
}
|
||||
|
||||
// LP_SCANTIME overrides opt_scantime option, is this right?
|
||||
|
||||
@@ -2308,7 +2306,6 @@ static void *miner_thread( void *userdata )
|
||||
// init time
|
||||
if ( firstwork_time == 0 )
|
||||
firstwork_time = time(NULL);
|
||||
work_restart[thr_id].restart = 0;
|
||||
hashes_done = 0;
|
||||
gettimeofday( (struct timeval *) &tv_start, NULL );
|
||||
|
||||
@@ -2439,6 +2436,14 @@ static void *miner_thread( void *userdata )
|
||||
#endif
|
||||
}
|
||||
} // benchmark
|
||||
|
||||
// conditional mining
|
||||
if ( unlikely( !wanna_mine( thr_id ) ) )
|
||||
{
|
||||
sleep(5);
|
||||
continue;
|
||||
}
|
||||
|
||||
} // miner_thread loop
|
||||
|
||||
out:
|
||||
@@ -2730,7 +2735,6 @@ static void *stratum_thread(void *userdata )
|
||||
pthread_rwlock_wrlock( &g_work_lock );
|
||||
g_work_time = 0;
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
// restart_threads();
|
||||
if ( !stratum_connect( &stratum, stratum.url )
|
||||
|| !stratum_subscribe( &stratum )
|
||||
|| !stratum_authorize( &stratum, rpc_user, rpc_pass ) )
|
||||
@@ -2756,9 +2760,7 @@ static void *stratum_thread(void *userdata )
|
||||
report_summary_log( ( stratum_diff != stratum.job.diff )
|
||||
&& ( stratum_diff != 0. ) );
|
||||
|
||||
if ( stratum.job.job_id && ( !g_work_time
|
||||
|| ( *get_stratum_job_ntime()
|
||||
!= g_work.data[ algo_gate.ntime_index ] ) ) )
|
||||
if ( stratum.new_job )
|
||||
stratum_gen_work( &stratum, &g_work );
|
||||
|
||||
if ( likely( stratum_socket_full( &stratum, opt_timeout ) ) )
|
||||
@@ -3382,13 +3384,14 @@ bool check_cpu_capability ()
|
||||
bool sw_has_sha = false;
|
||||
bool sw_has_vaes = false;
|
||||
set_t algo_features = algo_gate.optimizations;
|
||||
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||
bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features );
|
||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||
bool algo_has_vaes = set_incl( VAES_OPT, algo_features );
|
||||
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||
bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features );
|
||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||
bool algo_has_vaes = set_incl( VAES_OPT, algo_features );
|
||||
bool algo_has_vaes256 = set_incl( VAES256_OPT, algo_features );
|
||||
bool use_aes;
|
||||
bool use_sse2;
|
||||
bool use_sse42;
|
||||
@@ -3509,7 +3512,8 @@ bool check_cpu_capability ()
|
||||
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
||||
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
||||
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
||||
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes;
|
||||
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes
|
||||
&& ( use_avx512 || algo_has_vaes256 );
|
||||
use_none = !( use_sse2 || use_aes || use_sse42 || use_avx512 || use_avx2 ||
|
||||
use_sha || use_vaes );
|
||||
|
||||
|
14
miner.h
14
miner.h
@@ -444,7 +444,8 @@ struct stratum_ctx {
|
||||
struct work work __attribute__ ((aligned (64)));
|
||||
pthread_mutex_t work_lock;
|
||||
|
||||
int block_height;
|
||||
int block_height;
|
||||
bool new_job;
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
bool stratum_socket_full(struct stratum_ctx *sctx, int timeout);
|
||||
@@ -642,7 +643,7 @@ static const char* const algo_names[] = {
|
||||
"lyra2z330",
|
||||
"m7m",
|
||||
"minotaur",
|
||||
"myr-gr",
|
||||
"myr-gr",
|
||||
"neoscrypt",
|
||||
"nist5",
|
||||
"pentablake",
|
||||
@@ -770,7 +771,7 @@ Options:\n\
|
||||
allium Garlicoin (GRLC)\n\
|
||||
anime Animecoin (ANI)\n\
|
||||
argon2 Argon2 Coin (AR2)\n\
|
||||
argon2d250 argon2d-crds, Credits (CRDS)\n\
|
||||
argon2d250\n\
|
||||
argon2d500 argon2d-dyn, Dynamic (DYN)\n\
|
||||
argon2d4096 argon2d-uis, Unitus (UIS)\n\
|
||||
axiom Shabal-256 MemoHash\n\
|
||||
@@ -795,13 +796,13 @@ Options:\n\
|
||||
lyra2h Hppcoin\n\
|
||||
lyra2re lyra2\n\
|
||||
lyra2rev2 lyrav2\n\
|
||||
lyra2rev3 lyrav2v3, Vertcoin\n\
|
||||
lyra2rev3 lyrav2v3\n\
|
||||
lyra2z\n\
|
||||
lyra2z330 Lyra2 330 rows\n\
|
||||
m7m Magi (XMG)\n\
|
||||
myr-gr Myriad-Groestl\n\
|
||||
minotaur Ringcoin (RNG)\n\
|
||||
neoscrypt NeoScrypt(128, 2, 1)\n\
|
||||
neoscrypt NeoScrypt(128, 2, 1)\n\
|
||||
nist5 Nist5\n\
|
||||
pentablake 5 x blake512\n\
|
||||
phi1612 phi\n\
|
||||
@@ -815,7 +816,7 @@ Options:\n\
|
||||
sha256d Double SHA-256\n\
|
||||
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||
sha3d Double Keccak256 (BSHA3)\n\
|
||||
sha3d Double Keccak256 (BSHA3)\n\
|
||||
shavite3 Shavite3\n\
|
||||
skein Skein+Sha (Skeincoin)\n\
|
||||
skein2 Double Skein (Woodcoin)\n\
|
||||
@@ -874,7 +875,6 @@ Options:\n\
|
||||
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
||||
long polling is unavailable, in seconds (default: 5)\n\
|
||||
--randomize Randomize scan range start to reduce duplicates\n\
|
||||
--reset-on-stale Workaround reset stratum if too many stale shares\n\
|
||||
-f, --diff-factor Divide req. difficulty by this factor (std is 1.0)\n\
|
||||
-m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\
|
||||
--hash-meter Display thread hash rates\n\
|
||||
|
@@ -135,11 +135,17 @@ static inline __m128i mm128_neg1_fn()
|
||||
// Bitwise not (~v)
|
||||
#define mm128_not( v ) _mm_xor_si128( (v), m128_neg1 )
|
||||
|
||||
// Unary negation of elements
|
||||
// Unary negation of elements (-v)
|
||||
#define mm128_negate_64( v ) _mm_sub_epi64( m128_zero, v )
|
||||
#define mm128_negate_32( v ) _mm_sub_epi32( m128_zero, v )
|
||||
#define mm128_negate_16( v ) _mm_sub_epi16( m128_zero, v )
|
||||
|
||||
// Clear (zero) 32 bit elements based on bits set in 4 bit mask.
|
||||
// Fast, avoids using vector mask, but only available for 128 bit vectors.
|
||||
#define mm128_mask_32( a, mask ) \
|
||||
_mm_castps_si128( _mm_insert_ps( _mm_castsi128_ps( a ), \
|
||||
_mm_castsi128_ps( a ), mask ) )
|
||||
|
||||
// Add 4 values, fewer dependencies than sequential addition.
|
||||
#define mm128_add4_64( a, b, c, d ) \
|
||||
_mm_add_epi64( _mm_add_epi64( a, b ), _mm_add_epi64( c, d ) )
|
||||
@@ -269,11 +275,8 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
// Rotate vector elements accross all lanes
|
||||
|
||||
#define mm128_swap_64( v ) _mm_shuffle_epi32( v, 0x4e )
|
||||
|
||||
#define mm128_ror_1x32( v ) _mm_shuffle_epi32( v, 0x39 )
|
||||
#define mm128_rol_1x32( v ) _mm_shuffle_epi32( v, 0x93 )
|
||||
|
||||
|
||||
//#define mm128_swap_64( v ) _mm_alignr_epi8( v, v, 8 )
|
||||
//#define mm128_ror_1x32( v ) _mm_alignr_epi8( v, v, 4 )
|
||||
//#define mm128_rol_1x32( v ) _mm_alignr_epi8( v, v, 12 )
|
||||
@@ -282,53 +285,11 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
#define mm128_ror_1x8( v ) _mm_alignr_epi8( v, v, 1 )
|
||||
#define mm128_rol_1x8( v ) _mm_alignr_epi8( v, v, 15 )
|
||||
|
||||
// Rotate by c bytes
|
||||
#define mm128_ror_x8( v, c ) _mm_alignr_epi8( v, c )
|
||||
#define mm128_rol_x8( v, c ) _mm_alignr_epi8( v, 16-(c) )
|
||||
|
||||
|
||||
/*
|
||||
// Rotate 16 byte (128 bit) vector by c bytes.
|
||||
// Less efficient using shift but more versatile. Use only for odd number
|
||||
// byte rotations. Use shuffle above whenever possible.
|
||||
#define mm128_ror_x8( v, c ) \
|
||||
_mm_or_si128( _mm_srli_si128( v, c ), _mm_slli_si128( v, 16-(c) ) )
|
||||
|
||||
#define mm128_rol_x8( v, c ) \
|
||||
_mm_or_si128( _mm_slli_si128( v, c ), _mm_srli_si128( v, 16-(c) ) )
|
||||
|
||||
#if defined (__SSE3__)
|
||||
// no SSE2 implementation, no current users
|
||||
|
||||
#define mm128_ror_1x16( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x01000f0e0d0c0b0a, \
|
||||
0x0908070605040302 ) )
|
||||
#define mm128_rol_1x16( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x0d0c0b0a09080706, \
|
||||
0x0504030201000f0e ) )
|
||||
#define mm128_ror_1x8( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x000f0e0d0c0b0a09, \
|
||||
0x0807060504030201 ) )
|
||||
#define mm128_rol_1x8( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x0e0d0c0b0a090807, \
|
||||
0x060504030201000f ) )
|
||||
#else // SSE2
|
||||
|
||||
#define mm128_ror_1x16( v ) \
|
||||
_mm_or_si128( _mm_srli_si128( v, 2 ), _mm_slli_si128( v, 14 ) )
|
||||
|
||||
#define mm128_rol_1x16( v ) \
|
||||
_mm_or_si128( _mm_slli_si128( v, 2 ), _mm_srli_si128( v, 14 ) )
|
||||
|
||||
#define mm128_ror_1x8( v ) \
|
||||
_mm_or_si128( _mm_srli_si128( v, 1 ), _mm_slli_si128( v, 15 ) )
|
||||
|
||||
#define mm128_rol_1x8( v ) \
|
||||
_mm_or_si128( _mm_slli_si128( v, 1 ), _mm_srli_si128( v, 15 ) )
|
||||
|
||||
#endif // SSE3 else SSE2
|
||||
*/
|
||||
|
||||
|
||||
// Invert vector: {3,2,1,0} -> {0,1,2,3}
|
||||
#define mm128_invert_32( v ) _mm_shuffle_epi32( v, 0x1b )
|
||||
|
||||
|
@@ -26,8 +26,6 @@
|
||||
#define mm256_concat_128( hi, lo ) \
|
||||
_mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 1 )
|
||||
|
||||
#define m256_const1_128( v ) \
|
||||
_mm256_broadcastsi128_si256( v )
|
||||
|
||||
// Equavalent of set, move 64 bit integer constants to respective 64 bit
|
||||
// elements.
|
||||
@@ -144,10 +142,11 @@ do { \
|
||||
|
||||
// Parallel AES, for when x is expected to be in a 256 bit register.
|
||||
// Use same 128 bit key.
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
#define mm256_aesenc_2x128( x, k ) \
|
||||
_mm256_aesenc_epi128( x, m256_const1_128(k ) )
|
||||
_mm256_aesenc_epi128( x, k )
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -56,15 +56,15 @@
|
||||
// If an expensive constant is to be reused in the same function it should
|
||||
// be declared as a local variable defined once and reused.
|
||||
//
|
||||
// Permutations cab be very exppensive if they use a vector control index,
|
||||
// Permutations can be very expensive if they use a vector control index,
|
||||
// even if the permutation itself is quite efficient.
|
||||
// The index is essentially a constant with all the baggage that brings.
|
||||
// The same rules apply, if an index is to be reused it should be defined
|
||||
// as a local. This applies specifically to bswap operations.
|
||||
//
|
||||
// Additionally, permutations using smaller vectors can be more efficient
|
||||
// if the permutation doesn't cross lane boundaries ,typically 128 bits,
|
||||
// ans the smnaller vector can use an imm comtrol.
|
||||
// if the permutation doesn't cross lane boundaries, typically 128 bits,
|
||||
// and the smnaller vector can use an imm comtrol.
|
||||
//
|
||||
// If the permutation doesn't cross lane boundaries a shuffle instructions
|
||||
// can be used with imm control instead of permute.
|
||||
@@ -182,7 +182,10 @@ static inline __m512i m512_const4_64( const uint64_t i3, const uint64_t i2,
|
||||
//
|
||||
// Basic operations without SIMD equivalent
|
||||
|
||||
// ~x
|
||||
#define mm512_not( x ) _mm512_xor_si512( x, m512_neg1 )
|
||||
|
||||
// -x
|
||||
#define mm512_negate_64( x ) _mm512_sub_epi64( m512_zero, x )
|
||||
#define mm512_negate_32( x ) _mm512_sub_epi32( m512_zero, x )
|
||||
#define mm512_negate_16( x ) _mm512_sub_epi16( m512_zero, x )
|
||||
@@ -443,20 +446,13 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
|
||||
//
|
||||
// Rotate elements within 256 bit lanes of 512 bit vector.
|
||||
|
||||
// Rename these for consistency. Element size is always last.
|
||||
// mm<vectorsize>_<op><lanesize>_<elementsize>
|
||||
|
||||
|
||||
// Swap hi & lo 128 bits in each 256 bit lane
|
||||
|
||||
#define mm512_swap256_128( v ) _mm512_permutex_epi64( v, 0x4e )
|
||||
|
||||
// Rotate 256 bit lanes by one 64 bit element
|
||||
|
||||
#define mm512_ror256_64( v ) _mm512_permutex_epi64( v, 0x39 )
|
||||
#define mm512_rol256_64( v ) _mm512_permutex_epi64( v, 0x93 )
|
||||
|
||||
|
||||
// Rotate 256 bit lanes by one 32 bit element
|
||||
|
||||
#define mm512_ror256_32( v ) \
|
||||
|
27
sysinfos.c
27
sysinfos.c
@@ -331,16 +331,20 @@ static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
|
||||
// Feature flags
|
||||
|
||||
// CPU_INFO ECX
|
||||
#define XSAVE_Flag (1<<26)
|
||||
#define OSXSAVE_Flag (1<<27)
|
||||
#define AVX_Flag (1<<28)
|
||||
#define SSE3_Flag 1
|
||||
#define SSSE3_Flag (1<< 9)
|
||||
#define XOP_Flag (1<<11)
|
||||
#define FMA3_Flag (1<<12)
|
||||
#define AES_Flag (1<<25)
|
||||
#define SSE41_Flag (1<<19)
|
||||
#define SSE42_Flag (1<<20)
|
||||
#define AES_Flag (1<<25)
|
||||
#define XSAVE_Flag (1<<26)
|
||||
#define OSXSAVE_Flag (1<<27)
|
||||
#define AVX_Flag (1<<28)
|
||||
|
||||
// CPU_INFO EDX
|
||||
#define SSE_Flag (1<<25) // EDX
|
||||
#define SSE_Flag (1<<25)
|
||||
#define SSE2_Flag (1<<26)
|
||||
|
||||
// EXTENDED_FEATURES EBX
|
||||
@@ -359,8 +363,8 @@ static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
|
||||
|
||||
// Use this to detect presence of feature
|
||||
#define AVX_mask (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag)
|
||||
#define FMA3_mask (FMA3_Flag|AVX_mask)
|
||||
#define AVX512_mask (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag)
|
||||
#define FMA3_mask (FMA3_Flag|AVX_mask)
|
||||
#define AVX512_mask (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag)
|
||||
|
||||
static inline bool has_sha()
|
||||
{
|
||||
@@ -476,6 +480,17 @@ static inline bool has_avx512()
|
||||
#endif
|
||||
}
|
||||
|
||||
// AMD Zen3 added support for 256 bit VAES without requiring AVX512.
|
||||
// The original Intel spec requires AVX512F to support 512 bit VAES and
|
||||
// requires AVX512VL to support 256 bit VAES.
|
||||
// The CPUID VAES bit alone can't distiguish 256 vs 512 bit.
|
||||
// If necessary:
|
||||
// VAES 256 & 512 = VAES && AVX512VL
|
||||
// VAES 512 = VAES && AVX512F
|
||||
// VAES 256 = ( VAES && AVX512VL ) || ( VAES && !AVX512F )
|
||||
// VAES 512 only = VAES && AVX512F && !AVX512VL
|
||||
// VAES 256 only = VAES && !AVX512F
|
||||
|
||||
static inline bool has_vaes()
|
||||
{
|
||||
#ifdef __arm__
|
||||
|
50
util.c
50
util.c
@@ -1048,53 +1048,51 @@ bool fulltest( const uint32_t *hash, const uint32_t *target )
|
||||
return rc;
|
||||
}
|
||||
|
||||
// Mathmatically the difficulty is simply the reciprocal of the hash.
|
||||
// Mathmatically the difficulty is simply the reciprocal of the hash: d = 1/h.
|
||||
// Both are real numbers but the hash (target) is represented as a 256 bit
|
||||
// number with the upper 32 bits representing the whole integer part and the
|
||||
// lower 224 bits representing the fractional part:
|
||||
// fixed point number with the upper 32 bits representing the whole integer
|
||||
// part and the lower 224 bits representing the fractional part:
|
||||
// target[ 255:224 ] = trunc( 1/diff )
|
||||
// target[ 223: 0 ] = frac( 1/diff )
|
||||
//
|
||||
// The 256 bit hash is exact but any floating point representation is not.
|
||||
// Stratum provides the target difficulty as double precision, inexcact, and
|
||||
// Stratum provides the target difficulty as double precision, inexcact,
|
||||
// which must be converted to a hash target. The converted hash target will
|
||||
// likely be less precise to to inexact input and conversion error.
|
||||
// converted to 256 bit hash which will also be inexact and likelyless
|
||||
// accurate to to error in conversion.
|
||||
// likely be less precise due to inexact input and conversion error.
|
||||
// On the other hand getwork provides a 256 bit hash target which is exact.
|
||||
//
|
||||
// How much precision is needed?
|
||||
//
|
||||
// 128 bit types are implemented in software by the compiler using 64 bit
|
||||
// 128 bit types are implemented in software by the compiler on 64 bit
|
||||
// hardware resulting in lower performance and more error than would be
|
||||
// expected with a hardware 128 bit implementtaion.
|
||||
// expected with a hardware 128 bit implementaion.
|
||||
// Float80 exploits the internals of the FP unit which provide a 64 bit
|
||||
// mantissa in an 80 bit register with hardware rounding. When the destination
|
||||
// is double the data is rounded to float64 format. Long double returns all
|
||||
// 80 bits without rounding and including any accumulated computation error.
|
||||
// Float80 does not fit efficiently in memory.
|
||||
//
|
||||
// 256 bit hash: 76
|
||||
// Significant digits:
|
||||
// 256 bit hash: 76
|
||||
// float: 7 (float32, 80 bits with rounding to 32 bits)
|
||||
// double: 15 (float64, 80 bits with rounding to 64 bits)
|
||||
// long double 19 (float80, 80 bits with no rounding)
|
||||
// __float128 33 (128 bits with no rounding)
|
||||
// long double: 19 (float80, 80 bits with no rounding)
|
||||
// __float128: 33 (128 bits with no rounding)
|
||||
// uint32_t: 9
|
||||
// uint64_t: 19
|
||||
// uint128_t 38
|
||||
//
|
||||
// The concept of significant digits doesn't apply to the 256 bit hash
|
||||
// representation. It's fixed point making leading zeros significant
|
||||
// Leading zeros count in the 256 bit
|
||||
// representation. It's fixed point making leading zeros significant,
|
||||
// limiting its range and precision due to fewer zon-zero significant digits.
|
||||
//
|
||||
// Doing calculations with float128 and uint128 increases precision for
|
||||
// target_to_diff, but doesn't help with stratum diff being limited to
|
||||
// double precision. Is the extra precision really worth the extra cost?
|
||||
//
|
||||
// With double the error rate is 1/1e15, or one hash in every Petahash
|
||||
// with a very low difficulty, not a likely sitiation. Higher difficulty
|
||||
// increases the effective precision. Due to the floating nature of the
|
||||
// decimal point leading zeros aren't counted.
|
||||
// With float128 the error rate is 1/1e33 compared with 1/1e15 for double.
|
||||
// For double that's 1 error in every petahash with a very low difficulty,
|
||||
// not a likely situation. With higher difficulty effective precision
|
||||
// increases.
|
||||
//
|
||||
// Unfortunately I can't get float128 to work so long double (float80) is
|
||||
// as precise as it gets.
|
||||
@@ -1485,9 +1483,12 @@ static bool stratum_parse_extranonce(struct stratum_ctx *sctx, json_t *params, i
|
||||
sctx->xnonce2_size = xn2_size;
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
|
||||
if (pndx == 0 && opt_debug) /* pool dynamic change */
|
||||
applog(LOG_DEBUG, "Stratum set nonce %s with extranonce2 size=%d",
|
||||
xnonce1, xn2_size);
|
||||
if ( !opt_quiet ) /* pool dynamic change */
|
||||
applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d",
|
||||
xnonce1, xn2_size);
|
||||
// if (pndx == 0 && opt_debug)
|
||||
// applog(LOG_DEBUG, "Stratum set nonce %s with extranonce2 size=%d",
|
||||
// xnonce1, xn2_size);
|
||||
|
||||
return true;
|
||||
out:
|
||||
@@ -1581,8 +1582,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern bool opt_extranonce;
|
||||
|
||||
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass)
|
||||
{
|
||||
json_t *val = NULL, *res_val, *err_val;
|
||||
@@ -2171,7 +2170,8 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
|
||||
|
||||
if (!strcasecmp(method, "mining.notify")) {
|
||||
ret = stratum_notify(sctx, params);
|
||||
goto out;
|
||||
sctx->new_job = true;
|
||||
goto out;
|
||||
}
|
||||
if (!strcasecmp(method, "mining.ping")) { // cgminer 4.7.1+
|
||||
if (opt_debug) applog(LOG_DEBUG, "Pool ping");
|
||||
|
@@ -40,6 +40,7 @@ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
|
||||
|
||||
# Start building...
|
||||
|
||||
# Icelake AVX512 SHA VAES
|
||||
./clean-all.sh || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
@@ -48,6 +49,16 @@ make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
||||
|
||||
# Rocketlake AVX512 SHA AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=cascadelake -msha -Wall" ./configure $CONFIGURE_ARGS
|
||||
#CFLAGS="-O3 -march=rocketlake -Wall" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx512-sha.exe
|
||||
|
||||
# Zen1 AVX2 AES SHA
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=znver1 -Wall" ./configure $CONFIGURE_ARGS
|
||||
@@ -55,6 +66,16 @@ make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-zen.exe
|
||||
|
||||
# Zen3 AVX2 SHA VAES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=znver2 -mvaes -Wall" ./configure $CONFIGURE_ARGS
|
||||
# CFLAGS="-O3 -march=znver3 -Wall" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-zen3.exe
|
||||
|
||||
# Slylake-X AVX512 AES
|
||||
# mingw won't compile avx512 without -fno-asynchronous-unwind-tables
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
@@ -64,6 +85,7 @@ make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx512.exe
|
||||
|
||||
# Haswell AVX2 AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
# GCC 9 doesn't include AES in -march=core-avx2
|
||||
@@ -72,6 +94,7 @@ make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx2.exe
|
||||
|
||||
# Sandybridge AVX AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
# -march=corei7-avx still includes aes, but just in case
|
||||
@@ -80,7 +103,7 @@ make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx.exe
|
||||
|
||||
# -march=westmere is supported in gcc5
|
||||
# Westmere SSE4.2 AES
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=westmere -Wall" ./configure $CONFIGURE_ARGS
|
||||
@@ -89,6 +112,7 @@ make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
||||
|
||||
# Nehalem SSE4.2
|
||||
#make clean || echo clean
|
||||
#rm -f config.status
|
||||
#CFLAGS="-O3 -march=corei7 -Wall" ./configure $CONFIGURE_ARGS
|
||||
@@ -96,6 +120,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
||||
#strip -s cpuminer.exe
|
||||
#mv cpuminer.exe release/cpuminer-sse42.exe
|
||||
|
||||
# Core2 SSSE3
|
||||
#make clean || echo clean
|
||||
#rm -f config.status
|
||||
#CFLAGS="-O3 -march=core2 -Wall" ./configure $CONFIGURE_ARGS
|
||||
@@ -104,6 +129,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
||||
#mv cpuminer.exe release/cpuminer-ssse3.exe
|
||||
#make clean || echo clean
|
||||
|
||||
# Generic SSE2
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -msse2 -Wall" ./configure $CONFIGURE_ARGS
|
||||
|
Reference in New Issue
Block a user