mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v23.14
This commit is contained in:
@@ -87,7 +87,6 @@ Supported Algorithms
|
||||
groestl Groestl coin
|
||||
hex x16r-hex
|
||||
hmq1725
|
||||
hodl Hodlcoin
|
||||
jha Jackpotcoin
|
||||
keccak Maxcoin
|
||||
keccakc Creative coin
|
||||
@@ -115,9 +114,11 @@ Supported Algorithms
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
scryptn2 scrypt(1048576, 1, 1)
|
||||
sha256d Double SHA-256
|
||||
sha256dt
|
||||
sha256q Quad SHA-256
|
||||
sha256t Triple SHA-256
|
||||
sha3d Double keccak256 (BSHA3)
|
||||
sha512256d
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
skunk Signatum (SIGT)
|
||||
@@ -145,6 +146,7 @@ Supported Algorithms
|
||||
x16rt-veil veil
|
||||
x16s
|
||||
x17
|
||||
x20r
|
||||
x21s
|
||||
x22i
|
||||
x25x
|
||||
|
@@ -75,6 +75,13 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v23.14
|
||||
|
||||
ARM: Groestl AES optimizations enabled.
|
||||
All: Small optimization to Shabal 4way.
|
||||
x86_64: Extend Shabal 4way support to SSE2 from SSE4.1.
|
||||
All: deleted some unused files.
|
||||
|
||||
v23.13
|
||||
|
||||
Added x20r algo.
|
||||
|
@@ -39,7 +39,7 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
blake256r14_4way_update( &blake_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
blakehash_4way( hash, vdata );
|
||||
|
||||
|
@@ -182,7 +182,7 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
blake256r8_4way_update( &blakecoin_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
blakecoin_4way_hash( hash, vdata );
|
||||
|
||||
|
@@ -60,54 +60,17 @@ static const v128u64_t SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 };
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
|
||||
// No fast shuffle on NEON
|
||||
//static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 };
|
||||
static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
||||
static const v128u32_t gr_mask __attribute__ ((aligned (16))) =
|
||||
{ 0x03020100, 0x0b0a0908, 0x07060504, 0x0f0e0d0c };
|
||||
|
||||
#define gr_shuffle32( v ) v128_blendv( v128_qrev32( v ), v, BLEND_MASK )
|
||||
|
||||
/*
|
||||
#define TRANSP_MASK \
|
||||
0xd,0x5,0x9,0x1,0xc,0x4,0x8,0x0,0xf,0x7,0xb,0x3,0xe,0x6,0xa,0x2
|
||||
#define SUBSH_MASK0 \
|
||||
0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8
|
||||
#define SUBSH_MASK1 \
|
||||
0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9
|
||||
#define SUBSH_MASK2 \
|
||||
0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa
|
||||
#define SUBSH_MASK3 \
|
||||
0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb
|
||||
#define SUBSH_MASK4 \
|
||||
0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc
|
||||
#define SUBSH_MASK5 \
|
||||
0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd
|
||||
#define SUBSH_MASK6 \
|
||||
0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe
|
||||
#define SUBSH_MASK7 \
|
||||
0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3
|
||||
|
||||
//#define gr_shuffle8( v, c ) v128_shullfev8( v, c )
|
||||
|
||||
|
||||
#define gr_shuffle8( v, c15, c14, c13, c12, c11, c10, c09, c08, \
|
||||
c07, c06, c05, c04, c03, c02, c01, c00 ) \
|
||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
||||
v, 15, v, c15 ), 14, v, c14 ), 13, v, c13 ), 12, v, c12 ), \
|
||||
11, v, c11 ), 10, v, c10 ), 9, v, c09 ), 8, v, c08 ), \
|
||||
7, v, c07 ), 6, v, c06 ), 5, v, c05 ), 4, v, c04 ), \
|
||||
3, v, c03 ), 2, v, c02 ), 1, v, c01 ), 0, v, c00 )
|
||||
*/
|
||||
#define gr_shuffle32(v) vqtbl1q_u8( v, gr_mask )
|
||||
|
||||
#else
|
||||
|
||||
#define gr_shuffle32( v ) _mm_shuffle_epi32( v, 0xd8 )
|
||||
#define gr_shuffle32(v) _mm_shuffle_epi32( v, 0xd8 )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define tos(a) #a
|
||||
#define tostr(a) tos(a)
|
||||
|
||||
@@ -334,17 +297,16 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
||||
*/
|
||||
#define SUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
||||
/* SubBytes */\
|
||||
b0 = v128_xor(b0, b0);\
|
||||
a0 = v128_aesenclast(a0, b0);\
|
||||
a1 = v128_aesenclast(a1, b0);\
|
||||
a2 = v128_aesenclast(a2, b0);\
|
||||
a3 = v128_aesenclast(a3, b0);\
|
||||
a4 = v128_aesenclast(a4, b0);\
|
||||
a5 = v128_aesenclast(a5, b0);\
|
||||
a6 = v128_aesenclast(a6, b0);\
|
||||
a7 = v128_aesenclast(a7, b0);\
|
||||
a0 = v128_aesenclast_nokey( a0 ); \
|
||||
a1 = v128_aesenclast_nokey( a1 ); \
|
||||
a2 = v128_aesenclast_nokey( a2 ); \
|
||||
a3 = v128_aesenclast_nokey( a3 ); \
|
||||
a4 = v128_aesenclast_nokey( a4 ); \
|
||||
a5 = v128_aesenclast_nokey( a5 ); \
|
||||
a6 = v128_aesenclast_nokey( a6 ); \
|
||||
a7 = v128_aesenclast_nokey( a7 ); \
|
||||
/* MixBytes */\
|
||||
MixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
|
||||
MixBytes( a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7 ); \
|
||||
}
|
||||
|
||||
#define ROUNDS_P(){\
|
||||
@@ -365,7 +327,6 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \
|
||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 ); \
|
||||
\
|
||||
/* AddRoundConstant P1024 */\
|
||||
xmm0 = v128_xor( xmm0, \
|
||||
casti_v128( round_const_p, round_counter+1 ) ); \
|
||||
@@ -467,7 +428,6 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
||||
t1 = v128_unpackhi16(t1, i3);\
|
||||
i2 = v128_unpacklo16(i2, i3);\
|
||||
i0 = v128_unpacklo16(i0, i1);\
|
||||
\
|
||||
/* shuffle with immediate */\
|
||||
t0 = gr_shuffle32( t0 ); \
|
||||
t1 = gr_shuffle32( t1 ); \
|
||||
@@ -477,7 +437,6 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
||||
i2 = gr_shuffle32( i2 ); \
|
||||
i4 = gr_shuffle32( i4 ); \
|
||||
i6 = gr_shuffle32( i6 ); \
|
||||
\
|
||||
/* continue with unpack */\
|
||||
t4 = i0;\
|
||||
i0 = v128_unpacklo32(i0, i2);\
|
||||
@@ -584,7 +543,8 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
||||
/* transpose done */\
|
||||
}/**/
|
||||
|
||||
|
||||
#if 0
|
||||
// not used
|
||||
void INIT( v128_t* chaining )
|
||||
{
|
||||
static v128_t xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||
@@ -613,6 +573,7 @@ void INIT( v128_t* chaining )
|
||||
chaining[6] = xmm14;
|
||||
chaining[7] = xmm15;
|
||||
}
|
||||
#endif
|
||||
|
||||
void TF1024( v128_t* chaining, const v128_t* message )
|
||||
{
|
||||
|
@@ -1,3 +1,6 @@
|
||||
#if !defined GROESTL256_INTR_AES_H__
|
||||
#define GROESTL256_INTR_AES_H__
|
||||
|
||||
/* groestl-intr-aes.h Aug 2011
|
||||
*
|
||||
* Groestl implementation with intrinsics using ssse3, sse4.1, and aes
|
||||
@@ -50,18 +53,17 @@ static const v128u64_t SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e };
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
|
||||
// No fast shuffle on NEON
|
||||
static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 };
|
||||
static const v128u32_t gr_mask __attribute__ ((aligned (16))) =
|
||||
{ 0x03020100, 0x0b0a0908, 0x07060504, 0x0f0e0d0c };
|
||||
|
||||
#define gr_shuffle32( v ) v128_shufflev32( v, vmask_d8 )
|
||||
#define gr_shuffle32(v) vqtbl1q_u8( v, gr_mask )
|
||||
|
||||
#else
|
||||
|
||||
#define gr_shuffle32( v ) _mm_shuffle_epi32( v, 0xd8 )
|
||||
#define gr_shuffle32(v) _mm_shuffle_epi32( v, 0xd8 )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define tos(a) #a
|
||||
#define tostr(a) tos(a)
|
||||
|
||||
@@ -598,4 +600,4 @@ void OF512( v128_t* chaining )
|
||||
chaining[3] = xmm11;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -181,6 +181,7 @@ int groestl512( hashState_groestl* ctx, void* output, const void* input,
|
||||
|
||||
// digest final padding block and do output transform
|
||||
TF1024( ctx->chaining, ctx->buffer );
|
||||
|
||||
OF1024( ctx->chaining );
|
||||
|
||||
// store hash result in output
|
||||
|
@@ -87,6 +87,7 @@ int final_groestl( hashState_groestl*, void* );
|
||||
int update_and_final_groestl( hashState_groestl*, void*, const void*, int );
|
||||
int groestl512( hashState_groestl*, void*, const void*, uint64_t );
|
||||
#define groestl512_full groestl512
|
||||
#define groestl512_ctx groestl512
|
||||
|
||||
|
||||
#endif /* __hash_h */
|
||||
|
@@ -626,7 +626,7 @@ static const __m256i SUBSH_MASK7_2WAY =
|
||||
|
||||
#define ROUND_2WAY(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
||||
/* AddRoundConstant */\
|
||||
b1 = mm256_bcast_m128( mm128_mask_32( v128_neg1, 0x3 ) ); \
|
||||
b1 = mm256_bcast_m128( v128_mask32( v128_neg1, 0x3 ) ); \
|
||||
a0 = _mm256_xor_si256( a0, mm256_bcast_m128( round_const_l0[i] ) );\
|
||||
a1 = _mm256_xor_si256( a1, b1 );\
|
||||
a2 = _mm256_xor_si256( a2, b1 );\
|
||||
|
@@ -213,7 +213,7 @@ int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||
*noncev = v128_bswap32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||
|
||||
myriad_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
@@ -465,12 +465,8 @@ typedef union
|
||||
{
|
||||
keccak256_2x64_context keccak;
|
||||
cubehashParam cube;
|
||||
//#if defined(__x86_64__)
|
||||
skein256_2x64_context skein;
|
||||
//#else
|
||||
// sph_skein512_context skein;
|
||||
//#endif
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl256 groestl;
|
||||
#else
|
||||
sph_groestl256_context groestl;
|
||||
@@ -516,7 +512,6 @@ static void allium_4way_hash( void *hash, const void *midstate_vars,
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
//#if defined(__x86_64__)
|
||||
intrlv_2x64( vhashA, hash0, hash1, 256 );
|
||||
skein256_2x64_init( &ctx.skein );
|
||||
skein256_2x64_update( &ctx.skein, vhashA, 32 );
|
||||
@@ -527,23 +522,8 @@ static void allium_4way_hash( void *hash, const void *midstate_vars,
|
||||
skein256_2x64_update( &ctx.skein, vhashA, 32 );
|
||||
skein256_2x64_close( &ctx.skein, vhashA );
|
||||
dintrlv_2x64( hash2, hash3, vhashA, 256 );
|
||||
/*
|
||||
#else
|
||||
sph_skein256_init( &ctx.skein );
|
||||
sph_skein256( &ctx.skein, hash0, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash0 );
|
||||
sph_skein256_init( &ctx.skein );
|
||||
sph_skein256( &ctx.skein, hash1, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash1 );
|
||||
sph_skein256_init( &ctx.skein );
|
||||
sph_skein256( &ctx.skein, hash2, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash2 );
|
||||
sph_skein256_init( &ctx.skein );
|
||||
sph_skein256( &ctx.skein, hash3, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash3 );
|
||||
#endif
|
||||
*/
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl256_full( &ctx.groestl, hash0, hash0, 256 );
|
||||
groestl256_full( &ctx.groestl, hash1, hash1, 256 );
|
||||
groestl256_full( &ctx.groestl, hash2, hash2, 256 );
|
||||
|
@@ -67,7 +67,7 @@ int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
|
||||
lyra2h_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
lyra2h_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
|
@@ -456,7 +456,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev2_4way_hash( hash, vdata );
|
||||
|
||||
|
@@ -7,15 +7,15 @@
|
||||
#include "algo/blake/blake512-hash.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/fugue/fugue-aesni.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#endif
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#endif
|
||||
#include "algo/jh/sph_jh.h"
|
||||
@@ -36,15 +36,15 @@ union _hmq1725_ctx_holder
|
||||
blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
#if defined(__AES__)
|
||||
hashState_groestl groestl;
|
||||
hashState_fugue fugue;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_fugue512_context fugue;
|
||||
#endif
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_skein512_context skein;
|
||||
@@ -62,9 +62,6 @@ union _hmq1725_ctx_holder
|
||||
};
|
||||
typedef union _hmq1725_ctx_holder hmq1725_ctx_holder;
|
||||
|
||||
//static hmq1725_ctx_holder hmq1725_ctx __attribute__ ((aligned (64)));
|
||||
//static __thread sph_bmw512_context hmq_bmw_mid __attribute__ ((aligned (64)));
|
||||
|
||||
extern void hmq1725hash(void *state, const void *input)
|
||||
{
|
||||
const uint32_t mask = 24;
|
||||
@@ -82,7 +79,7 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
|
||||
if ( hashB[0] & mask ) //1
|
||||
{
|
||||
#if defined(__AES__)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &ctx.groestl, hashA, hashB, 512 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
@@ -226,7 +223,7 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
sph_sha512_close( &ctx.sha, hashA );
|
||||
}
|
||||
|
||||
#if defined(__AES__)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &ctx.groestl, hashB, hashA, 512 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
|
@@ -587,8 +587,8 @@ void sha256_ni2x_final_rounds( uint32_t *out_X, uint32_t *out_Y,
|
||||
// Add the nonces (msg[0] lane 3) to A & E (STATE0 lanes 1 & 3)
|
||||
TMSG0_X = casti_m128i( msg_X, 0 );
|
||||
TMSG0_Y = casti_m128i( msg_Y, 0 );
|
||||
TMP_X = mm128_xim_32( TMSG0_X, TMSG0_X, 0xd5 );
|
||||
TMP_Y = mm128_xim_32( TMSG0_Y, TMSG0_Y, 0xd5 );
|
||||
TMP_X = v128_xim32( TMSG0_X, TMSG0_X, 0xd5 );
|
||||
TMP_Y = v128_xim32( TMSG0_Y, TMSG0_Y, 0xd5 );
|
||||
STATE0_X = _mm_add_epi32( STATE0_X, TMP_X );
|
||||
STATE0_Y = _mm_add_epi32( STATE0_Y, TMP_Y );
|
||||
|
||||
|
@@ -34,8 +34,6 @@
|
||||
#include <string.h>
|
||||
#include "shabal-hash-4way.h"
|
||||
|
||||
//#if defined(__SSE4_1__) || defined(__ARM_NEON)
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define DECL_STATE16 \
|
||||
@@ -47,8 +45,6 @@
|
||||
C8, C9, CA, CB, CC, CD, CE, CF; \
|
||||
__m512i M0, M1, M2, M3, M4, M5, M6, M7, \
|
||||
M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
const __m512i FIVE = v512_32( 5 ); \
|
||||
const __m512i THREE = v512_32( 3 ); \
|
||||
uint32_t Wlow, Whigh;
|
||||
|
||||
#define READ_STATE16(state) do \
|
||||
@@ -292,11 +288,21 @@ do { \
|
||||
mm512_swap1024_512( BF, CF ); \
|
||||
} while (0)
|
||||
|
||||
static inline __m512i v512_mult_x3( const __m512i x )
|
||||
{
|
||||
return _mm512_add_epi32( x, _mm512_slli_epi32( x, 1 ) );
|
||||
}
|
||||
|
||||
static inline __m512i v512_mult_x5( const __m512i x )
|
||||
{
|
||||
return _mm512_add_epi32( x, _mm512_slli_epi32( x, 2 ) );
|
||||
}
|
||||
|
||||
#define PERM_ELT16( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
||||
do { \
|
||||
xa0 = mm512_xor3( xm, xb1, mm512_xorandnot( \
|
||||
_mm512_mullo_epi32( mm512_xor3( xa0, xc, \
|
||||
_mm512_mullo_epi32( mm512_rol_32( xa1, 15 ), FIVE ) ), THREE ), \
|
||||
v512_mult_x3( mm512_xor3( xa0, xc, \
|
||||
v512_mult_x5( mm512_rol_32( xa1, 15 ) ) ) ), \
|
||||
xb3, xb2 ) ); \
|
||||
xb0 = mm512_xnor( xa0, mm512_rol_32( xb0, 1 ) ); \
|
||||
} while (0)
|
||||
@@ -644,8 +650,6 @@ shabal512_16way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
C8, C9, CA, CB, CC, CD, CE, CF; \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7, \
|
||||
M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
const __m256i FIVE = v256_32( 5 ); \
|
||||
const __m256i THREE = v256_32( 3 ); \
|
||||
uint32_t Wlow, Whigh;
|
||||
|
||||
#define READ_STATE8(state) do \
|
||||
@@ -889,11 +893,21 @@ do { \
|
||||
mm256_swap512_256( BF, CF ); \
|
||||
} while (0)
|
||||
|
||||
static inline __m256i v256_mult_x3( const __m256i x )
|
||||
{
|
||||
return _mm256_add_epi32( x, _mm256_slli_epi32( x, 1 ) );
|
||||
}
|
||||
|
||||
static inline __m256i v256_mult_x5( const __m256i x )
|
||||
{
|
||||
return _mm256_add_epi32( x, _mm256_slli_epi32( x, 2 ) );
|
||||
}
|
||||
|
||||
#define PERM_ELT8( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
||||
do { \
|
||||
xa0 = mm256_xor3( xm, xb1, mm256_xorandnot( \
|
||||
_mm256_mullo_epi32( mm256_xor3( xa0, xc, \
|
||||
_mm256_mullo_epi32( mm256_rol_32( xa1, 15 ), FIVE ) ), THREE ), \
|
||||
v256_mult_x3( mm256_xor3( xa0, xc, \
|
||||
v256_mult_x5( mm256_rol_32( xa1, 15 ) ) ) ), \
|
||||
xb3, xb2 ) ); \
|
||||
xb0 = mm256_xnor( xa0, mm256_rol_32( xb0, 1 ) ); \
|
||||
} while (0)
|
||||
@@ -1226,15 +1240,13 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(__SSE4_1__) || defined(__ARM_NEON)
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
#define DECL_STATE \
|
||||
v128u32_t A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, AA, AB; \
|
||||
v128u32_t B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, BA, BB, BC, BD, BE, BF; \
|
||||
v128u32_t C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, CA, CB, CC, CD, CE, CF; \
|
||||
v128u32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
const v128u32_t FIVE = v128_32( 5 ); \
|
||||
const v128u32_t THREE = v128_32( 3 ); \
|
||||
uint32_t Wlow, Whigh;
|
||||
|
||||
#define READ_STATE( state ) \
|
||||
@@ -1479,11 +1491,21 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
v128_swap256_128( BF, CF ); \
|
||||
}
|
||||
|
||||
static inline v128_t v128_mult_x3( const v128_t x )
|
||||
{
|
||||
return v128_add32( x, v128_sl32( x, 1 ) );
|
||||
}
|
||||
|
||||
static inline v128_t v128_mult_x5( const v128_t x )
|
||||
{
|
||||
return v128_add32( x, v128_sl32( x, 2 ) );
|
||||
}
|
||||
|
||||
#define PERM_ELT( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
||||
{ \
|
||||
xa0 = v128_xor3( xm, xb1, v128_xorandnot( \
|
||||
v128_mul32( v128_xor3( xa0, xc, \
|
||||
v128_mul32( v128_rol32( xa1, 15 ), FIVE ) ), THREE ), \
|
||||
v128_mult_x3( v128_xor3( xa0, xc, \
|
||||
v128_mult_x5( v128_rol32( xa1, 15 ) ) ) ), \
|
||||
xb3, xb2 ) ); \
|
||||
xb0 = v128_not( v128_xor( xa0, v128_rol32( xb0, 1 ) ) ); \
|
||||
}
|
||||
|
@@ -62,7 +62,7 @@ void shabal512_8way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__) || defined(__ARM_NEON)
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
typedef struct {
|
||||
v128_t buf[16] __attribute__ ((aligned (64)));
|
||||
|
@@ -1,369 +0,0 @@
|
||||
#include "Swifftx_sha3.h"
|
||||
extern "C" {
|
||||
#include "SWIFFTX.h"
|
||||
}
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// The default salt value.
|
||||
// This is the expansion of e (Euler's number) - the 19 digits after 2.71:
|
||||
// 8281828459045235360.
|
||||
// The above in base 256, from MSB to LSB:
|
||||
BitSequence SWIF_saltValueChar[SWIF_HAIFA_SALT_SIZE] = {114, 238, 247, 26, 192, 28, 170, 160};
|
||||
|
||||
// All the IVs here below were produced from the decimal digits of e's expansion.
|
||||
// The code can be found in 'ProduceRandomIV.c'.
|
||||
// The initial value for 224 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{37, 242, 132, 2, 167, 81, 158, 237, 113, 77, 162, 60, 65, 236, 108, 246,
|
||||
101, 72, 190, 109, 58, 205, 99, 6, 114, 169, 104, 114, 38, 146, 121, 142,
|
||||
59, 98, 233, 84, 72, 227, 22, 199, 17, 102, 198, 145, 24, 178, 37, 1,
|
||||
215, 245, 66, 120, 230, 193, 113, 253, 165, 218, 66, 134, 49, 231, 124, 204,
|
||||
0};
|
||||
|
||||
// The initial value for 256 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{250, 50, 42, 40, 14, 233, 53, 48, 227, 42, 237, 187, 211, 120, 209, 234,
|
||||
27, 144, 4, 61, 243, 244, 29, 247, 37, 162, 70, 11, 231, 196, 53, 6,
|
||||
193, 240, 94, 126, 204, 132, 104, 46, 114, 29, 3, 104, 118, 184, 201, 3,
|
||||
57, 77, 91, 101, 31, 155, 84, 199, 228, 39, 198, 42, 248, 198, 201, 178,
|
||||
8};
|
||||
|
||||
// The initial value for 384 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{40, 145, 193, 100, 205, 171, 47, 76, 254, 10, 196, 41, 165, 207, 200, 79,
|
||||
109, 13, 75, 201, 17, 172, 64, 162, 217, 22, 88, 39, 51, 30, 220, 151,
|
||||
133, 73, 216, 233, 184, 203, 77, 0, 248, 13, 28, 199, 30, 147, 232, 242,
|
||||
227, 124, 169, 174, 14, 45, 27, 87, 254, 73, 68, 136, 135, 159, 83, 152,
|
||||
0};
|
||||
|
||||
// The initial value for 512 digest size.
|
||||
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
||||
{195, 126, 197, 167, 157, 114, 99, 126, 208, 105, 200, 90, 71, 195, 144, 138,
|
||||
142, 122, 123, 116, 24, 214, 168, 173, 203, 183, 194, 210, 102, 117, 138, 42,
|
||||
114, 118, 132, 33, 35, 149, 143, 163, 163, 183, 243, 175, 72, 22, 201, 255,
|
||||
102, 243, 22, 187, 211, 167, 239, 76, 164, 70, 80, 182, 181, 212, 9, 185,
|
||||
0};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// NIST API implementation portion.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int Swifftx::Init(int hashbitlen)
|
||||
{
|
||||
switch(hashbitlen)
|
||||
{
|
||||
case 224:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
// Initializes h_0 in HAIFA:
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_224, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
case 256:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_256, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
case 384:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_384, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
case 512:
|
||||
swifftxState.hashbitlen = hashbitlen;
|
||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_512, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
break;
|
||||
default:
|
||||
return BAD_HASHBITLEN;
|
||||
}
|
||||
|
||||
swifftxState.wasUpdated = false;
|
||||
swifftxState.remainingSize = 0;
|
||||
memset(swifftxState.remaining, 0, SWIF_HAIFA_INPUT_BLOCK_SIZE);
|
||||
memset(swifftxState.numOfBitsChar, 0, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// Initialize the salt with the default value.
|
||||
memcpy(swifftxState.salt, SWIF_saltValueChar, SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int Swifftx::Update(const BitSequence *data, DataLength databitlen)
|
||||
{
|
||||
// The size of input in bytes after putting the remaining data from previous invocation.
|
||||
int sizeOfInputAfterRemaining = 0;
|
||||
// The input block to compression function of SWIFFTX:
|
||||
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
|
||||
// Whether we handled a single block.
|
||||
bool wasSingleBlockHandled = false;
|
||||
|
||||
swifftxState.wasUpdated = true;
|
||||
|
||||
// Handle an empty message as required by NIST. Since 'Final()' is oblivious to the input
|
||||
// (but of course uses the output of the compression function from the previous round,
|
||||
// which is called h_{i-1} in HAIFA article), we have to do nothing here.
|
||||
if (databitlen == 0)
|
||||
return SUCCESS;
|
||||
|
||||
// If we had before an input with unaligned length, return an error
|
||||
if (swifftxState.remainingSize % 8)
|
||||
{
|
||||
return INPUT_DATA_NOT_ALIGNED;
|
||||
}
|
||||
|
||||
// Convert remaining size to bytes.
|
||||
swifftxState.remainingSize /= 8;
|
||||
|
||||
// As long as we have enough data combined from (remaining + data) to fill input block
|
||||
//NASTAVENIE RUND
|
||||
while (((databitlen / 8) + swifftxState.remainingSize) >= SWIF_HAIFA_INPUT_BLOCK_SIZE)
|
||||
{
|
||||
// Fill the input block with data:
|
||||
// 1. The output of the previous block:
|
||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
// 2. The input part of the block:
|
||||
// 2a. The remaining data from the previous 'Update()' call:
|
||||
if (swifftxState.remainingSize)
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
|
||||
swifftxState.remainingSize);
|
||||
// 2b. The input data that we have place for after the 'remaining':
|
||||
sizeOfInputAfterRemaining = SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
|
||||
- ((int) swifftxState.remainingSize) - SWIF_HAIFA_NUM_OF_BITS_SIZE
|
||||
- SWIF_HAIFA_SALT_SIZE;
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize,
|
||||
data, sizeOfInputAfterRemaining);
|
||||
|
||||
// 3. The #bits part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
|
||||
+ sizeOfInputAfterRemaining,
|
||||
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 4. The salt part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
|
||||
+ sizeOfInputAfterRemaining + SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
||||
swifftxState.salt, SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, false);
|
||||
|
||||
// Update the #bits field with SWIF_HAIFA_INPUT_BLOCK_SIZE.
|
||||
AddToCurrInBase256(swifftxState.numOfBitsChar, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
|
||||
wasSingleBlockHandled = true;
|
||||
data += sizeOfInputAfterRemaining;
|
||||
databitlen -= (sizeOfInputAfterRemaining * 8);
|
||||
swifftxState.remainingSize = 0;
|
||||
}
|
||||
|
||||
// Update the swifftxState.remaining and swifftxState.remainingSize.
|
||||
// remainingSize will be in bits after exiting 'Update()'.
|
||||
if (wasSingleBlockHandled)
|
||||
{
|
||||
swifftxState.remainingSize = (unsigned int) databitlen; // now remaining size is in bits.
|
||||
if (swifftxState.remainingSize)
|
||||
memcpy(swifftxState.remaining, data, (swifftxState.remainingSize + 7) / 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(swifftxState.remaining + swifftxState.remainingSize, data,
|
||||
(size_t) (databitlen + 7) / 8);
|
||||
swifftxState.remainingSize = (swifftxState.remainingSize * 8) + (unsigned short) databitlen;
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int Swifftx::Final(BitSequence *hashval)
|
||||
{
|
||||
int i;
|
||||
// Whether to add one last block. True if the padding appended to the last block overflows
|
||||
// the block size.
|
||||
bool toAddFinalBlock = false;
|
||||
bool toPutOneInFinalBlock = false;
|
||||
unsigned short oneShift = 0;
|
||||
// The size of the last input block before the zeroes padding. We add 1 here because we
|
||||
// include the final '1' bit in the calculation and 7 as we round the length to bytes.
|
||||
unsigned short sizeOfLastInputBlock = (swifftxState.remainingSize + 1 + 7) / 8;
|
||||
// The number of bytes of zero in the padding part.
|
||||
// The padding contains:
|
||||
// 1. A single 1 bit.
|
||||
// 2. As many zeroes as needed.
|
||||
// 3. The message length in bits. Occupies SWIF_HAIFA_NUM_OF_BITS_SIZE bytes.
|
||||
// 4. The digest size. Maximum is 512, so we need 2 bytes.
|
||||
// If the total number achieved is negative, add an additional block, as HAIFA specifies.
|
||||
short numOfZeroBytesInPadding = (short) SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
|
||||
- sizeOfLastInputBlock - (2 * SWIF_HAIFA_NUM_OF_BITS_SIZE) - 2
|
||||
- SWIF_HAIFA_SALT_SIZE;
|
||||
// The input block to compression function of SWIFFTX:
|
||||
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
|
||||
// The message length in base 256.
|
||||
BitSequence messageLengthChar[SWIF_HAIFA_NUM_OF_BITS_SIZE] = {0};
|
||||
// The digest size used for padding:
|
||||
unsigned char digestSizeLSB = swifftxState.hashbitlen % 256;
|
||||
unsigned char digestSizeMSB = (swifftxState.hashbitlen - digestSizeLSB) / 256;
|
||||
|
||||
if (numOfZeroBytesInPadding < 1)
|
||||
toAddFinalBlock = true;
|
||||
|
||||
// Fill the input block with data:
|
||||
// 1. The output of the previous block:
|
||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
// 2a. The input part of the block, which is the remaining data from the previous 'Update()'
|
||||
// call, if exists and an extra '1' bit (maybe all we have is this extra 1):
|
||||
|
||||
// Add the last 1 in big-endian convention ...
|
||||
if (swifftxState.remainingSize % 8 == 0)
|
||||
{
|
||||
swifftxState.remaining[sizeOfLastInputBlock - 1] = 0x80;
|
||||
}
|
||||
else
|
||||
{
|
||||
swifftxState.remaining[sizeOfLastInputBlock - 1] |= (1 << (7 - (swifftxState.remainingSize % 8)));
|
||||
}
|
||||
|
||||
if (sizeOfLastInputBlock)
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
|
||||
sizeOfLastInputBlock);
|
||||
|
||||
// Compute the message length in base 256:
|
||||
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
|
||||
messageLengthChar[i] = swifftxState.numOfBitsChar[i];
|
||||
if (sizeOfLastInputBlock)
|
||||
AddToCurrInBase256(messageLengthChar, sizeOfLastInputBlock * 8);
|
||||
|
||||
if (!toAddFinalBlock)
|
||||
{
|
||||
// 2b. Put the zeroes:
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
|
||||
0, numOfZeroBytesInPadding);
|
||||
// 2c. Pad the message length:
|
||||
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
||||
+ numOfZeroBytesInPadding + i] = messageLengthChar[i];
|
||||
// 2d. Pad the digest size:
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
||||
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE] = digestSizeMSB;
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
||||
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE + 1] = digestSizeLSB;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2b. Put the zeroes, if at all:
|
||||
if ((SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock) > 0)
|
||||
{
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
|
||||
0, SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. The #bits part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
|
||||
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 4. The salt part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
||||
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
||||
swifftxState.salt,
|
||||
SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, !toAddFinalBlock);
|
||||
|
||||
// If we have to add one more block, it is now:
|
||||
if (toAddFinalBlock)
|
||||
{
|
||||
// 1. The previous output block, as usual.
|
||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
||||
|
||||
// 2a. Instead of the input, zeroes:
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE , 0,
|
||||
SWIF_HAIFA_INPUT_BLOCK_SIZE - SWIF_HAIFA_NUM_OF_BITS_SIZE - 2);
|
||||
// 2b. Instead of the input, the message length:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
||||
- SWIF_HAIFA_NUM_OF_BITS_SIZE - 2,
|
||||
messageLengthChar,
|
||||
SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 2c. Instead of the input, the digest size:
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 2] = digestSizeMSB;
|
||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 1] = digestSizeLSB;
|
||||
// 3. The #bits part of the block, which is zero in case of additional block:
|
||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
|
||||
0,
|
||||
SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
||||
// 4. The salt part of the block:
|
||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
||||
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
||||
swifftxState.salt,
|
||||
SWIF_HAIFA_SALT_SIZE);
|
||||
|
||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, true);
|
||||
}
|
||||
|
||||
// Finally, copy the result into 'hashval'. In case the digest size is not 512bit, copy the
|
||||
// first hashbitlen of them:
|
||||
for (i = 0; i < (swifftxState.hashbitlen / 8); ++i)
|
||||
hashval[i] = swifftxState.currOutputBlock[i];
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int Swifftx::Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
|
||||
BitSequence *hashval)
|
||||
{
|
||||
int result;
|
||||
//hashState state;
|
||||
// The pointer to the current place in the input we take into the compression function.
|
||||
DataLength currInputIndex = 0;
|
||||
|
||||
result = Swifftx::Init(hashbitlen);
|
||||
|
||||
if (result != SUCCESS)
|
||||
return result;
|
||||
|
||||
for ( ; (databitlen / 8) > SWIF_HAIFA_INPUT_BLOCK_SIZE;
|
||||
currInputIndex += SWIF_HAIFA_INPUT_BLOCK_SIZE, databitlen -= (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8))
|
||||
{
|
||||
result = Swifftx::Update(data + currInputIndex, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
|
||||
if (result != SUCCESS)
|
||||
return result;
|
||||
}
|
||||
|
||||
// The length of the last block may be shorter than (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8)
|
||||
result = Swifftx::Update(data + currInputIndex, databitlen);
|
||||
if (result != SUCCESS)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
return Swifftx::Final(hashval);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper fuction implementation portion.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Swifftx::AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE],
|
||||
unsigned short toAdd)
|
||||
{
|
||||
unsigned char remainder = 0;
|
||||
short i;
|
||||
BitSequence currValueInBase256[8] = {0};
|
||||
unsigned short currIndex = 7;
|
||||
unsigned short temp = 0;
|
||||
|
||||
do
|
||||
{
|
||||
remainder = toAdd % 256;
|
||||
currValueInBase256[currIndex--] = remainder;
|
||||
toAdd -= remainder;
|
||||
toAdd /= 256;
|
||||
}
|
||||
while(toAdd != 0);
|
||||
|
||||
for (i = 7; i >= 0; --i)
|
||||
{
|
||||
temp = value[i] + currValueInBase256[i];
|
||||
if (temp > 255)
|
||||
{
|
||||
value[i] = temp % 256;
|
||||
currValueInBase256[i - 1]++;
|
||||
}
|
||||
else
|
||||
value[i] = (unsigned char) temp;
|
||||
}
|
||||
}
|
@@ -1,79 +0,0 @@
|
||||
#ifndef SWIFFTX_SHA3_H
|
||||
#define SWIFFTX_SHA3_H
|
||||
|
||||
#include "sha3_interface.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
|
||||
class Swifftx : public SHA3 {
|
||||
|
||||
#define SWIFFTX_INPUT_BLOCK_SIZE 256
|
||||
#define SWIFFTX_OUTPUT_BLOCK_SIZE 65
|
||||
#define SWIF_HAIFA_SALT_SIZE 8
|
||||
#define SWIF_HAIFA_NUM_OF_BITS_SIZE 8
|
||||
#define SWIF_HAIFA_INPUT_BLOCK_SIZE (SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE \
|
||||
- SWIF_HAIFA_NUM_OF_BITS_SIZE - SWIF_HAIFA_SALT_SIZE)
|
||||
|
||||
typedef unsigned char BitSequence;
|
||||
//const DataLength SWIF_SALT_VALUE;
|
||||
|
||||
#define SWIF_HAIFA_IV 0
|
||||
|
||||
/*const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE];*/
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SUCCESS = 0,
|
||||
FAIL = 1,
|
||||
BAD_HASHBITLEN = 2,
|
||||
BAD_SALT_SIZE = 3,
|
||||
SET_SALT_VALUE_FAILED = 4,
|
||||
INPUT_DATA_NOT_ALIGNED = 5
|
||||
} HashReturn;
|
||||
|
||||
typedef struct hashState {
|
||||
unsigned short hashbitlen;
|
||||
|
||||
// The data remained after the recent call to 'Update()'.
|
||||
BitSequence remaining[SWIF_HAIFA_INPUT_BLOCK_SIZE + 1];
|
||||
|
||||
// The size of the remaining data in bits.
|
||||
// Is 0 in case there is no remaning data at all.
|
||||
unsigned int remainingSize;
|
||||
|
||||
// The current output of the compression function. At the end will contain the final digest
|
||||
// (which may be needed to be truncated, depending on hashbitlen).
|
||||
BitSequence currOutputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
||||
|
||||
// The value of '#bits hashed so far' field in HAIFA, in base 256.
|
||||
BitSequence numOfBitsChar[SWIF_HAIFA_NUM_OF_BITS_SIZE];
|
||||
|
||||
// The salt value currently in use:
|
||||
BitSequence salt[SWIF_HAIFA_SALT_SIZE];
|
||||
|
||||
// Indicates whether a single 'Update()' occured.
|
||||
// Ater a call to 'Update()' the key and the salt values cannot be changed.
|
||||
bool wasUpdated;
|
||||
} hashState;
|
||||
|
||||
private:
|
||||
int swifftxNumRounds;
|
||||
hashState swifftxState;
|
||||
|
||||
|
||||
public:
|
||||
int Init(int hashbitlen);
|
||||
int Update(const BitSequence *data, DataLength databitlen);
|
||||
int Final(BitSequence *hashval);
|
||||
int Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
|
||||
BitSequence *hashval);
|
||||
|
||||
private:
|
||||
static void AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE], unsigned short toAdd);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,21 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace hash {
|
||||
|
||||
using BitSequence = unsigned char;
|
||||
using DataLength = unsigned long long;
|
||||
|
||||
struct hash_interface {
|
||||
virtual ~hash_interface() = default;
|
||||
|
||||
virtual int Init(int hash_bitsize) = 0;
|
||||
virtual int Update(const BitSequence *data, DataLength data_bitsize) = 0;
|
||||
virtual int Final(BitSequence *hash) = 0;
|
||||
|
||||
virtual int
|
||||
Hash(int hash_bitsize, const BitSequence *data, DataLength data_bitsize, BitSequence *hash) = 0;
|
||||
};
|
||||
|
||||
} // namespace hash
|
@@ -1,14 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
//#include <streams/hash/hash_interface.h>
|
||||
#include "hash_interface.h"
|
||||
|
||||
namespace sha3 {
|
||||
|
||||
using BitSequence = hash::BitSequence;
|
||||
using DataLength = hash::DataLength;
|
||||
|
||||
struct sha3_interface : hash::hash_interface {};
|
||||
|
||||
} // namespace sha3
|
@@ -27,7 +27,7 @@
|
||||
#else
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#endif
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
@@ -50,7 +50,7 @@ typedef struct TortureGarden TortureGarden;
|
||||
// Graph of hash algos plus SPH contexts
|
||||
struct TortureGarden
|
||||
{
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
@@ -123,7 +123,7 @@ static int get_hash( void *output, const void *input, TortureGarden *garden,
|
||||
#endif
|
||||
break;
|
||||
case 5:
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &garden->groestl, hash, input, 512 );
|
||||
#else
|
||||
sph_groestl512_init( &garden->groestl) ;
|
||||
|
@@ -1092,7 +1092,7 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid,
|
||||
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &ctx.groestl, hash0, in0, size<<3 );
|
||||
groestl512_full( &ctx.groestl, hash1, in1, size<<3 );
|
||||
#else
|
||||
@@ -1173,7 +1173,7 @@ int x16r_2x64_hash_generic( void* output, const void* input, int thrid,
|
||||
simd512_ctx( &ctx.simd, hash1, in1, size );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
echo_full( &ctx.echo, hash0, 512, in0, size );
|
||||
echo_full( &ctx.echo, hash1, 512, in1, size );
|
||||
#else
|
||||
|
@@ -218,7 +218,7 @@ union _x16r_2x64_context_overlay
|
||||
{
|
||||
blake512_2x64_context blake;
|
||||
bmw512_2x64_context bmw;
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
|
@@ -1208,7 +1208,7 @@ union _x16rv2_2x64_context_overlay
|
||||
{
|
||||
blake512_2x64_context blake;
|
||||
bmw512_2x64_context bmw;
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
@@ -1294,7 +1294,7 @@ int x16rv2_2x64_hash( void* output, const void* input, int thrid )
|
||||
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||
break;
|
||||
case GROESTL:
|
||||
#if defined(__AES__)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &ctx.groestl, hash0, in0, size<<3 );
|
||||
groestl512_full( &ctx.groestl, hash1, in1, size<<3 );
|
||||
#else
|
||||
@@ -1400,7 +1400,7 @@ int x16rv2_2x64_hash( void* output, const void* input, int thrid )
|
||||
simd512_ctx( &ctx.simd, hash1, in1, size );
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
echo_full( &ctx.echo, hash0, 512, in0, size );
|
||||
echo_full( &ctx.echo, hash1, 512, in1, size );
|
||||
#else
|
||||
|
@@ -294,7 +294,6 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
@@ -938,7 +938,7 @@ int scanhash_x17_4x64( struct work *work, uint32_t max_nonce,
|
||||
#endif
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/haval/sph-haval.h"
|
||||
#if !( defined(__AES__) ) //|| defined(__ARM_FEATURE_AES) )
|
||||
#if !( defined(__AES__) || defined(__ARM_FEATURE_AES) )
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#endif
|
||||
#if !( defined(__AES__) || defined(__ARM_FEATURE_AES) )
|
||||
@@ -950,7 +950,7 @@ union _x17_context_overlay
|
||||
{
|
||||
blake512_2x64_context blake;
|
||||
bmw512_2x64_context bmw;
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl groestl;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
@@ -1000,7 +1000,7 @@ int x17_2x64_hash( void *output, const void *input, int thr_id )
|
||||
|
||||
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||
|
||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &ctx.groestl, hash0, hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, hash1, hash1, 512 );
|
||||
#else
|
||||
|
@@ -5,15 +5,15 @@
|
||||
#include "algo/blake/blake512-hash.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/fugue/fugue-aesni.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#endif
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#endif
|
||||
#include "algo/skein/sph_skein.h"
|
||||
@@ -39,15 +39,15 @@ union _x22i_context_overlay
|
||||
blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
#if defined(__AES__)
|
||||
hashState_groestl groestl;
|
||||
hashState_fugue fugue;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_fugue512_context fugue;
|
||||
#endif
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_jh512_context jh;
|
||||
@@ -81,7 +81,7 @@ int x22i_hash( void *output, const void *input, int thrid )
|
||||
sph_bmw512(&ctx.bmw, (const void*) hash, 64);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &ctx.groestl, hash, hash, 512 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
|
@@ -5,15 +5,15 @@
|
||||
#include "algo/blake/blake512-hash.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/fugue/fugue-aesni.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#endif
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#endif
|
||||
#include "algo/skein/sph_skein.h"
|
||||
@@ -42,15 +42,15 @@ union _x25x_context_overlay
|
||||
blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
#if defined(__AES__)
|
||||
hashState_groestl groestl;
|
||||
hashState_fugue fugue;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_fugue512_context fugue;
|
||||
#endif
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#else
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_jh512_context jh;
|
||||
@@ -86,7 +86,7 @@ int x25x_hash( void *output, const void *input, int thrid )
|
||||
sph_bmw512(&ctx.bmw, (const void*) &hash[0], 64);
|
||||
sph_bmw512_close(&ctx.bmw, &hash[1]);
|
||||
|
||||
#if defined(__AES__)
|
||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||
groestl512_full( &ctx.groestl, (void*)&hash[2], (const void*)&hash[1], 512 );
|
||||
#else
|
||||
sph_groestl512_init( &ctx.groestl );
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.13.
|
||||
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.14.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
||||
@@ -608,8 +608,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='23.13'
|
||||
PACKAGE_STRING='cpuminer-opt 23.13'
|
||||
PACKAGE_VERSION='23.14'
|
||||
PACKAGE_STRING='cpuminer-opt 23.14'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 23.14 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1432,7 +1432,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 23.13:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 23.14:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1538,7 +1538,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 23.13
|
||||
cpuminer-opt configure 23.14
|
||||
generated by GNU Autoconf 2.71
|
||||
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 23.13, which was
|
||||
It was created by cpuminer-opt $as_me 23.14, which was
|
||||
generated by GNU Autoconf 2.71. Invocation command line was
|
||||
|
||||
$ $0$ac_configure_args_raw
|
||||
@@ -3593,7 +3593,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='23.13'
|
||||
VERSION='23.14'
|
||||
|
||||
|
||||
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
||||
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 23.13, which was
|
||||
This file was extended by cpuminer-opt $as_me 23.14, which was
|
||||
generated by GNU Autoconf 2.71. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config='$ac_cs_config_escaped'
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 23.13
|
||||
cpuminer-opt config.status 23.14
|
||||
configured by $0, generated by GNU Autoconf 2.71,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [23.13])
|
||||
AC_INIT([cpuminer-opt], [23.14])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
20
configure~
20
configure~
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.13.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 23.14.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='23.13'
|
||||
PACKAGE_STRING='cpuminer-opt 23.13'
|
||||
PACKAGE_VERSION='23.14'
|
||||
PACKAGE_STRING='cpuminer-opt 23.14'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 23.13 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 23.14 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 23.13:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 23.14:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 23.13
|
||||
cpuminer-opt configure 23.14
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 23.13, which was
|
||||
It was created by cpuminer-opt $as_me 23.14, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='23.13'
|
||||
VERSION='23.14'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6718,7 +6718,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 23.13, which was
|
||||
This file was extended by cpuminer-opt $as_me 23.14, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6784,7 +6784,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 23.13
|
||||
cpuminer-opt config.status 23.14
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -3666,11 +3666,6 @@ static int thread_create(struct thr_info *thr, void* func)
|
||||
|
||||
void get_defconfig_path(char *out, size_t bufsize, char *argv0);
|
||||
|
||||
|
||||
#include "simd-utils.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "compat/aes_helper.c"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct thr_info *thr;
|
||||
|
@@ -322,6 +322,7 @@ static inline __m128i v128_neg1_fn()
|
||||
#define mm128_xim_32( v1, v0, c ) \
|
||||
_mm_castps_si128( _mm_insert_ps( _mm_castsi128_ps( v1 ), \
|
||||
_mm_castsi128_ps( v0 ), c ) )
|
||||
#define v128_xim32 mm128_xim_32
|
||||
|
||||
// Examples of simple operations using xim:
|
||||
/*
|
||||
|
@@ -68,7 +68,7 @@
|
||||
#define v128_mul32 vmulq_u32
|
||||
#define v128_mul16 vmulq_u16
|
||||
|
||||
// slow, tested with argon2d
|
||||
// Widening, shuffle high element to align with Intel
|
||||
static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 )
|
||||
{
|
||||
return vmull_u32( vget_low_u32( vcopyq_laneq_u32( v1, 1, v1, 2 ) ),
|
||||
@@ -86,7 +86,7 @@ static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 )
|
||||
|
||||
// Not yet needed
|
||||
//#define v128_cmpeq1
|
||||
|
||||
// Signed
|
||||
#define v128_cmpgt64( v1, v0 ) vcgtq_s64( (int64x2_t)v1, (int64x2_t)v0 )
|
||||
#define v128_cmpgt32( v1, v0 ) vcgtq_s32( (int32x4_t)v1, (int32x4_t)v0 )
|
||||
#define v128_cmpgt16( v1, v0 ) vcgtq_s16( (int16x8_t)v1, (int16x8_t)v0 )
|
||||
@@ -406,34 +406,15 @@ static inline void v128_memcpy( void *dst, const void *src, const int n )
|
||||
v1 = vorrq_u32( v1, t1 ); \
|
||||
}
|
||||
|
||||
// Cross lane shuffles, no programmable shuffle in NEON
|
||||
|
||||
// vector mask, use as last resort. prefer rev, alignr, etc
|
||||
// vector mask, use as last resort. prefer tbl, rev, alignr, etc
|
||||
#define v128_shufflev32( v, vmask ) \
|
||||
v128_set32( ((uint32_t*)&v)[ ((uint32_t*)(&vmask))[3] ], \
|
||||
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[2] ], \
|
||||
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[1] ], \
|
||||
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[0] ] ) \
|
||||
|
||||
// compatible with x86_64, but very slow, avoid
|
||||
#define v128_shuffle8( v, vmask ) \
|
||||
v128_set8( ((uint8_t*)&v)[ ((uint8_t*)(&vmask))[15] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[14] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[13] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[12] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[11] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[10] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 9] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 8] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 7] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 6] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 5] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 4] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 3] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 2] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 1] ], \
|
||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 0] ] )
|
||||
|
||||
vqtbl1q_u8( (uint8x16_t)v, (uint8x16_t)vmask );
|
||||
|
||||
// sub-vector shuffles sometimes mirror bit rotation. Shuffle is faster.
|
||||
// Bit rotation already promotes faster widths. Usage is context sensitive.
|
||||
|
Reference in New Issue
Block a user