Compare commits

...

1 Commits

Author SHA1 Message Date
Jay D Dee
b47cfaa720 v3.10.7 2019-12-28 15:00:29 -05:00
75 changed files with 1884 additions and 773 deletions

View File

@@ -35,13 +35,18 @@ not supported. FreeBSD YMMV.
Change Log Change Log
---------- ----------
v3.10.7
AVX512 for x25x, lbry, x13bcd (bcd).
v3.10.6 v3.10.6
Added support for SSL stratum: stratum+tcps:// Added support for SSL stratum: stratum+tcps://
Added job id reporting again, but leaner, suppressed with --quiet. Added job id reporting again, but leaner, suppressed with --quiet.
AVX512 for x21s, x22i, lyra2z, allium AVX512 for x21s, x22i, lyra2z, allium.
Fixed share overflow warnings mining lbry with Ryzen (SHA). Fixed share overflow warnings mining lbry with Ryzen (SHA).

View File

@@ -317,6 +317,7 @@ const char* const algo_alias_map[][2] =
{ "argon2d-crds", "argon2d250" }, { "argon2d-crds", "argon2d250" },
{ "argon2d-dyn", "argon2d500" }, { "argon2d-dyn", "argon2d500" },
{ "argon2d-uis", "argon2d4096" }, { "argon2d-uis", "argon2d4096" },
{ "bcd", "x13bcd" },
{ "bitcore", "timetravel10" }, { "bitcore", "timetravel10" },
{ "bitzeny", "yescryptr8" }, { "bitzeny", "yescryptr8" },
{ "blake256r8", "blakecoin" }, { "blake256r8", "blakecoin" },

View File

@@ -104,7 +104,7 @@ typedef struct {
typedef blake_8way_small_context blake256_8way_context; typedef blake_8way_small_context blake256_8way_context;
void blake256_8way_init(void *cc); void blake256_8way_init(void *cc);
void blake256_8way_update(void *cc, const void *data, size_t len); void blake256_8way_update(void *cc, const void *data, size_t len);
#define blake256_8way blake256_8way_update //#define blake256_8way blake256_8way_update
void blake256_8way_close(void *cc, void *dst); void blake256_8way_close(void *cc, void *dst);
// 14 rounds, blake, decred // 14 rounds, blake, decred

View File

@@ -842,7 +842,8 @@ blake32_4way_init( blake_4way_small_context *ctx, const uint32_t *iv,
} }
static void static void
blake32_4way( blake_4way_small_context *ctx, const void *data, size_t len ) blake32_4way( blake_4way_small_context *ctx, const void *data,
size_t len )
{ {
__m128i *buf = (__m128i*)ctx->buf; __m128i *buf = (__m128i*)ctx->buf;
size_t bptr = ctx->ptr<<2; size_t bptr = ctx->ptr<<2;
@@ -1237,7 +1238,7 @@ blake256_4way_init(void *ctx)
} }
void void
blake256_4way(void *ctx, const void *data, size_t len) blake256_4way_update(void *ctx, const void *data, size_t len)
{ {
blake32_4way(ctx, data, len); blake32_4way(ctx, data, len);
} }

View File

@@ -14,7 +14,6 @@
#ifndef __BLAKE2S_HASH_4WAY_H__ #ifndef __BLAKE2S_HASH_4WAY_H__
#define __BLAKE2S_HASH_4WAY_H__ 1 #define __BLAKE2S_HASH_4WAY_H__ 1
//#if defined(__SSE4_2__)
#if defined(__SSE2__) #if defined(__SSE2__)
#include "simd-utils.h" #include "simd-utils.h"
@@ -132,6 +131,6 @@ int blake2s_16way_final( blake2s_16way_state *S, void *out, uint8_t outlen );
} }
#endif #endif
#endif // __SSE4_2__ #endif // __SSE2__
#endif #endif

View File

@@ -41,7 +41,6 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
for ( int lane = 0; lane < 8; lane++ ) for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash7[ lane<<1 ] < Htarg ) ) if ( unlikely( hash7[ lane<<1 ] < Htarg ) )
// if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
{ {
extr_lane_8x64( lane_hash, hash, lane, 256 ); extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) ) if ( fulltest( lane_hash, ptarget ) )
@@ -66,7 +65,7 @@ void bmw512hash_4way(void *state, const void *input)
{ {
bmw512_4way_context ctx; bmw512_4way_context ctx;
bmw512_4way_init( &ctx ); bmw512_4way_init( &ctx );
bmw512_4way( &ctx, input, 80 ); bmw512_4way_update( &ctx, input, 80 );
bmw512_4way_close( &ctx, state ); bmw512_4way_close( &ctx, state );
} }

View File

@@ -45,7 +45,7 @@ void myriad_4way_hash( void *output, const void *input )
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
sha256_4way( &ctx.sha, vhash, 64 ); sha256_4way_update( &ctx.sha, vhash, 64 );
sha256_4way_close( &ctx.sha, output ); sha256_4way_close( &ctx.sha, output );
} }

View File

@@ -1171,7 +1171,8 @@ void hamsi512_4way_init( hamsi_4way_big_context *sc )
sc->h[7] = m256_const1_64( 0x6769756d2042656c ); sc->h[7] = m256_const1_64( 0x6769756d2042656c );
} }
void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len ) void hamsi512_4way_update( hamsi_4way_big_context *sc, const void *data,
size_t len )
{ {
__m256i *vdata = (__m256i*)data; __m256i *vdata = (__m256i*)data;

View File

@@ -62,7 +62,7 @@ typedef hamsi_4way_big_context hamsi512_4way_context;
void hamsi512_4way_init( hamsi512_4way_context *sc ); void hamsi512_4way_init( hamsi512_4way_context *sc );
void hamsi512_4way_update( hamsi512_4way_context *sc, const void *data, void hamsi512_4way_update( hamsi512_4way_context *sc, const void *data,
size_t len ); size_t len );
#define hamsi512_4way hamsi512_4way_update //#define hamsi512_4way hamsi512_4way_update
void hamsi512_4way_close( hamsi512_4way_context *sc, void *dst ); void hamsi512_4way_close( hamsi512_4way_context *sc, void *dst );
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)

View File

@@ -38,7 +38,7 @@
#define SPH_XCAT_(a, b) a ## b #define SPH_XCAT_(a, b) a ## b
static void static void
SPH_XCAT(SPH_XCAT(haval, PASSES), _4way) SPH_XCAT(SPH_XCAT(haval, PASSES), _4way_update)
( haval_4way_context *sc, const void *data, size_t len ) ( haval_4way_context *sc, const void *data, size_t len )
{ {
__m128i *vdata = (__m128i*)data; __m128i *vdata = (__m128i*)data;

View File

@@ -479,9 +479,9 @@ haval ## xxx ## _ ## y ## _4way_init(void *cc) \
} \ } \
\ \
void \ void \
haval ## xxx ## _ ## y ## _4way (void *cc, const void *data, size_t len) \ haval ## xxx ## _ ## y ## _4way_update (void *cc, const void *data, size_t len) \
{ \ { \
haval ## y ## _4way(cc, data, len); \ haval ## y ## _4way_update(cc, data, len); \
} \ } \
\ \
void \ void \

View File

@@ -85,7 +85,7 @@ typedef haval_4way_context haval256_5_4way_context;
void haval256_5_4way_init( void *cc ); void haval256_5_4way_init( void *cc );
void haval256_5_4way_update( void *cc, const void *data, size_t len ); void haval256_5_4way_update( void *cc, const void *data, size_t len );
#define haval256_5_4way haval256_5_4way_update //#define haval256_5_4way haval256_5_4way_update
void haval256_5_4way_close( void *cc, void *dst ); void haval256_5_4way_close( void *cc, void *dst );

View File

@@ -103,14 +103,12 @@ typedef jh_4way_context jh512_4way_context;
void jh256_4way_init( jh_4way_context *sc); void jh256_4way_init( jh_4way_context *sc);
void jh256_4way_update(void *cc, const void *data, size_t len); void jh256_4way_update(void *cc, const void *data, size_t len);
#define jh256_4way jh256_4way_update
void jh256_4way_close(void *cc, void *dst); void jh256_4way_close(void *cc, void *dst);
void jh512_4way_init( jh_4way_context *sc ); void jh512_4way_init( jh_4way_context *sc );
void jh512_4way_update(void *cc, const void *data, size_t len); void jh512_4way_update(void *cc, const void *data, size_t len);
#define jh512_4way jh512_4way_update
void jh512_4way_close(void *cc, void *dst); void jh512_4way_close(void *cc, void *dst);

View File

@@ -33,7 +33,7 @@ void jha_hash_4way( void *out, const void *input )
keccak512_4way_context ctx_keccak; keccak512_4way_context ctx_keccak;
keccak512_4way_init( &ctx_keccak ); keccak512_4way_init( &ctx_keccak );
keccak512_4way( &ctx_keccak, input, 80 ); keccak512_4way_update( &ctx_keccak, input, 80 );
keccak512_4way_close( &ctx_keccak, vhash ); keccak512_4way_close( &ctx_keccak, vhash );
// Heavy & Light Pair Loop // Heavy & Light Pair Loop
@@ -58,7 +58,7 @@ void jha_hash_4way( void *out, const void *input )
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
skein512_4way_init( &ctx_skein ); skein512_4way_init( &ctx_skein );
skein512_4way( &ctx_skein, vhash, 64 ); skein512_4way_update( &ctx_skein, vhash, 64 );
skein512_4way_close( &ctx_skein, vhashB ); skein512_4way_close( &ctx_skein, vhashB );
for ( int i = 0; i < 8; i++ ) for ( int i = 0; i < 8; i++ )
@@ -69,7 +69,7 @@ void jha_hash_4way( void *out, const void *input )
blake512_4way_close( &ctx_blake, vhashA ); blake512_4way_close( &ctx_blake, vhashA );
jh512_4way_init( &ctx_jh ); jh512_4way_init( &ctx_jh );
jh512_4way( &ctx_jh, vhash, 64 ); jh512_4way_update( &ctx_jh, vhash, 64 );
jh512_4way_close( &ctx_jh, vhashB ); jh512_4way_close( &ctx_jh, vhashB );
for ( int i = 0; i < 8; i++ ) for ( int i = 0; i < 8; i++ )

View File

@@ -99,14 +99,12 @@ typedef keccak64_ctx_m256i keccak512_4way_context;
void keccak256_4way_init(void *cc); void keccak256_4way_init(void *cc);
void keccak256_4way_update(void *cc, const void *data, size_t len); void keccak256_4way_update(void *cc, const void *data, size_t len);
void keccak256_4way_close(void *cc, void *dst); void keccak256_4way_close(void *cc, void *dst);
#define keccak256_4way keccak256_4way_update
void keccak512_4way_init(void *cc); void keccak512_4way_init(void *cc);
void keccak512_4way_update(void *cc, const void *data, size_t len); void keccak512_4way_update(void *cc, const void *data, size_t len);
void keccak512_4way_close(void *cc, void *dst); void keccak512_4way_close(void *cc, void *dst);
void keccak512_4way_addbits_and_close( void keccak512_4way_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst); void *cc, unsigned ub, unsigned n, void *dst);
#define keccak512_4way keccak512_4way_update
#endif #endif

View File

@@ -55,7 +55,6 @@ void allium_8way_hash( void *state, const void *input )
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7, dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash, 256 ); vhash, 256 );
intrlv_2x256( vhash, hash0, hash1, 256 ); intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 ); LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
dintrlv_2x256( hash0, hash1, vhash, 256 ); dintrlv_2x256( hash0, hash1, vhash, 256 );
@@ -69,19 +68,6 @@ void allium_8way_hash( void *state, const void *input )
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 ); LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
dintrlv_2x256( hash6, hash7, vhash, 256 ); dintrlv_2x256( hash6, hash7, vhash, 256 );
/*
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
LYRA2RE( hash4, 32, hash4, 32, hash4, 32, 1, 8, 8 );
LYRA2RE( hash5, 32, hash5, 32, hash5, 32, 1, 8, 8 );
LYRA2RE( hash6, 32, hash6, 32, hash6, 32, 1, 8, 8 );
LYRA2RE( hash7, 32, hash7, 32, hash7, 32, 1, 8, 8 );
*/
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 ); intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 );
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 ); intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 );
@@ -105,20 +91,6 @@ void allium_8way_hash( void *state, const void *input )
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 ); LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
dintrlv_2x256( hash6, hash7, vhash, 256 ); dintrlv_2x256( hash6, hash7, vhash, 256 );
/*
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
LYRA2RE( hash4, 32, hash4, 32, hash4, 32, 1, 8, 8 );
LYRA2RE( hash5, 32, hash5, 32, hash5, 32, 1, 8, 8 );
LYRA2RE( hash6, 32, hash6, 32, hash6, 32, 1, 8, 8 );
LYRA2RE( hash7, 32, hash7, 32, hash7, 32, 1, 8, 8 );
*/
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 ); hash7, 256 );
@@ -232,11 +204,11 @@ void allium_4way_hash( void *state, const void *input )
allium_4way_ctx_holder ctx __attribute__ ((aligned (64))); allium_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &allium_4way_ctx, sizeof(allium_4way_ctx) ); memcpy( &ctx, &allium_4way_ctx, sizeof(allium_4way_ctx) );
blake256_4way( &ctx.blake, input + (64<<2), 16 ); blake256_4way_update( &ctx.blake, input + (64<<2), 16 );
blake256_4way_close( &ctx.blake, vhash32 ); blake256_4way_close( &ctx.blake, vhash32 );
rintrlv_4x32_4x64( vhash64, vhash32, 256 ); rintrlv_4x32_4x64( vhash64, vhash32, 256 );
keccak256_4way( &ctx.keccak, vhash64, 32 ); keccak256_4way_update( &ctx.keccak, vhash64, 32 );
keccak256_4way_close( &ctx.keccak, vhash64 ); keccak256_4way_close( &ctx.keccak, vhash64 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
@@ -261,7 +233,7 @@ void allium_4way_hash( void *state, const void *input )
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 ); intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
skein256_4way( &ctx.skein, vhash64, 32 ); skein256_4way_update( &ctx.skein, vhash64, 32 );
skein256_4way_close( &ctx.skein, vhash64 ); skein256_4way_close( &ctx.skein, vhash64 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );

View File

@@ -20,7 +20,7 @@ static __thread blake256_4way_context l2h_4way_blake_mid;
void lyra2h_4way_midstate( const void* input ) void lyra2h_4way_midstate( const void* input )
{ {
blake256_4way_init( &l2h_4way_blake_mid ); blake256_4way_init( &l2h_4way_blake_mid );
blake256_4way( &l2h_4way_blake_mid, input, 64 ); blake256_4way_update( &l2h_4way_blake_mid, input, 64 );
} }
void lyra2h_4way_hash( void *state, const void *input ) void lyra2h_4way_hash( void *state, const void *input )

View File

@@ -44,7 +44,7 @@ void lyra2rev2_8way_hash( void *state, const void *input )
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64))); lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) ); memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
blake256_8way( &ctx.blake, input + (64<<3), 16 ); blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash ); blake256_8way_close( &ctx.blake, vhash );
rintrlv_8x32_8x64( vhashA, vhash, 256 ); rintrlv_8x32_8x64( vhashA, vhash, 256 );
@@ -176,12 +176,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
lyra2v2_4way_ctx_holder ctx __attribute__ ((aligned (64))); lyra2v2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_4way_ctx, sizeof(l2v2_4way_ctx) ); memcpy( &ctx, &l2v2_4way_ctx, sizeof(l2v2_4way_ctx) );
blake256_4way( &ctx.blake, input + (64<<2), 16 ); blake256_4way_update( &ctx.blake, input + (64<<2), 16 );
blake256_4way_close( &ctx.blake, vhash ); blake256_4way_close( &ctx.blake, vhash );
rintrlv_4x32_4x64( vhash64, vhash, 256 ); rintrlv_4x32_4x64( vhash64, vhash, 256 );
keccak256_4way( &ctx.keccak, vhash64, 32 ); keccak256_4way_update( &ctx.keccak, vhash64, 32 );
keccak256_4way_close( &ctx.keccak, vhash64 ); keccak256_4way_close( &ctx.keccak, vhash64 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
@@ -201,7 +201,7 @@ void lyra2rev2_4way_hash( void *state, const void *input )
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 ); intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
skein256_4way( &ctx.skein, vhash64, 32 ); skein256_4way_update( &ctx.skein, vhash64, 32 );
skein256_4way_close( &ctx.skein, vhash64 ); skein256_4way_close( &ctx.skein, vhash64 );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
@@ -217,7 +217,7 @@ void lyra2rev2_4way_hash( void *state, const void *input )
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
bmw256_4way( &ctx.bmw, vhash, 32 ); bmw256_4way_update( &ctx.bmw, vhash, 32 );
bmw256_4way_close( &ctx.bmw, state ); bmw256_4way_close( &ctx.bmw, state );
} }
@@ -242,7 +242,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata ); mm128_bswap32_intrlv80_4x32( vdata, pdata );
blake256_4way_init( &l2v2_4way_ctx.blake ); blake256_4way_init( &l2v2_4way_ctx.blake );
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 ); blake256_4way_update( &l2v2_4way_ctx.blake, vdata, 64 );
do do
{ {

View File

@@ -209,7 +209,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
lyra2v3_8way_ctx_holder ctx __attribute__ ((aligned (64))); lyra2v3_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v3_8way_ctx, sizeof(l2v3_8way_ctx) ); memcpy( &ctx, &l2v3_8way_ctx, sizeof(l2v3_8way_ctx) );
blake256_8way( &ctx.blake, input + (64*8), 16 ); blake256_8way_update( &ctx.blake, input + (64*8), 16 );
blake256_8way_close( &ctx.blake, vhash ); blake256_8way_close( &ctx.blake, vhash );
dintrlv_8x32( hash0, hash1, hash2, hash3, dintrlv_8x32( hash0, hash1, hash2, hash3,
@@ -252,7 +252,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, 256 ); hash4, hash5, hash6, hash7, 256 );
bmw256_8way( &ctx.bmw, vhash, 32 ); bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state ); bmw256_8way_close( &ctx.bmw, state );
} }
@@ -277,7 +277,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
mm256_bswap32_intrlv80_8x32( vdata, pdata ); mm256_bswap32_intrlv80_8x32( vdata, pdata );
blake256_8way_init( &l2v3_8way_ctx.blake ); blake256_8way_init( &l2v3_8way_ctx.blake );
blake256_8way( &l2v3_8way_ctx.blake, vdata, 64 ); blake256_8way_update( &l2v3_8way_ctx.blake, vdata, 64 );
do do
{ {
@@ -334,8 +334,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
lyra2v3_4way_ctx_holder ctx __attribute__ ((aligned (64))); lyra2v3_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v3_4way_ctx, sizeof(l2v3_4way_ctx) ); memcpy( &ctx, &l2v3_4way_ctx, sizeof(l2v3_4way_ctx) );
// blake256_4way( &ctx.blake, input, 80 ); blake256_4way_update( &ctx.blake, input + (64*4), 16 );
blake256_4way( &ctx.blake, input + (64*4), 16 );
blake256_4way_close( &ctx.blake, vhash ); blake256_4way_close( &ctx.blake, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
@@ -358,7 +357,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 ); LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
bmw256_4way( &ctx.bmw, vhash, 32 ); bmw256_4way_update( &ctx.bmw, vhash, 32 );
bmw256_4way_close( &ctx.bmw, state ); bmw256_4way_close( &ctx.bmw, state );
} }
@@ -383,7 +382,7 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata ); mm128_bswap32_intrlv80_4x32( vdata, pdata );
blake256_4way_init( &l2v3_4way_ctx.blake ); blake256_4way_init( &l2v3_4way_ctx.blake );
blake256_4way( &l2v3_4way_ctx.blake, vdata, 64 ); blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
do do
{ {

View File

@@ -149,7 +149,7 @@ static __thread blake256_8way_context l2z_8way_blake_mid;
void lyra2z_8way_midstate( const void* input ) void lyra2z_8way_midstate( const void* input )
{ {
blake256_8way_init( &l2z_8way_blake_mid ); blake256_8way_init( &l2z_8way_blake_mid );
blake256_8way( &l2z_8way_blake_mid, input, 64 ); blake256_8way_update( &l2z_8way_blake_mid, input, 64 );
} }
void lyra2z_8way_hash( void *state, const void *input ) void lyra2z_8way_hash( void *state, const void *input )
@@ -166,7 +166,7 @@ void lyra2z_8way_hash( void *state, const void *input )
blake256_8way_context ctx_blake __attribute__ ((aligned (64))); blake256_8way_context ctx_blake __attribute__ ((aligned (64)));
memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid ); memcpy( &ctx_blake, &l2z_8way_blake_mid, sizeof l2z_8way_blake_mid );
blake256_8way( &ctx_blake, input + (64*8), 16 ); blake256_8way_update( &ctx_blake, input + (64*8), 16 );
blake256_8way_close( &ctx_blake, vhash ); blake256_8way_close( &ctx_blake, vhash );
dintrlv_8x32( hash0, hash1, hash2, hash3, dintrlv_8x32( hash0, hash1, hash2, hash3,
@@ -247,7 +247,7 @@ static __thread blake256_4way_context l2z_4way_blake_mid;
void lyra2z_4way_midstate( const void* input ) void lyra2z_4way_midstate( const void* input )
{ {
blake256_4way_init( &l2z_4way_blake_mid ); blake256_4way_init( &l2z_4way_blake_mid );
blake256_4way( &l2z_4way_blake_mid, input, 64 ); blake256_4way_update( &l2z_4way_blake_mid, input, 64 );
} }
void lyra2z_4way_hash( void *state, const void *input ) void lyra2z_4way_hash( void *state, const void *input )
@@ -260,7 +260,7 @@ void lyra2z_4way_hash( void *state, const void *input )
blake256_4way_context ctx_blake __attribute__ ((aligned (64))); blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid ); memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
blake256_4way( &ctx_blake, input + (64*4), 16 ); blake256_4way_update( &ctx_blake, input + (64*4), 16 );
blake256_4way_close( &ctx_blake, vhash ); blake256_4way_close( &ctx_blake, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );

View File

@@ -133,7 +133,7 @@ void nist5hash_4way( void *out, const void *input )
keccak512_4way_context ctx_keccak; keccak512_4way_context ctx_keccak;
blake512_4way_init( &ctx_blake ); blake512_4way_init( &ctx_blake );
blake512_4way( &ctx_blake, input, 80 ); blake512_4way_update( &ctx_blake, input, 80 );
blake512_4way_close( &ctx_blake, vhash ); blake512_4way_close( &ctx_blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -154,15 +154,15 @@ void nist5hash_4way( void *out, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
jh512_4way_init( &ctx_jh ); jh512_4way_init( &ctx_jh );
jh512_4way( &ctx_jh, vhash, 64 ); jh512_4way_update( &ctx_jh, vhash, 64 );
jh512_4way_close( &ctx_jh, vhash ); jh512_4way_close( &ctx_jh, vhash );
keccak512_4way_init( &ctx_keccak ); keccak512_4way_init( &ctx_keccak );
keccak512_4way( &ctx_keccak, vhash, 64 ); keccak512_4way_update( &ctx_keccak, vhash, 64 );
keccak512_4way_close( &ctx_keccak, vhash ); keccak512_4way_close( &ctx_keccak, vhash );
skein512_4way_init( &ctx_skein ); skein512_4way_init( &ctx_skein );
skein512_4way( &ctx_skein, vhash, 64 ); skein512_4way_update( &ctx_skein, vhash, 64 );
skein512_4way_close( &ctx_skein, out ); skein512_4way_close( &ctx_skein, out );
} }

View File

@@ -54,10 +54,10 @@ void anime_4way_hash( void *state, const void *input )
anime_4way_ctx_holder ctx; anime_4way_ctx_holder ctx;
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) ); memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
bmw512_4way( &ctx.bmw, input, 80 ); bmw512_4way_update( &ctx.bmw, input, 80 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
blake512_4way( &ctx.blake, vhash, 64 ); blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero ); vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -92,7 +92,7 @@ void anime_4way_hash( void *state, const void *input )
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB ); skein512_4way_close( &ctx.skein, vhashB );
} }
@@ -111,7 +111,7 @@ void anime_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero ); vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -119,23 +119,23 @@ void anime_4way_hash( void *state, const void *input )
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 ); blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA ); blake512_4way_close( &ctx.blake, vhashA );
} }
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhashB ); bmw512_4way_close( &ctx.bmw, vhashB );
} }
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask ); mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero ); vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -143,13 +143,13 @@ void anime_4way_hash( void *state, const void *input )
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA ); keccak512_4way_close( &ctx.keccak, vhashA );
} }
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhashB ); jh512_4way_close( &ctx.jh, vhashB );
} }

View File

@@ -830,7 +830,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
__m256i* vhB = (__m256i*)vhashB; __m256i* vhB = (__m256i*)vhashB;
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, input, 80 ); bmw512_4way_update( &ctx.bmw, input, 80 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -889,18 +889,18 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB ); skein512_4way_close( &ctx.skein, vhashB );
} }
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask ); mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// second fork, A = blake parallel, B= bmw parallel. // second fork, A = blake parallel, B= bmw parallel.
@@ -911,14 +911,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 ); blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA ); blake512_4way_close( &ctx.blake, vhashA );
} }
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhashB ); bmw512_4way_close( &ctx.bmw, vhashB );
} }
@@ -962,14 +962,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA ); keccak512_4way_close( &ctx.keccak, vhashA );
} }
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhashB ); jh512_4way_close( &ctx.jh, vhashB );
} }
@@ -990,7 +990,6 @@ extern void hmq1725_4way_hash(void *state, const void *input)
sph_shavite512 ( &ctx.shavite, hash3, 64 ); sph_shavite512 ( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 ); sph_shavite512_close( &ctx.shavite, hash3 );
intrlv_2x128_512( vhashA, hash0, hash1 ); intrlv_2x128_512( vhashA, hash0, hash1 );
intrlv_2x128_512( vhashB, hash2, hash3 ); intrlv_2x128_512( vhashB, hash2, hash3 );
@@ -1042,7 +1041,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhash, 64 ); haval256_5_4way_update( &ctx.haval, vhash, 64 );
haval256_5_4way_close( &ctx.haval, vhash ); haval256_5_4way_close( &ctx.haval, vhash );
memset( &vhash[8<<2], 0, 32<<2 ); memset( &vhash[8<<2], 0, 32<<2 );
rintrlv_4x32_4x64( vhashB, vhash, 512 ); rintrlv_4x32_4x64( vhashB, vhash, 512 );
@@ -1068,7 +1067,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 ); blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -1130,7 +1129,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -1214,7 +1213,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -1269,7 +1268,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhashB ); sha512_4way_close( &ctx.sha512, vhashB );
} }
@@ -1289,7 +1288,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
// A = haval parallel, B = Whirlpool serial // A = haval parallel, B = Whirlpool serial
@@ -1305,7 +1304,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhash, 64 ); haval256_5_4way_update( &ctx.haval, vhash, 64 );
haval256_5_4way_close( &ctx.haval, vhash ); haval256_5_4way_close( &ctx.haval, vhash );
memset( &vhash[8<<2], 0, 32<<2 ); memset( &vhash[8<<2], 0, 32<<2 );
rintrlv_4x32_4x64( vhashA, vhash, 512 ); rintrlv_4x32_4x64( vhashA, vhash, 512 );
@@ -1341,7 +1340,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask ); mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
memcpy(state, vhash, 32<<2 ); memcpy(state, vhash, 32<<2 );

View File

@@ -289,10 +289,10 @@ void quark_4way_hash( void *state, const void *input )
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) ); memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero ); vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -327,7 +327,7 @@ void quark_4way_hash( void *state, const void *input )
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhashB ); skein512_4way_close( &ctx.skein, vhashB );
} }
@@ -346,7 +346,7 @@ void quark_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero ); vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -354,24 +354,24 @@ void quark_4way_hash( void *state, const void *input )
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, 64 ); blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA ); blake512_4way_close( &ctx.blake, vhashA );
} }
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhashB ); bmw512_4way_close( &ctx.bmw, vhashB );
} }
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask ); mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero ); vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
@@ -379,14 +379,14 @@ void quark_4way_hash( void *state, const void *input )
if ( mm256_anybits0( vh_mask ) ) if ( mm256_anybits0( vh_mask ) )
{ {
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA ); keccak512_4way_close( &ctx.keccak, vhashA );
} }
if ( mm256_anybits1( vh_mask ) ) if ( mm256_anybits1( vh_mask ) )
{ {
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhashB ); jh512_4way_close( &ctx.jh, vhashB );
} }

View File

@@ -7,7 +7,7 @@
#include "ripemd-hash-4way.h" #include "ripemd-hash-4way.h"
#define LBRY_INPUT_SIZE 112 #define LBRY_INPUT_SIZE 112
#define LBRY_MIDSTATE 64 #define LBRY_MIDSTATE 96
#define LBRY_TAIL (LBRY_INPUT_SIZE) - (LBRY_MIDSTATE) #define LBRY_TAIL (LBRY_INPUT_SIZE) - (LBRY_MIDSTATE)
#if defined(LBRY_16WAY) #if defined(LBRY_16WAY)
@@ -35,9 +35,9 @@ void lbry_16way_hash( void* output, const void* input )
uint32_t _ALIGN(64) h13[32]; uint32_t _ALIGN(64) h13[32];
uint32_t _ALIGN(64) h14[32]; uint32_t _ALIGN(64) h14[32];
uint32_t _ALIGN(64) h15[32]; uint32_t _ALIGN(64) h15[32];
sha256_16way_context ctx_sha256 __attribute__ ((aligned (64))); sha256_16way_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_8way_context ctx_sha512; sha512_8way_context ctx_sha512;
ripemd160_16way_context ctx_ripemd; ripemd160_16way_context ctx_ripemd;
memcpy( &ctx_sha256, &sha256_16w_mid, sizeof(ctx_sha256) ); memcpy( &ctx_sha256, &sha256_16w_mid, sizeof(ctx_sha256) );
sha256_16way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL ); sha256_16way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL );
@@ -62,7 +62,7 @@ void lbry_16way_hash( void* output, const void* input )
sha512_8way_close( &ctx_sha512, vhashB ); sha512_8way_close( &ctx_sha512, vhashB );
// back to 8-way 32 bit // back to 8-way 32 bit
dintrlv_8x64( h0, h1, h2, h3,h4, h5, h6, h7, vhashA, 512 ); dintrlv_8x64( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 512 );
dintrlv_8x64( h8, h9, h10, h11, h12, h13, h14, h15, vhashB, 512 ); dintrlv_8x64( h8, h9, h10, h11, h12, h13, h14, h15, vhashB, 512 );
intrlv_16x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, intrlv_16x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7,
h8, h9, h10, h11, h12, h13, h14, h15, 512 ); h8, h9, h10, h11, h12, h13, h14, h15, 512 );
@@ -90,14 +90,15 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
{ {
uint32_t hash[8*16] __attribute__ ((aligned (128))); uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[32*16] __attribute__ ((aligned (64))); uint32_t vdata[32*16] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t edata[32] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<4]); uint32_t *hash7 = &(hash[7<<4]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[27]; uint32_t n = pdata[27];
const uint32_t first_nonce = pdata[27]; const uint32_t first_nonce = pdata[27];
const uint32_t last_nonce = max_nonce - 16;
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t edata[32] __attribute__ ((aligned (64)));
__m512i *noncev = (__m512i*)vdata + 27; // aligned __m512i *noncev = (__m512i*)vdata + 27; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
@@ -114,14 +115,13 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
edata, edata, edata, edata, edata, edata, edata, edata, edata, 1024 ); edata, edata, edata, edata, edata, edata, edata, edata, edata, 1024 );
sha256_16way_init( &sha256_16w_mid ); sha256_16way_init( &sha256_16w_mid );
sha256_16way( &sha256_16w_mid, vdata, LBRY_MIDSTATE ); sha256_16way_update( &sha256_16w_mid, vdata, LBRY_MIDSTATE );
do do
{ {
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12, *noncev = mm512_bswap_32( _mm512_set_epi32(
n+11, n+10, n+ 9, n+ 8, n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n ) );
n+ 3, n+ 2, n+ 1, n ) );
lbry_16way_hash( hash, vdata ); lbry_16way_hash( hash, vdata );
for ( int i = 0; i < 16; i++ ) for ( int i = 0; i < 16; i++ )
@@ -129,27 +129,25 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
{ {
// deinterleave hash for lane // deinterleave hash for lane
extr_lane_16x32( lane_hash, hash, i, 256 ); extr_lane_16x32( lane_hash, hash, i, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark ) if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
{ {
pdata[27] = n + i; pdata[27] = n + i;
submit_lane_solution( work, lane_hash, mythr, i ); submit_lane_solution( work, lane_hash, mythr, i );
} }
} }
n += 16; n += 16;
} while ( (n < max_nonce-16) && !work_restart[thr_id].restart ); } while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1; *hashes_done = n - first_nonce;
return 0; return 0;
} }
#elif defined(LBRY_8WAY) #elif defined(LBRY_8WAY)
static __thread sha256_8way_context sha256_8w_mid; static __thread sha256_8way_context sha256_8w_mid;
void lbry_8way_hash( void* output, const void* input ) void lbry_8way_hash( void* output, const void* input )
{ {
uint32_t _ALIGN(64) vhashA[16<<3]; uint32_t _ALIGN(128) vhashA[16<<3];
uint32_t _ALIGN(64) vhashB[16<<3]; uint32_t _ALIGN(64) vhashB[16<<3];
uint32_t _ALIGN(64) vhashC[16<<3]; uint32_t _ALIGN(64) vhashC[16<<3];
uint32_t _ALIGN(32) h0[32]; uint32_t _ALIGN(32) h0[32];
@@ -165,11 +163,11 @@ void lbry_8way_hash( void* output, const void* input )
ripemd160_8way_context ctx_ripemd; ripemd160_8way_context ctx_ripemd;
memcpy( &ctx_sha256, &sha256_8w_mid, sizeof(ctx_sha256) ); memcpy( &ctx_sha256, &sha256_8w_mid, sizeof(ctx_sha256) );
sha256_8way( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL ); sha256_8way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
sha256_8way_close( &ctx_sha256, vhashA ); sha256_8way_close( &ctx_sha256, vhashA );
sha256_8way_init( &ctx_sha256 ); sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashA, 32 ); sha256_8way_update( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, vhashA ); sha256_8way_close( &ctx_sha256, vhashA );
// reinterleave to do sha512 4-way 64 bit twice. // reinterleave to do sha512 4-way 64 bit twice.
@@ -178,11 +176,11 @@ void lbry_8way_hash( void* output, const void* input )
intrlv_4x64( vhashB, h4, h5, h6, h7, 256 ); intrlv_4x64( vhashB, h4, h5, h6, h7, 256 );
sha512_4way_init( &ctx_sha512 ); sha512_4way_init( &ctx_sha512 );
sha512_4way( &ctx_sha512, vhashA, 32 ); sha512_4way_update( &ctx_sha512, vhashA, 32 );
sha512_4way_close( &ctx_sha512, vhashA ); sha512_4way_close( &ctx_sha512, vhashA );
sha512_4way_init( &ctx_sha512 ); sha512_4way_init( &ctx_sha512 );
sha512_4way( &ctx_sha512, vhashB, 32 ); sha512_4way_update( &ctx_sha512, vhashB, 32 );
sha512_4way_close( &ctx_sha512, vhashB ); sha512_4way_close( &ctx_sha512, vhashB );
// back to 8-way 32 bit // back to 8-way 32 bit
@@ -191,20 +189,20 @@ void lbry_8way_hash( void* output, const void* input )
intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 ); intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
ripemd160_8way_init( &ctx_ripemd ); ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way( &ctx_ripemd, vhashA, 32 ); ripemd160_8way_update( &ctx_ripemd, vhashA, 32 );
ripemd160_8way_close( &ctx_ripemd, vhashB ); ripemd160_8way_close( &ctx_ripemd, vhashB );
ripemd160_8way_init( &ctx_ripemd ); ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way( &ctx_ripemd, vhashA+(8<<3), 32 ); ripemd160_8way_update( &ctx_ripemd, vhashA+(8<<3), 32 );
ripemd160_8way_close( &ctx_ripemd, vhashC ); ripemd160_8way_close( &ctx_ripemd, vhashC );
sha256_8way_init( &ctx_sha256 ); sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashB, 20 ); sha256_8way_update( &ctx_sha256, vhashB, 20 );
sha256_8way( &ctx_sha256, vhashC, 20 ); sha256_8way_update( &ctx_sha256, vhashC, 20 );
sha256_8way_close( &ctx_sha256, vhashA ); sha256_8way_close( &ctx_sha256, vhashA );
sha256_8way_init( &ctx_sha256 ); sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhashA, 32 ); sha256_8way_update( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, output ); sha256_8way_close( &ctx_sha256, output );
} }
@@ -214,13 +212,13 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
uint32_t hash[8*8] __attribute__ ((aligned (64))); uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t vdata[32*8] __attribute__ ((aligned (64))); uint32_t vdata[32*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t edata[32] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]); uint32_t *hash7 = &(hash[7<<3]);
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[27]; uint32_t n = pdata[27];
const uint32_t first_nonce = pdata[27]; const uint32_t first_nonce = pdata[27];
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t edata[32] __attribute__ ((aligned (64)));
__m256i *noncev = (__m256i*)vdata + 27; // aligned __m256i *noncev = (__m256i*)vdata + 27; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
@@ -237,7 +235,7 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
edata, edata, edata, edata, 1024 ); edata, edata, edata, edata, 1024 );
sha256_8way_init( &sha256_8w_mid ); sha256_8way_init( &sha256_8w_mid );
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE ); sha256_8way_update( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
do do
{ {

View File

@@ -98,7 +98,7 @@ int lbry_get_work_data_size() { return LBRY_WORK_DATA_SIZE; }
bool register_lbry_algo( algo_gate_t* gate ) bool register_lbry_algo( algo_gate_t* gate )
{ {
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT; // gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
#if defined (LBRY_16WAY) #if defined (LBRY_16WAY)
gate->scanhash = (void*)&scanhash_lbry_16way; gate->scanhash = (void*)&scanhash_lbry_16way;
gate->hash = (void*)&lbry_16way_hash; gate->hash = (void*)&lbry_16way_hash;

View File

@@ -5,11 +5,10 @@
#include <stdint.h> #include <stdint.h>
// 16 way needs sha256 16 way #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #define LBRY_16WAY 1
// #define LBRY_16WAY #elif defined(__AVX2__)
#if defined(__AVX2__) #define LBRY_8WAY 1
#define LBRY_8WAY
#endif #endif
/* /*
#if !defined(__SHA__) #if !defined(__SHA__)
@@ -37,13 +36,13 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
void lbry_8way_hash( void *state, const void *input ); void lbry_8way_hash( void *state, const void *input );
int scanhash_lbry_8way( struct work *work, uint32_t max_nonce, int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
/*
#elif defined(LBRY_4WAY) #elif defined(LBRY_4WAY)
void lbry_4way_hash( void *state, const void *input ); void lbry_4way_hash( void *state, const void *input );
int scanhash_lbry_4way( struct work *work, uint32_t max_nonce, int scanhash_lbry_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done ); uint64_t *hashes_done );
*/
#else #else
void lbry_hash( void *state, const void *input ); void lbry_hash( void *state, const void *input );

View File

@@ -259,7 +259,8 @@ void ripemd160_4way_init( ripemd160_4way_context *sc )
sc->count_high = sc->count_low = 0; sc->count_high = sc->count_low = 0;
} }
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len ) void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
size_t len )
{ {
__m128i *vdata = (__m128i*)data; __m128i *vdata = (__m128i*)data;
size_t ptr; size_t ptr;
@@ -559,7 +560,8 @@ void ripemd160_8way_init( ripemd160_8way_context *sc )
sc->count_high = sc->count_low = 0; sc->count_high = sc->count_low = 0;
} }
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len ) void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
size_t len )
{ {
__m256i *vdata = (__m256i*)data; __m256i *vdata = (__m256i*)data;
size_t ptr; size_t ptr;
@@ -859,7 +861,7 @@ void ripemd160_16way_init( ripemd160_16way_context *sc )
sc->count_high = sc->count_low = 0; sc->count_high = sc->count_low = 0;
} }
void ripemd160_16way( ripemd160_16way_context *sc, const void *data, void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
size_t len ) size_t len )
{ {
__m512i *vdata = (__m512i*)data; __m512i *vdata = (__m512i*)data;

View File

@@ -16,7 +16,8 @@ typedef struct
} __attribute__ ((aligned (64))) ripemd160_4way_context; } __attribute__ ((aligned (64))) ripemd160_4way_context;
void ripemd160_4way_init( ripemd160_4way_context *sc ); void ripemd160_4way_init( ripemd160_4way_context *sc );
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len ); void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
size_t len );
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst ); void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
#if defined (__AVX2__) #if defined (__AVX2__)
@@ -26,10 +27,11 @@ typedef struct
__m256i buf[64>>2]; __m256i buf[64>>2];
__m256i val[5]; __m256i val[5];
uint32_t count_high, count_low; uint32_t count_high, count_low;
} __attribute__ ((aligned (64))) ripemd160_8way_context; } __attribute__ ((aligned (128))) ripemd160_8way_context;
void ripemd160_8way_init( ripemd160_8way_context *sc ); void ripemd160_8way_init( ripemd160_8way_context *sc );
void ripemd160_8way( ripemd160_8way_context *sc, const void *data, size_t len ); void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
size_t len );
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst ); void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
@@ -42,7 +44,7 @@ typedef struct
} __attribute__ ((aligned (128))) ripemd160_16way_context; } __attribute__ ((aligned (128))) ripemd160_16way_context;
void ripemd160_16way_init( ripemd160_16way_context *sc ); void ripemd160_16way_init( ripemd160_16way_context *sc );
void ripemd160_16way( ripemd160_16way_context *sc, const void *data, void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
size_t len ); size_t len );
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst ); void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst );

View File

@@ -41,13 +41,9 @@
#define SHA2_HASH_4WAY_H__ 1 #define SHA2_HASH_4WAY_H__ 1
#include <stddef.h> #include <stddef.h>
#include "sph_types.h"
#include "simd-utils.h" #include "simd-utils.h"
#if defined(__SSE2__) #if defined(__SSE2__)
//#if defined(__SSE4_2__)
//#define SPH_SIZE_sha256 256
// SHA-256 4 way // SHA-256 4 way
@@ -59,9 +55,12 @@ typedef struct {
} sha256_4way_context __attribute__ ((aligned (64))); } sha256_4way_context __attribute__ ((aligned (64)));
void sha256_4way_init( sha256_4way_context *sc ); void sha256_4way_init( sha256_4way_context *sc );
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len ); void sha256_4way_update( sha256_4way_context *sc, const void *data,
size_t len );
void sha256_4way_close( sha256_4way_context *sc, void *dst ); void sha256_4way_close( sha256_4way_context *sc, void *dst );
#endif // SSE2
#if defined (__AVX2__) #if defined (__AVX2__)
// SHA-256 8 way // SHA-256 8 way
@@ -75,10 +74,28 @@ typedef struct {
void sha256_8way_init( sha256_8way_context *sc ); void sha256_8way_init( sha256_8way_context *sc );
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len ); void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len );
#define sha256_8way sha256_8way_update
void sha256_8way_close( sha256_8way_context *sc, void *dst ); void sha256_8way_close( sha256_8way_context *sc, void *dst );
//#define SPH_SIZE_sha512 512 #endif // AVX2
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// SHA-256 16 way
typedef struct {
__m512i buf[64>>2];
__m512i val[8];
uint32_t count_high, count_low;
bool initialized;
} sha256_16way_context __attribute__ ((aligned (128)));
void sha256_16way_init( sha256_16way_context *sc );
void sha256_16way_update( sha256_16way_context *sc, const void *data, size_t len );
void sha256_16way_close( sha256_16way_context *sc, void *dst );
#endif // AVX512
#if defined (__AVX2__)
// SHA-512 4 way // SHA-512 4 way
@@ -92,9 +109,10 @@ typedef struct {
void sha512_4way_init( sha512_4way_context *sc); void sha512_4way_init( sha512_4way_context *sc);
void sha512_4way_update( sha512_4way_context *sc, const void *data, void sha512_4way_update( sha512_4way_context *sc, const void *data,
size_t len ); size_t len );
#define sha512_4way sha512_4way_update
void sha512_4way_close( sha512_4way_context *sc, void *dst ); void sha512_4way_close( sha512_4way_context *sc, void *dst );
#endif // AVX2
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// SHA-512 8 way // SHA-512 8 way
@@ -111,8 +129,6 @@ void sha512_8way_update( sha512_8way_context *sc, const void *data,
size_t len ); size_t len );
void sha512_8way_close( sha512_8way_context *sc, void *dst ); void sha512_8way_close( sha512_8way_context *sc, void *dst );
#endif // AVX512 #endif // AVX512
#endif // __AVX2__
#endif // __SSE2__
#endif // SHA256_4WAY_H__ #endif // SHA256_4WAY_H__

View File

@@ -39,47 +39,31 @@
// SHA-256 32 bit // SHA-256 32 bit
/* /*
static const sph_u32 H256[8] = { static const uint32_t H256[8] =
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), {
SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A), 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
SPH_C32(0x510E527F), SPH_C32(0x9B05688C), 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
}; };
*/ */
static const sph_u32 K256[64] = { static const uint32_t K256[64] =
SPH_C32(0x428A2F98), SPH_C32(0x71374491), {
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5), 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1), 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5), 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
SPH_C32(0xD807AA98), SPH_C32(0x12835B01), 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
SPH_C32(0x243185BE), SPH_C32(0x550C7DC3), 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE), 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174), 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786), 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC), 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA), 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA), 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
SPH_C32(0x983E5152), SPH_C32(0xA831C66D), 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7), 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147), 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
SPH_C32(0x06CA6351), SPH_C32(0x14292967), 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138), 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
SPH_C32(0xD192E819), SPH_C32(0xD6990624),
SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
}; };
// SHA-256 4 way // SHA-256 4 way
@@ -248,7 +232,7 @@ void sha256_4way_init( sha256_4way_context *sc )
*/ */
} }
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len ) void sha256_4way_update( sha256_4way_context *sc, const void *data, size_t len )
{ {
__m128i *vdata = (__m128i*)data; __m128i *vdata = (__m128i*)data;
size_t ptr; size_t ptr;
@@ -273,7 +257,7 @@ void sha256_4way( sha256_4way_context *sc, const void *data, size_t len )
ptr = 0; ptr = 0;
} }
clow = sc->count_low; clow = sc->count_low;
clow2 = SPH_T32( clow + clen ); clow2 = clow + clen;
sc->count_low = clow2; sc->count_low = clow2;
if ( clow2 < clow ) if ( clow2 < clow )
sc->count_high++; sc->count_high++;
@@ -306,10 +290,8 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
sc->buf[ pad >> 2 ] = sc->buf[ pad >> 2 ] =
mm128_bswap_32( m128_const1_32( high ) ); mm128_bswap_32( m128_const1_32( high ) );
// mm128_bswap_32( _mm_set1_epi32( high ) );
sc->buf[ ( pad+4 ) >> 2 ] = sc->buf[ ( pad+4 ) >> 2 ] =
mm128_bswap_32( m128_const1_32( low ) ); mm128_bswap_32( m128_const1_32( low ) );
// mm128_bswap_32( _mm_set1_epi32( low ) );
sha256_4way_round( sc, sc->buf, sc->val ); sha256_4way_round( sc, sc->buf, sc->val );
mm128_block_bswap_32( dst, sc->val ); mm128_block_bswap_32( dst, sc->val );
@@ -483,7 +465,7 @@ void sha256_8way_init( sha256_8way_context *sc )
*/ */
} }
void sha256_8way( sha256_8way_context *sc, const void *data, size_t len ) void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
{ {
__m256i *vdata = (__m256i*)data; __m256i *vdata = (__m256i*)data;
size_t ptr; size_t ptr;
@@ -508,7 +490,7 @@ void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
ptr = 0; ptr = 0;
} }
clow = sc->count_low; clow = sc->count_low;
clow2 = SPH_T32( clow + clen ); clow2 = clow + clen;
sc->count_low = clow2; sc->count_low = clow2;
if ( clow2 < clow ) if ( clow2 < clow )
sc->count_high++; sc->count_high++;
@@ -549,5 +531,233 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
mm256_block_bswap_32( dst, sc->val ); mm256_block_bswap_32( dst, sc->val );
} }
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// SHA-256 16 way
// Logical functions of SHA-256 applied to sixteen 32-bit lanes at once
// (one __m512i holds the same word of 16 independent hashes).
// NOTE(review): mm512_ror_32 and mm512_add4_32 are defined outside this
// chunk; presumably a per-lane 32-bit rotate right and a 4-operand 32-bit
// add -- confirm against simd-utils.

// Bitwise select: takes the bit of Y where X is 1, the bit of Z where X is 0.
#define CHx16(X, Y, Z) \
   _mm512_xor_si512( _mm512_and_si512( _mm512_xor_si512( Y, Z ), X ), Z )

// Bitwise majority of X, Y and Z.
#define MAJx16(X, Y, Z) \
   _mm512_or_si512( _mm512_and_si512( X, Y ), \
                    _mm512_and_si512( _mm512_or_si512( X, Y ), Z ) )

// Big sigma 0: rotations by 2, 13 and 22, XORed together.
#define BSG2_0x16(x) \
   _mm512_xor_si512( _mm512_xor_si512( \
        mm512_ror_32(x,  2), mm512_ror_32(x, 13) ), mm512_ror_32( x, 22) )

// Big sigma 1: rotations by 6, 11 and 25, XORed together.
#define BSG2_1x16(x) \
   _mm512_xor_si512( _mm512_xor_si512( \
        mm512_ror_32(x,  6), mm512_ror_32(x, 11) ), mm512_ror_32( x, 25) )

// Small sigma 0: rotations by 7 and 18, XORed with a logical shift right by 3.
#define SSG2_0x16(x) \
   _mm512_xor_si512( _mm512_xor_si512( \
        mm512_ror_32(x,  7), mm512_ror_32(x, 18) ), _mm512_srli_epi32(x,  3) )

// Small sigma 1: rotations by 17 and 19, XORed with a logical shift right by 10.
#define SSG2_1x16(x) \
   _mm512_xor_si512( _mm512_xor_si512( \
        mm512_ror_32(x, 17), mm512_ror_32(x, 19) ), _mm512_srli_epi32(x, 10) )

// Message schedule expansion over the 16-entry window W of the enclosing
// function: combines SSG2_1(W[a]), W[b], SSG2_0(W[c]) and W[d].
// Expands to a plain expression (no trailing semicolon) so that the caller's
// own ';' terminates the statement and the macro can be used wherever an
// expression is allowed.
#define SHA2x16_MEXP( a, b, c, d ) \
   mm512_add4_32( SSG2_1x16( W[a] ), W[b], SSG2_0x16( W[c] ), W[d] )
// One SHA-256 step on sixteen lanes.
// A..H are the working variables (passed in rotated order by the caller so
// that no data has to be moved between steps), i indexes the 16-entry
// schedule window W of the enclosing function and (j)+(i) indexes the round
// constant table K256. Only D and H are written.
#define SHA2s_16WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
do { \
   const __m512i round_k = _mm512_set1_epi32( K256[( (j)+(i) )] ); \
   const __m512i sum_efg = mm512_add4_32( BSG2_1x16(E), CHx16(E, F, G), \
                                          round_k, W[i] ); \
   const __m512i tmp1 = _mm512_add_epi32( H, sum_efg ); \
   const __m512i tmp2 = _mm512_add_epi32( BSG2_0x16(A), MAJx16(A, B, C) ); \
   D = _mm512_add_epi32( D, tmp1 ); \
   H = _mm512_add_epi32( tmp1, tmp2 ); \
} while (0)
// Sixteen consecutive steps over the working variables A..H and schedule W
// of the enclosing function; k_base is the offset of the first round
// constant used (0, 16, 32 or 48). Local to sha256_16way_round.
#define SHA2s_16WAY_16STEPS( k_base ) \
do { \
   SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H,  0, k_base ); \
   SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G,  1, k_base ); \
   SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F,  2, k_base ); \
   SHA2s_16WAY_STEP( F, G, H, A, B, C, D, E,  3, k_base ); \
   SHA2s_16WAY_STEP( E, F, G, H, A, B, C, D,  4, k_base ); \
   SHA2s_16WAY_STEP( D, E, F, G, H, A, B, C,  5, k_base ); \
   SHA2s_16WAY_STEP( C, D, E, F, G, H, A, B,  6, k_base ); \
   SHA2s_16WAY_STEP( B, C, D, E, F, G, H, A,  7, k_base ); \
   SHA2s_16WAY_STEP( A, B, C, D, E, F, G, H,  8, k_base ); \
   SHA2s_16WAY_STEP( H, A, B, C, D, E, F, G,  9, k_base ); \
   SHA2s_16WAY_STEP( G, H, A, B, C, D, E, F, 10, k_base ); \
   SHA2s_16WAY_STEP( F, G, H, A, B, C, D, E, 11, k_base ); \
   SHA2s_16WAY_STEP( E, F, G, H, A, B, C, D, 12, k_base ); \
   SHA2s_16WAY_STEP( D, E, F, G, H, A, B, C, 13, k_base ); \
   SHA2s_16WAY_STEP( C, D, E, F, G, H, A, B, 14, k_base ); \
   SHA2s_16WAY_STEP( B, C, D, E, F, G, H, A, 15, k_base ); \
} while (0)

// Compress one 64-byte block per lane.
//   ctx - context; only the 'initialized' flag is read and written here.
//   in  - 16 vectors of message words, byte swapped into W before use.
//   r   - 8 vector chaining value, updated in place. While the context is
//         not yet initialized r is not read: the SHA-256 initial value is
//         used instead and the flag is set.
static void
sha256_16way_round( sha256_16way_context *ctx, __m512i *in, __m512i r[8] )
{
   __m512i A, B, C, D, E, F, G, H;
   __m512i W[16];
   __m512i S[8];      // chaining value going into this block

   mm512_block_bswap_32( W  , in   );
   mm512_block_bswap_32( W+8, in+8 );

   if ( ctx->initialized )
   {
      for ( int i = 0; i < 8; i++ )
         S[i] = r[i];
   }
   else
   {
      // First block: start from the standard initial hash value.
      S[0] = m512_const1_64( 0x6A09E6676A09E667 );
      S[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
      S[2] = m512_const1_64( 0x3C6EF3723C6EF372 );
      S[3] = m512_const1_64( 0xA54FF53AA54FF53A );
      S[4] = m512_const1_64( 0x510E527F510E527F );
      S[5] = m512_const1_64( 0x9B05688C9B05688C );
      S[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
      S[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
      ctx->initialized = true;
   }

   A = S[0];   B = S[1];   C = S[2];   D = S[3];
   E = S[4];   F = S[5];   G = S[6];   H = S[7];

   // Steps 0..15 consume the message words directly.
   SHA2s_16WAY_16STEPS( 0 );

   // Steps 16..63: refresh the 16-word schedule window in place, then run
   // the next sixteen steps. The order of the W updates matters because
   // later entries read entries already replaced in this pass.
   for ( int k_base = 16; k_base < 64; k_base += 16 )
   {
      W[ 0] = SHA2x16_MEXP( 14,  9,  1,  0 );
      W[ 1] = SHA2x16_MEXP( 15, 10,  2,  1 );
      W[ 2] = SHA2x16_MEXP(  0, 11,  3,  2 );
      W[ 3] = SHA2x16_MEXP(  1, 12,  4,  3 );
      W[ 4] = SHA2x16_MEXP(  2, 13,  5,  4 );
      W[ 5] = SHA2x16_MEXP(  3, 14,  6,  5 );
      W[ 6] = SHA2x16_MEXP(  4, 15,  7,  6 );
      W[ 7] = SHA2x16_MEXP(  5,  0,  8,  7 );
      W[ 8] = SHA2x16_MEXP(  6,  1,  9,  8 );
      W[ 9] = SHA2x16_MEXP(  7,  2, 10,  9 );
      W[10] = SHA2x16_MEXP(  8,  3, 11, 10 );
      W[11] = SHA2x16_MEXP(  9,  4, 12, 11 );
      W[12] = SHA2x16_MEXP( 10,  5, 13, 12 );
      W[13] = SHA2x16_MEXP( 11,  6, 14, 13 );
      W[14] = SHA2x16_MEXP( 12,  7, 15, 14 );
      W[15] = SHA2x16_MEXP( 13,  8,  0, 15 );

      SHA2s_16WAY_16STEPS( k_base );
   }

   // Feed forward: new chaining value = old chaining value + working state.
   r[0] = _mm512_add_epi32( S[0], A );
   r[1] = _mm512_add_epi32( S[1], B );
   r[2] = _mm512_add_epi32( S[2], C );
   r[3] = _mm512_add_epi32( S[3], D );
   r[4] = _mm512_add_epi32( S[4], E );
   r[5] = _mm512_add_epi32( S[5], F );
   r[6] = _mm512_add_epi32( S[6], G );
   r[7] = _mm512_add_epi32( S[7], H );
}

#undef SHA2s_16WAY_16STEPS
/*
 * Reset a 16-lane SHA-256 context for a new message.
 *
 * Only the byte counters and the "initialized" flag are cleared here.  The
 * chaining value in sc->val is not written; the compression round checks
 * the flag and folds in the standard IV on the first block instead.
 */
void sha256_16way_init( sha256_16way_context *sc )
{
   sc->count_low   = 0;
   sc->count_high  = 0;
   sc->initialized = false;
}
/*
 * Absorb message data into a 16-lane SHA-256 context.
 *
 * sc    context previously set up with sha256_16way_init()
 * data  input vectors; read as an array of __m512i, one vector per
 *       4 bytes of per-lane message
 * len   per-lane length in bytes; lengths are converted to vector counts
 *       with >>2, so this assumes a multiple of 4 -- TODO confirm callers
 *
 * Data is staged in sc->buf and a compression round is run each time a
 * full 64-byte block has been collected.  The running byte count is kept
 * as a 32-bit low word with a carry into count_high.
 */
void sha256_16way_update( sha256_16way_context *sc, const void *data,
                          size_t len )
{
   const int buf_size = 64;
   __m512i *src = (__m512i*)data;
   size_t fill = (unsigned)sc->count_low & (buf_size - 1U);

   while ( len > 0 )
   {
      uint32_t before, after;
      size_t chunk = buf_size - fill;

      // Never take more than the caller supplied.
      if ( chunk > len )
         chunk = len;

      memcpy_512( sc->buf + (fill>>2), src, chunk>>2 );
      src  += chunk>>2;
      fill += chunk;
      len  -= chunk;

      // Buffer holds a complete block: compress it and start over.
      if ( fill == buf_size )
      {
         sha256_16way_round( sc, sc->buf, sc->val );
         fill = 0;
      }

      // 32-bit wrap of the low counter carries into the high counter.
      before = sc->count_low;
      after  = before + chunk;
      sc->count_low = after;
      if ( after < before )
         sc->count_high++;
   }
}
/*
 * Finish a 16-lane SHA-256 computation and write the digests to dst.
 *
 * sc   context holding the buffered tail of the message
 * dst  output buffer; receives sc->val through mm512_block_bswap_32
 *
 * Appends the 0x80 padding marker, zero fills up to the final 8 bytes of
 * the block (spilling into an extra block when there is no room), stores
 * the message length in bits as two byte-swapped 32-bit words and runs the
 * last compression round.
 */
void sha256_16way_close( sha256_16way_context *sc, void *dst )
{
   const int buf_size = 64;
   const int pad = buf_size - 8;     // byte offset of the length field
   unsigned used = (unsigned)sc->count_low & (buf_size - 1U);
   uint32_t bits_lo, bits_hi;

   // Padding marker: 0x00000080 in every 32-bit lane.
   sc->buf[ used>>2 ] = m512_const1_64( 0x0000008000000080 );
   used += 4;

   if ( used <= pad )
   {
      // Length field still fits in this block.
      memset_zero_512( sc->buf + (used>>2), (pad - used) >> 2 );
   }
   else
   {
      // No room left: flush this block and continue in a fresh one.
      memset_zero_512( sc->buf + (used>>2), (buf_size - used) >> 2 );
      sha256_16way_round( sc, sc->buf, sc->val );
      memset_zero_512( sc->buf, pad >> 2 );
   }

   // Convert the 64-bit byte count (high:low) to a bit count.
   bits_lo = sc->count_low;
   bits_hi = (sc->count_high << 3) | (bits_lo >> 29);
   bits_lo = bits_lo << 3;

   sc->buf[ pad >> 2 ]       = mm512_bswap_32( m512_const1_32( bits_hi ) );
   sc->buf[ ( pad+4 ) >> 2 ] = mm512_bswap_32( m512_const1_32( bits_lo ) );

   sha256_16way_round( sc, sc->buf, sc->val );
   mm512_block_bswap_32( dst, sc->val );
}
#endif // AVX512
#endif // __AVX2__ #endif // __AVX2__
#endif // __SSE2__ #endif // __SSE2__

View File

@@ -15,19 +15,19 @@ void sha256q_8way_hash( void* output, const void* input )
sha256_8way_context ctx; sha256_8way_context ctx;
memcpy( &ctx, &sha256_ctx8, sizeof ctx ); memcpy( &ctx, &sha256_ctx8, sizeof ctx );
sha256_8way( &ctx, input + (64<<3), 16 ); sha256_8way_update( &ctx, input + (64<<3), 16 );
sha256_8way_close( &ctx, vhash ); sha256_8way_close( &ctx, vhash );
sha256_8way_init( &ctx ); sha256_8way_init( &ctx );
sha256_8way( &ctx, vhash, 32 ); sha256_8way_update( &ctx, vhash, 32 );
sha256_8way_close( &ctx, vhash ); sha256_8way_close( &ctx, vhash );
sha256_8way_init( &ctx ); sha256_8way_init( &ctx );
sha256_8way( &ctx, vhash, 32 ); sha256_8way_update( &ctx, vhash, 32 );
sha256_8way_close( &ctx, vhash ); sha256_8way_close( &ctx, vhash );
sha256_8way_init( &ctx ); sha256_8way_init( &ctx );
sha256_8way( &ctx, vhash, 32 ); sha256_8way_update( &ctx, vhash, 32 );
sha256_8way_close( &ctx, output ); sha256_8way_close( &ctx, output );
} }
@@ -61,7 +61,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
// Need big endian data // Need big endian data
mm256_bswap32_intrlv80_8x32( vdata, pdata ); mm256_bswap32_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 ); sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 ); sha256_8way_update( &sha256_ctx8, vdata, 64 );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] ) for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{ {
@@ -108,19 +108,19 @@ void sha256q_4way_hash( void* output, const void* input )
sha256_4way_context ctx; sha256_4way_context ctx;
memcpy( &ctx, &sha256_ctx4, sizeof ctx ); memcpy( &ctx, &sha256_ctx4, sizeof ctx );
sha256_4way( &ctx, input + (64<<2), 16 ); sha256_4way_update( &ctx, input + (64<<2), 16 );
sha256_4way_close( &ctx, vhash ); sha256_4way_close( &ctx, vhash );
sha256_4way_init( &ctx ); sha256_4way_init( &ctx );
sha256_4way( &ctx, vhash, 32 ); sha256_4way_update( &ctx, vhash, 32 );
sha256_4way_close( &ctx, vhash ); sha256_4way_close( &ctx, vhash );
sha256_4way_init( &ctx ); sha256_4way_init( &ctx );
sha256_4way( &ctx, vhash, 32 ); sha256_4way_update( &ctx, vhash, 32 );
sha256_4way_close( &ctx, vhash ); sha256_4way_close( &ctx, vhash );
sha256_4way_init( &ctx ); sha256_4way_init( &ctx );
sha256_4way( &ctx, vhash, 32 ); sha256_4way_update( &ctx, vhash, 32 );
sha256_4way_close( &ctx, output ); sha256_4way_close( &ctx, output );
} }
@@ -154,7 +154,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata ); mm128_bswap32_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 ); sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 ); sha256_4way_update( &sha256_ctx4, vdata, 64 );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] ) for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{ {

View File

@@ -15,15 +15,15 @@ void sha256t_8way_hash( void* output, const void* input )
sha256_8way_context ctx; sha256_8way_context ctx;
memcpy( &ctx, &sha256_ctx8, sizeof ctx ); memcpy( &ctx, &sha256_ctx8, sizeof ctx );
sha256_8way( &ctx, input + (64<<3), 16 ); sha256_8way_update( &ctx, input + (64<<3), 16 );
sha256_8way_close( &ctx, vhash ); sha256_8way_close( &ctx, vhash );
sha256_8way_init( &ctx ); sha256_8way_init( &ctx );
sha256_8way( &ctx, vhash, 32 ); sha256_8way_update( &ctx, vhash, 32 );
sha256_8way_close( &ctx, vhash ); sha256_8way_close( &ctx, vhash );
sha256_8way_init( &ctx ); sha256_8way_init( &ctx );
sha256_8way( &ctx, vhash, 32 ); sha256_8way_update( &ctx, vhash, 32 );
sha256_8way_close( &ctx, output ); sha256_8way_close( &ctx, output );
} }
@@ -59,7 +59,7 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
// Need big endian data // Need big endian data
mm256_bswap32_intrlv80_8x32( vdata, pdata ); mm256_bswap32_intrlv80_8x32( vdata, pdata );
sha256_8way_init( &sha256_ctx8 ); sha256_8way_init( &sha256_ctx8 );
sha256_8way( &sha256_ctx8, vdata, 64 ); sha256_8way_update( &sha256_ctx8, vdata, 64 );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] ) for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{ {
@@ -101,15 +101,15 @@ void sha256t_4way_hash( void* output, const void* input )
sha256_4way_context ctx; sha256_4way_context ctx;
memcpy( &ctx, &sha256_ctx4, sizeof ctx ); memcpy( &ctx, &sha256_ctx4, sizeof ctx );
sha256_4way( &ctx, input + (64<<2), 16 ); sha256_4way_update( &ctx, input + (64<<2), 16 );
sha256_4way_close( &ctx, vhash ); sha256_4way_close( &ctx, vhash );
sha256_4way_init( &ctx ); sha256_4way_init( &ctx );
sha256_4way( &ctx, vhash, 32 ); sha256_4way_update( &ctx, vhash, 32 );
sha256_4way_close( &ctx, vhash ); sha256_4way_close( &ctx, vhash );
sha256_4way_init( &ctx ); sha256_4way_init( &ctx );
sha256_4way( &ctx, vhash, 32 ); sha256_4way_update( &ctx, vhash, 32 );
sha256_4way_close( &ctx, output ); sha256_4way_close( &ctx, output );
} }
@@ -143,7 +143,7 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
mm128_bswap32_intrlv80_4x32( vdata, pdata ); mm128_bswap32_intrlv80_4x32( vdata, pdata );
sha256_4way_init( &sha256_ctx4 ); sha256_4way_init( &sha256_ctx4 );
sha256_4way( &sha256_ctx4, vdata, 64 ); sha256_4way_update( &sha256_ctx4, vdata, 64 );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] ) for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{ {

View File

@@ -37,55 +37,57 @@
#include "sha-hash-4way.h" #include "sha-hash-4way.h"
/* /*
static const sph_u64 H512[8] = { static const uint64_t H512[8] =
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B), {
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1), 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F), 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179) 0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
}; };
*/ */
static const sph_u64 K512[80] = { static const uint64_t K512[80] =
SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD), {
SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC), 0x428A2F98D728AE22, 0x7137449123EF65CD,
SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019), 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118), 0x3956C25BF348B538, 0x59F111F1B605D019,
SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE), 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2), 0xD807AA98A3030242, 0x12835B0145706FBE,
SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1), 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694), 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1,
SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3), 0x9BDC06A725C71235, 0xC19BF174CF692694,
SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65), 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3,
SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483), 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5), 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483,
SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210), 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4), 0x983E5152EE66DFAB, 0xA831C66D2DB43210,
SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725), 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70), 0xC6E00BF33DA88FC2, 0xD5A79147930AA725,
SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926), 0x06CA6351E003826F, 0x142929670A0E6E70,
SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF), 0x27B70A8546D22FFC, 0x2E1B21385C26C926,
SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8), 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B), 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8,
SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001), 0x81C2C92E47EDAEE6, 0x92722C851482353B,
SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30), 0xA2BFE8A14CF10364, 0xA81A664BBC423001,
SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910), 0xC24B8B70D0F89791, 0xC76C51A30654BE30,
SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8), 0xD192E819D6EF5218, 0xD69906245565A910,
SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53), 0xF40E35855771202A, 0x106AA07032BBD1B8,
SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8), 0x19A4C116B8D2D0C8, 0x1E376C085141AB53,
SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB), 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3), 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB,
SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60), 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC), 0x748F82EE5DEFB2FC, 0x78A5636F43172F60,
SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9), 0x84C87814A1F0AB72, 0x8CC702081A6439EC,
SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B), 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9,
SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207), 0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178), 0xCA273ECEEA26619C, 0xD186B8C721C0C207,
SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6), 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B), 0x06F067AA72176FBA, 0x0A637DC5A2C898A6,
SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493), 0x113F9804BEF90DAE, 0x1B710B35131C471B,
SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C), 0x28DB77F523047D84, 0x32CAAB7B40C72493,
SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A), 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817) 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A,
0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
}; };

View File

@@ -97,7 +97,7 @@ void shabal256_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void shabal512_4way_init( void *cc ); void shabal512_4way_init( void *cc );
void shabal512_4way_update( void *cc, const void *data, size_t len ); void shabal512_4way_update( void *cc, const void *data, size_t len );
#define shabal512_4way shabal512_4way_update //#define shabal512_4way shabal512_4way_update
void shabal512_4way_close( void *cc, void *dst ); void shabal512_4way_close( void *cc, void *dst );
void shabal512_4way_addbits_and_close( void *cc, unsigned ub, unsigned n, void shabal512_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void *dst ); void *dst );

View File

@@ -18,76 +18,18 @@ void skeinhash_8way( void *state, const void *input )
uint64_t vhash64[8*8] __attribute__ ((aligned (128))); uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
skein512_8way_context ctx_skein; skein512_8way_context ctx_skein;
//#if defined(__SHA__)
// uint32_t hash0[16] __attribute__ ((aligned (64)));
// uint32_t hash1[16] __attribute__ ((aligned (64)));
// uint32_t hash2[16] __attribute__ ((aligned (64)));
// uint32_t hash3[16] __attribute__ ((aligned (64)));
// uint32_t hash4[16] __attribute__ ((aligned (64)));
// uint32_t hash5[16] __attribute__ ((aligned (64)));
// uint32_t hash6[16] __attribute__ ((aligned (64)));
// uint32_t hash7[16] __attribute__ ((aligned (64)));
// SHA256_CTX ctx_sha256;
//#else
uint32_t vhash32[16*8] __attribute__ ((aligned (128))); uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
sha256_8way_context ctx_sha256; sha256_8way_context ctx_sha256;
//#endif
skein512_8way_init( &ctx_skein ); skein512_8way_init( &ctx_skein );
skein512_8way_update( &ctx_skein, input, 80 ); skein512_8way_update( &ctx_skein, input, 80 );
skein512_8way_close( &ctx_skein, vhash64 ); skein512_8way_close( &ctx_skein, vhash64 );
/*
#if defined(__SHA__)
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash64, 512 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash0, 64 );
SHA256_Final( (unsigned char*)hash0, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash1, 64 );
SHA256_Final( (unsigned char*)hash1, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash2, 64 );
SHA256_Final( (unsigned char*)hash2, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash4, 64 );
SHA256_Final( (unsigned char*)hash4, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash5, 64 );
SHA256_Final( (unsigned char*)hash5, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash6, 64 );
SHA256_Final( (unsigned char*)hash6, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash7, 64 );
SHA256_Final( (unsigned char*)hash7, &ctx_sha256 );
intrlv_8x32( state, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
#else
*/
rintrlv_8x64_8x32( vhash32, vhash64, 512 ); rintrlv_8x64_8x32( vhash32, vhash64, 512 );
// dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
// vhash64, 512 );
// intrlv_8x32( vhash32, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
// hash7, 512 );
sha256_8way_init( &ctx_sha256 ); sha256_8way_init( &ctx_sha256 );
sha256_8way( &ctx_sha256, vhash32, 64 ); sha256_8way_update( &ctx_sha256, vhash32, 64 );
sha256_8way_close( &ctx_sha256, state ); sha256_8way_close( &ctx_sha256, state );
//#endif
} }
int scanhash_skein_8way( struct work *work, uint32_t max_nonce, int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
@@ -176,7 +118,7 @@ void skeinhash_4way( void *state, const void *input )
rintrlv_4x64_4x32( vhash32, vhash64, 512 ); rintrlv_4x64_4x32( vhash32, vhash64, 512 );
sha256_4way_init( &ctx_sha256 ); sha256_4way_init( &ctx_sha256 );
sha256_4way( &ctx_sha256, vhash32, 64 ); sha256_4way_update( &ctx_sha256, vhash32, 64 );
sha256_4way_close( &ctx_sha256, state ); sha256_4way_close( &ctx_sha256, state );
#endif #endif
} }

View File

@@ -93,12 +93,12 @@ typedef sph_skein_4way_big_context skein256_4way_context;
void skein512_4way_init( skein512_4way_context *sc ); void skein512_4way_init( skein512_4way_context *sc );
void skein512_4way_update( void *cc, const void *data, size_t len ); void skein512_4way_update( void *cc, const void *data, size_t len );
void skein512_4way_close( void *cc, void *dst ); void skein512_4way_close( void *cc, void *dst );
#define skein512_4way skein512_4way_update //#define skein512_4way skein512_4way_update
void skein256_4way_init( skein256_4way_context *sc ); void skein256_4way_init( skein256_4way_context *sc );
void skein256_4way_update( void *cc, const void *data, size_t len ); void skein256_4way_update( void *cc, const void *data, size_t len );
void skein256_4way_close( void *cc, void *dst ); void skein256_4way_close( void *cc, void *dst );
#define skein256_4way skein256_4way_update //#define skein256_4way skein256_4way_update
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -68,11 +68,11 @@ void skein2hash_4way( void *output, const void *input )
uint64_t hash[16*4] __attribute__ ((aligned (64))); uint64_t hash[16*4] __attribute__ ((aligned (64)));
skein512_4way_init( &ctx ); skein512_4way_init( &ctx );
skein512_4way( &ctx, input, 80 ); skein512_4way_update( &ctx, input, 80 );
skein512_4way_close( &ctx, hash ); skein512_4way_close( &ctx, hash );
skein512_4way_init( &ctx ); skein512_4way_init( &ctx );
skein512_4way( &ctx, hash, 64 ); skein512_4way_update( &ctx, hash, 64 );
skein512_4way_close( &ctx, output ); skein512_4way_close( &ctx, output );
} }

View File

@@ -50,41 +50,138 @@
#include <string.h> #include <string.h>
#include "sm3-hash-4way.h" #include "sm3-hash-4way.h"
#ifdef __SSE4_2__ #ifdef __AVX2__
void sm3_4way_init( sm3_4way_ctx_t *ctx ) #define P0_8W(x) \
_mm256_xor_si256( x, _mm256_xor_si256( mm256_rol_32( x, 9 ), \
mm256_rol_32( x, 17 ) ) )
#define P1_8W(x) \
_mm256_xor_si256( x, _mm256_xor_si256( mm256_rol_32( x, 15 ), \
mm256_rol_32( x, 23 ) ) )
#define FF0_8W(x,y,z) \
_mm256_xor_si256( x, _mm256_xor_si256( y, z ) )
#define FF1_8W(x,y,z) \
_mm256_or_si256( _mm256_or_si256( _mm256_and_si256( x, y ), \
_mm256_and_si256( x, z ) ), \
_mm256_and_si256( y, z ) )
#define GG0_8W(x,y,z) FF0_8W(x,y,z)
#define GG1_8W(x,y,z) \
_mm256_or_si256( _mm256_and_si256( x, y ), \
_mm256_andnot_si256( x, z ) )
void sm3_8way_compress( __m256i *digest, __m256i *block )
{ {
ctx->digest[0] = _mm_set1_epi32( 0x7380166F ); __m256i W[68], W1[64];
ctx->digest[1] = _mm_set1_epi32( 0x4914B2B9 ); __m256i A = digest[ 0 ];
ctx->digest[2] = _mm_set1_epi32( 0x172442D7 ); __m256i B = digest[ 1 ];
ctx->digest[3] = _mm_set1_epi32( 0xDA8A0600 ); __m256i C = digest[ 2 ];
ctx->digest[4] = _mm_set1_epi32( 0xA96F30BC ); __m256i D = digest[ 3 ];
ctx->digest[5] = _mm_set1_epi32( 0x163138AA ); __m256i E = digest[ 4 ];
ctx->digest[6] = _mm_set1_epi32( 0xE38DEE4D ); __m256i F = digest[ 5 ];
ctx->digest[7] = _mm_set1_epi32( 0xB0FB0E4E ); __m256i G = digest[ 6 ];
ctx->nblocks = 0; __m256i H = digest[ 7 ];
ctx->num = 0; __m256i SS1, SS2, TT1, TT2, T;
int j;
for ( j = 0; j < 16; j++ )
W[j] = mm256_bswap_32( block[j] );
for ( j = 16; j < 68; j++ )
W[j] = _mm256_xor_si256( P1_8W( _mm256_xor_si256(
_mm256_xor_si256( W[ j-16 ], W[ j-9 ] ),
mm256_rol_32( W[ j-3 ], 15 ) ) ),
_mm256_xor_si256( mm256_rol_32( W[ j-13 ], 7 ), W[ j-6 ] ) );
for( j = 0; j < 64; j++ )
W1[j] = _mm256_xor_si256( W[j], W[j+4] );
T = _mm256_set1_epi32( 0x79CC4519UL );
for( j =0; j < 16; j++ )
{
SS1 = mm256_rol_32( _mm256_add_epi32( E, _mm256_add_epi32(
mm256_rol_32( A, 12 ), mm256_rol_var_32( T, j ) ) ), 7 );
SS2 = _mm256_xor_si256( SS1, mm256_rol_32( A, 12 ) );
TT1 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
FF0_8W( A, B, C ), D ), SS2 ), W1[j] );
TT2 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
GG0_8W( E, F, G ), H ), SS1 ), W[j] );
D = C;
C = mm256_rol_32( B, 9 );
B = A;
A = TT1;
H = G;
G = mm256_rol_32( F, 19 );
F = E;
E = P0_8W( TT2 );
}
T = _mm256_set1_epi32( 0x7A879D8AUL );
for( j =16; j < 64; j++ )
{
SS1 = mm256_rol_32( _mm256_add_epi32( _mm256_add_epi32(
mm256_rol_32(A,12), E ), mm256_rol_var_32( T, j&31 ) ), 7 );
SS2 = _mm256_xor_si256( SS1, mm256_rol_32( A, 12 ) );
TT1 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
FF1_8W( A, B, C ), D ), SS2 ), W1[j] );
TT2 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32(
GG1_8W( E, F, G ), H ), SS1 ), W[j] );
D = C;
C = mm256_rol_32( B, 9 );
B = A;
A = TT1;
H = G;
G = mm256_rol_32( F, 19 );
F = E;
E = P0_8W( TT2 );
}
digest[0] = _mm256_xor_si256( digest[0], A );
digest[1] = _mm256_xor_si256( digest[1], B );
digest[2] = _mm256_xor_si256( digest[2], C );
digest[3] = _mm256_xor_si256( digest[3], D );
digest[4] = _mm256_xor_si256( digest[4], E );
digest[5] = _mm256_xor_si256( digest[5], F );
digest[6] = _mm256_xor_si256( digest[6], G );
digest[7] = _mm256_xor_si256( digest[7], H );
} }
void sm3_4way( void *cc, const void *data, size_t len ) void sm3_8way_init( sm3_8way_ctx_t *ctx )
{ {
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc; ctx->digest[0] = _mm256_set1_epi32( 0x7380166F );
__m128i *block = (__m128i*)ctx->block; ctx->digest[1] = _mm256_set1_epi32( 0x4914B2B9 );
__m128i *vdata = (__m128i*)data; ctx->digest[2] = _mm256_set1_epi32( 0x172442D7 );
ctx->digest[3] = _mm256_set1_epi32( 0xDA8A0600 );
ctx->digest[4] = _mm256_set1_epi32( 0xA96F30BC );
ctx->digest[5] = _mm256_set1_epi32( 0x163138AA );
ctx->digest[6] = _mm256_set1_epi32( 0xE38DEE4D );
ctx->digest[7] = _mm256_set1_epi32( 0xB0FB0E4E );
ctx->nblocks = 0;
ctx->num = 0;
}
void sm3_8way_update( void *cc, const void *data, size_t len )
{
sm3_8way_ctx_t *ctx = (sm3_8way_ctx_t*)cc;
__m256i *block = (__m256i*)ctx->block;
__m256i *vdata = (__m256i*)data;
if ( ctx->num ) if ( ctx->num )
{ {
unsigned int left = SM3_BLOCK_SIZE - ctx->num; unsigned int left = SM3_BLOCK_SIZE - ctx->num;
if ( len < left ) if ( len < left )
{ {
memcpy_128( block + (ctx->num >> 2), vdata , len>>2 ); memcpy_256( block + (ctx->num >> 2), vdata , len>>2 );
ctx->num += len; ctx->num += len;
return; return;
} }
else else
{ {
memcpy_128( block + (ctx->num >> 2), vdata , left>>2 ); memcpy_256( block + (ctx->num >> 2), vdata , left>>2 );
sm3_4way_compress( ctx->digest, block ); sm3_8way_compress( ctx->digest, block );
ctx->nblocks++; ctx->nblocks++;
vdata += left>>2; vdata += left>>2;
len -= left; len -= left;
@@ -92,49 +189,53 @@ void sm3_4way( void *cc, const void *data, size_t len )
} }
while ( len >= SM3_BLOCK_SIZE ) while ( len >= SM3_BLOCK_SIZE )
{ {
sm3_4way_compress( ctx->digest, vdata ); sm3_8way_compress( ctx->digest, vdata );
ctx->nblocks++; ctx->nblocks++;
vdata += SM3_BLOCK_SIZE>>2; vdata += SM3_BLOCK_SIZE>>2;
len -= SM3_BLOCK_SIZE; len -= SM3_BLOCK_SIZE;
} }
ctx->num = len; ctx->num = len;
if ( len ) if ( len )
memcpy_128( block, vdata, len>>2 ); memcpy_256( block, vdata, len>>2 );
} }
void sm3_4way_close( void *cc, void *dst ) void sm3_8way_close( void *cc, void *dst )
{ {
sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc; sm3_8way_ctx_t *ctx = (sm3_8way_ctx_t*)cc;
__m128i *hash = (__m128i*)dst; __m256i *hash = (__m256i*)dst;
__m128i *count = (__m128i*)(ctx->block + ( (SM3_BLOCK_SIZE - 8) >> 2 ) ); __m256i *count = (__m256i*)(ctx->block + ( (SM3_BLOCK_SIZE - 8) >> 2 ) );
__m128i *block = (__m128i*)ctx->block; __m256i *block = (__m256i*)ctx->block;
int i; int i;
block[ctx->num] = _mm_set1_epi32( 0x80 ); block[ctx->num] = _mm256_set1_epi32( 0x80 );
if ( ctx->num + 8 <= SM3_BLOCK_SIZE ) if ( ctx->num + 8 <= SM3_BLOCK_SIZE )
{ {
memset_zero_128( block + (ctx->num >> 2) + 1, memset_zero_256( block + (ctx->num >> 2) + 1,
( SM3_BLOCK_SIZE - ctx->num - 8 ) >> 2 ); ( SM3_BLOCK_SIZE - ctx->num - 8 ) >> 2 );
} }
else else
{ {
memset_zero_128( block + (ctx->num >> 2) + 1, memset_zero_256( block + (ctx->num >> 2) + 1,
( SM3_BLOCK_SIZE - (ctx->num >> 2) - 1 ) ); ( SM3_BLOCK_SIZE - (ctx->num >> 2) - 1 ) );
sm3_4way_compress( ctx->digest, block ); sm3_8way_compress( ctx->digest, block );
memset_zero_128( block, ( SM3_BLOCK_SIZE - 8 ) >> 2 ); memset_zero_256( block, ( SM3_BLOCK_SIZE - 8 ) >> 2 );
} }
count[0] = mm128_bswap_32( count[0] = mm256_bswap_32(
_mm_set1_epi32( ctx->nblocks >> 23 ) ); _mm256_set1_epi32( ctx->nblocks >> 23 ) );
count[1] = mm128_bswap_32( _mm_set1_epi32( ( ctx->nblocks << 9 ) + count[1] = mm256_bswap_32( _mm256_set1_epi32( ( ctx->nblocks << 9 ) +
( ctx->num << 3 ) ) ); ( ctx->num << 3 ) ) );
sm3_4way_compress( ctx->digest, block ); sm3_8way_compress( ctx->digest, block );
for ( i = 0; i < 8 ; i++ ) for ( i = 0; i < 8 ; i++ )
hash[i] = mm128_bswap_32( ctx->digest[i] ); hash[i] = mm256_bswap_32( ctx->digest[i] );
} }
#endif
#if defined(__SSE2__)
#define P0(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x, 9 ), \ #define P0(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x, 9 ), \
mm128_rol_32( x, 17 ) ) ) mm128_rol_32( x, 17 ) ) )
#define P1(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x, 15 ), \ #define P1(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x, 15 ), \
@@ -227,5 +328,88 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
digest[7] = _mm_xor_si128( digest[7], H ); digest[7] = _mm_xor_si128( digest[7], H );
} }
/*
 * Prepare a 4-lane SM3 context for a new message.
 *
 * Every 32-bit lane of the eight digest vectors is loaded with the same
 * initial constant, and the block and byte counters are cleared.
 */
void sm3_4way_init( sm3_4way_ctx_t *ctx )
{
   static const uint32_t sm3_iv[8] =
   {
      0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600,
      0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E
   };
   int i;

   for ( i = 0; i < 8; i++ )
      ctx->digest[i] = _mm_set1_epi32( sm3_iv[i] );

   ctx->num     = 0;
   ctx->nblocks = 0;
}
/*
 * Absorb message data into a 4-lane SM3 context.
 *
 * cc    pointer to an sm3_4way_ctx_t
 * data  input vectors; read as an array of __m128i, one vector per
 *       4 bytes of per-lane message
 * len   per-lane length in bytes; converted to vector counts with >>2,
 *       so this assumes a multiple of 4 -- TODO confirm callers
 *
 * A partially filled block left by an earlier call is completed first,
 * then whole blocks are compressed straight from the input, and any
 * remainder is saved in the context for the next call.
 */
void sm3_4way_update( void *cc, const void *data, size_t len )
{
   sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
   __m128i *buf = (__m128i*)ctx->block;
   __m128i *src = (__m128i*)data;

   if ( ctx->num )
   {
      const unsigned int room = SM3_BLOCK_SIZE - ctx->num;
      __m128i *tail = buf + (ctx->num >> 2);

      if ( len < room )
      {
         // Still not a full block: just stash the data.
         memcpy_128( tail, src, len>>2 );
         ctx->num += len;
         return;
      }

      // Top up the pending block and compress it.
      memcpy_128( tail, src, room>>2 );
      sm3_4way_compress( ctx->digest, buf );
      ctx->nblocks++;
      src += room>>2;
      len -= room;
   }

   // Whole blocks are hashed in place, without copying.
   for ( ; len >= SM3_BLOCK_SIZE; len -= SM3_BLOCK_SIZE )
   {
      sm3_4way_compress( ctx->digest, src );
      ctx->nblocks++;
      src += SM3_BLOCK_SIZE>>2;
   }

   // Keep whatever is left for the next update or close.
   ctx->num = len;
   if ( len )
      memcpy_128( buf, src, len>>2 );
}
/*
 * Finish a 4-lane SM3 computation and write the digests to dst.
 *
 * cc   pointer to an sm3_4way_ctx_t holding the buffered message tail
 * dst  output, written as 8 __m128i vectors (byte-swapped digest words)
 *
 * The block buffer is an array of 16 vectors, one per 32-bit message word,
 * while ctx->num counts buffered BYTES.  All buffer positions are therefore
 * derived from ctx->num >> 2.  Using the byte count directly as an index,
 * or sizing the zero fill in bytes, runs past the 16-vector buffer whenever
 * a partial block is pending.
 *
 * Layout of the final block: padding marker word, zero words, then the
 * 64-bit message bit length in the last two words.  If the marker lands in
 * one of the last two words, the block is flushed and the length goes into
 * an otherwise empty extra block.
 *
 * Assumes ctx->num is a multiple of 4, as sm3_4way_update copies whole
 * words only -- TODO confirm no caller hashes unaligned lengths.
 */
void sm3_4way_close( void *cc, void *dst )
{
   sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
   __m128i *hash  = (__m128i*)dst;
   __m128i *block = (__m128i*)ctx->block;
   const unsigned int nwords  = SM3_BLOCK_SIZE >> 2;   // words per block
   const unsigned int len_pos = nwords - 2;            // first length word
   unsigned int pos = ctx->num >> 2;                   // next free word
   int i;

   // Padding marker, one word wide, right after the buffered data.
   block[ pos++ ] = _mm_set1_epi32( 0x80 );

   if ( pos > len_pos )
   {
      // The marker occupies a length slot: pad out and flush this block.
      memset_zero_128( block + pos, nwords - pos );
      sm3_4way_compress( ctx->digest, block );
      pos = 0;
   }

   // Zero everything between the data/marker and the length field.
   memset_zero_128( block + pos, len_pos - pos );

   // Message length in bits: nblocks * 512 + num * 8, high word first.
   block[ len_pos ]     = mm128_bswap_32(
                               _mm_set1_epi32( ctx->nblocks >> 23 ) );
   block[ len_pos + 1 ] = mm128_bswap_32( _mm_set1_epi32(
                               ( ctx->nblocks << 9 ) + ( ctx->num << 3 ) ) );

   sm3_4way_compress( ctx->digest, block );

   for ( i = 0; i < 8 ; i++ )
      hash[i] = mm128_bswap_32( ctx->digest[i] );
}
#endif #endif

View File

@@ -48,14 +48,13 @@
*/ */
#ifndef SPH_SM3_HASH_4WAY_H #ifndef SPH_SM3_HASH_4WAY_H
#define SPH_SM3_HASH_4WAY_H #define SPH_SM3_HASH_4WAY_H 1
#define SM3_DIGEST_LENGTH 32 #define SM3_DIGEST_LENGTH 32
#define SM3_BLOCK_SIZE 64 #define SM3_BLOCK_SIZE 64
#define SM3_CBLOCK (SM3_BLOCK_SIZE) #define SM3_CBLOCK (SM3_BLOCK_SIZE)
#define SM3_HMAC_SIZE (SM3_DIGEST_LENGTH) #define SM3_HMAC_SIZE (SM3_DIGEST_LENGTH)
#include <sys/types.h> #include <sys/types.h>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
@@ -65,7 +64,6 @@
extern "C" { extern "C" {
#endif #endif
typedef struct { typedef struct {
__m128i block[16] __attribute__ ((aligned (64))); __m128i block[16] __attribute__ ((aligned (64)));
__m128i digest[8]; __m128i digest[8];
@@ -74,15 +72,24 @@ typedef struct {
} sm3_4way_ctx_t; } sm3_4way_ctx_t;
void sm3_4way_init( sm3_4way_ctx_t *ctx ); void sm3_4way_init( sm3_4way_ctx_t *ctx );
//void sm3_4way_update( sm3_4way_ctx_t *ctx, const unsigned char* data, void sm3_4way_update(void *cc, const void *data, size_t len);
// size_t data_len );
//void sm3_4way_final( sm3_4way_ctx_t *ctx,
// unsigned char digest[SM3_DIGEST_LENGTH] );
void sm3_4way_compress( __m128i *digest, __m128i *block );
void sm3_4way(void *cc, const void *data, size_t len);
void sm3_4way_close(void *cc, void *dst); void sm3_4way_close(void *cc, void *dst);
#if defined(__AVX2__)
// Hashing state for the 8-way (AVX2) SM3 implementation.
typedef struct {
// Staging buffer for one message block: 16 vectors, 64-byte aligned.
__m256i block[16] __attribute__ ((aligned (64)));
// Running digest, 8 vectors; seeded by sm3_8way_init().
__m256i digest[8];
// Number of blocks compressed so far; feeds the final bit length.
uint32_t nblocks;
// Number of bytes currently buffered in block.
uint32_t num;
} sm3_8way_ctx_t;
void sm3_8way_init( sm3_8way_ctx_t *ctx );
void sm3_8way_update(void *cc, const void *data, size_t len);
void sm3_8way_close(void *cc, void *dst);
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@@ -282,11 +282,11 @@ void c11_4way_hash( void *state, const void *input )
memcpy( &ctx, &c11_4way_ctx, sizeof(c11_4way_ctx) ); memcpy( &ctx, &c11_4way_ctx, sizeof(c11_4way_ctx) );
// 1 Blake 4way // 1 Blake 4way
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw // 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -305,15 +305,15 @@ void c11_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// 4 JH // 4 JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// 5 Keccak // 5 Keccak
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// 6 Skein // 6 Skein
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// Serial // Serial

View File

@@ -84,13 +84,13 @@ void timetravel_4way_hash(void *output, const void *input)
switch ( permutation[i] ) switch ( permutation[i] )
{ {
case 0: case 0:
blake512_4way( &ctx.blake, vhashA, dataLen ); blake512_4way_update( &ctx.blake, vhashA, dataLen );
blake512_4way_close( &ctx.blake, vhashB ); blake512_4way_close( &ctx.blake, vhashB );
if ( i == 7 ) if ( i == 7 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break; break;
case 1: case 1:
bmw512_4way( &ctx.bmw, vhashA, dataLen ); bmw512_4way_update( &ctx.bmw, vhashA, dataLen );
bmw512_4way_close( &ctx.bmw, vhashB ); bmw512_4way_close( &ctx.bmw, vhashB );
if ( i == 7 ) if ( i == 7 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
@@ -112,19 +112,19 @@ void timetravel_4way_hash(void *output, const void *input)
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break; break;
case 3: case 3:
skein512_4way( &ctx.skein, vhashA, dataLen ); skein512_4way_update( &ctx.skein, vhashA, dataLen );
skein512_4way_close( &ctx.skein, vhashB ); skein512_4way_close( &ctx.skein, vhashB );
if ( i == 7 ) if ( i == 7 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break; break;
case 4: case 4:
jh512_4way( &ctx.jh, vhashA, dataLen ); jh512_4way_update( &ctx.jh, vhashA, dataLen );
jh512_4way_close( &ctx.jh, vhashB ); jh512_4way_close( &ctx.jh, vhashB );
if ( i == 7 ) if ( i == 7 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break; break;
case 5: case 5:
keccak512_4way( &ctx.keccak, vhashA, dataLen ); keccak512_4way_update( &ctx.keccak, vhashA, dataLen );
keccak512_4way_close( &ctx.keccak, vhashB ); keccak512_4way_close( &ctx.keccak, vhashB );
if ( i == 7 ) if ( i == 7 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );

View File

@@ -90,13 +90,13 @@ void timetravel10_4way_hash(void *output, const void *input)
switch ( permutation[i] ) switch ( permutation[i] )
{ {
case 0: case 0:
blake512_4way( &ctx.blake, vhashA, dataLen ); blake512_4way_update( &ctx.blake, vhashA, dataLen );
blake512_4way_close( &ctx.blake, vhashB ); blake512_4way_close( &ctx.blake, vhashB );
if ( i == 9 ) if ( i == 9 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break; break;
case 1: case 1:
bmw512_4way( &ctx.bmw, vhashA, dataLen ); bmw512_4way_update( &ctx.bmw, vhashA, dataLen );
bmw512_4way_close( &ctx.bmw, vhashB ); bmw512_4way_close( &ctx.bmw, vhashB );
if ( i == 9 ) if ( i == 9 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
@@ -118,19 +118,19 @@ void timetravel10_4way_hash(void *output, const void *input)
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, dataLen<<3 );
break; break;
case 3: case 3:
skein512_4way( &ctx.skein, vhashA, dataLen ); skein512_4way_update( &ctx.skein, vhashA, dataLen );
skein512_4way_close( &ctx.skein, vhashB ); skein512_4way_close( &ctx.skein, vhashB );
if ( i == 9 ) if ( i == 9 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break; break;
case 4: case 4:
jh512_4way( &ctx.jh, vhashA, dataLen ); jh512_4way_update( &ctx.jh, vhashA, dataLen );
jh512_4way_close( &ctx.jh, vhashB ); jh512_4way_close( &ctx.jh, vhashB );
if ( i == 9 ) if ( i == 9 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );
break; break;
case 5: case 5:
keccak512_4way( &ctx.keccak, vhashA, dataLen ); keccak512_4way_update( &ctx.keccak, vhashA, dataLen );
keccak512_4way_close( &ctx.keccak, vhashB ); keccak512_4way_close( &ctx.keccak, vhashB );
if ( i == 9 ) if ( i == 9 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhashB, dataLen<<3 );

View File

@@ -282,11 +282,11 @@ void x11_4way_hash( void *state, const void *input )
memcpy( &ctx, &x11_4way_ctx, sizeof(x11_4way_ctx) ); memcpy( &ctx, &x11_4way_ctx, sizeof(x11_4way_ctx) );
// 1 Blake 4way // 1 Blake 4way
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw // 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -305,15 +305,15 @@ void x11_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// 4 Skein // 4 Skein
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// 5 JH // 5 JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// 6 Keccak // 6 Keccak
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

View File

@@ -85,12 +85,12 @@ void x11evo_4way_hash( void *state, const void *input )
switch ( idx ) switch ( idx )
{ {
case 0: case 0:
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break; break;
case 1: case 1:
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
if ( i >= len-1 ) if ( i >= len-1 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
@@ -112,19 +112,19 @@ void x11evo_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 64<<3 );
break; break;
case 3: case 3:
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
if ( i >= len-1 ) if ( i >= len-1 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break; break;
case 4: case 4:
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
if ( i >= len-1 ) if ( i >= len-1 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );
break; break;
case 5: case 5:
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
if ( i >= len-1 ) if ( i >= len-1 )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 64<<3 );

View File

@@ -310,10 +310,10 @@ void x11gost_4way_hash( void *state, const void *input )
x11gost_4way_ctx_holder ctx; x11gost_4way_ctx_holder ctx;
memcpy( &ctx, &x11gost_4way_ctx, sizeof(x11gost_4way_ctx) ); memcpy( &ctx, &x11gost_4way_ctx, sizeof(x11gost_4way_ctx) );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -333,13 +333,13 @@ void x11gost_4way_hash( void *state, const void *input )
// 4way // 4way
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// Serial // Serial

View File

@@ -272,10 +272,10 @@ void x12_4way_hash( void *state, const void *input )
x12_4way_ctx_holder ctx; x12_4way_ctx_holder ctx;
memcpy( &ctx, &x12_4way_ctx, sizeof(x12_4way_ctx) ); memcpy( &ctx, &x12_4way_ctx, sizeof(x12_4way_ctx) );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -328,16 +328,16 @@ void x12_4way_hash( void *state, const void *input )
// Parallel 4way 64 bit // Parallel 4way 64 bit
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 ); dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );

View File

@@ -225,11 +225,11 @@ void phi1612_4way_hash( void *state, const void *input )
memcpy( &ctx, &phi1612_4way_ctx, sizeof(phi1612_4way_ctx) ); memcpy( &ctx, &phi1612_4way_ctx, sizeof(phi1612_4way_ctx) );
// Skein parallel 4way // Skein parallel 4way
skein512_4way( &ctx.skein, input, 80 ); skein512_4way_update( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// JH // JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// Serial to the end // Serial to the end

View File

@@ -168,7 +168,7 @@ void skunk_4way_hash( void *output, const void *input )
skunk_4way_ctx_holder ctx __attribute__ ((aligned (64))); skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) ); memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );
skein512_4way( &ctx.skein, input, 80 ); skein512_4way_update( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

View File

@@ -321,11 +321,11 @@ void x13_4way_hash( void *state, const void *input )
memcpy( &ctx, &x13_4way_ctx, sizeof(x13_4way_ctx) ); memcpy( &ctx, &x13_4way_ctx, sizeof(x13_4way_ctx) );
// 1 Blake // 1 Blake
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw // 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -344,15 +344,15 @@ void x13_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// 4 Skein // 4 Skein
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// 5 JH // 5 JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// 6 Keccak // 6 Keccak
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// Serial // Serial
@@ -416,7 +416,7 @@ void x13_4way_hash( void *state, const void *input )
// 12 Hamsi parallel 4way 32 bit // 12 Hamsi parallel 4way 32 bit
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

View File

@@ -1,7 +1,4 @@
#include "x13sm3-gate.h" #include "x13sm3-gate.h"
#if defined(X13SM3_4WAY)
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
@@ -20,6 +17,281 @@
#include "algo/hamsi/hamsi-hash-4way.h" #include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h" #include "algo/fugue/sph_fugue.h"
#if defined(X13BCD_8WAY)
// One hashing context per stage of the 8-lane x13bcd chain.
// The *_8way members hash all eight lanes in a single call. The remaining
// members are single-lane contexts (or, for simd, a 4-lane context) that
// x13bcd_8way_hash() reuses for each lane or lane group in turn, resetting
// them between uses.
typedef struct {
blake512_8way_context blake;
bmw512_8way_context bmw;
hashState_groestl groestl;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
cubehashParam cube;
sph_shavite512_context shavite;
// 4-lane context: the hash function runs it twice, once per group of 4 lanes.
simd_4way_context simd;
hashState_echo echo;
sm3_8way_ctx_t sm3;
hamsi512_8way_context hamsi;
sph_fugue512_context fugue;
} x13bcd_8way_ctx_holder;
// Template context filled in by init_x13bcd_8way_ctx(). x13bcd_8way_hash()
// copies it into a local at the start of every call and also uses it to
// restore the single-lane contexts between lanes.
x13bcd_8way_ctx_holder x13bcd_8way_ctx __attribute__ ((aligned (64)));
// Per-thread Blake state saved by scanhash_x13bcd_8way() after it has
// absorbed the first 64 bytes of each lane's data; the hash function
// resumes from it and only feeds the remaining 16 bytes.
static __thread blake512_8way_context x13bcd_8way_ctx_mid;
/*
 * Initialise the template context x13bcd_8way_ctx.
 *
 * Every stage context is set up exactly once here; x13bcd_8way_hash() then
 * starts each call from a memcpy of this template instead of re-running the
 * individual init routines.  Must be called before the first hash call
 * (register_x13bcd_algo() does so when X13BCD_8WAY is defined).
 *
 * The numeric arguments are passed through unchanged from the original code;
 * they are presumably the digest sizes / round parameters each library
 * expects for its 512-bit variant -- confirm against the respective headers.
 */
void init_x13bcd_8way_ctx(void)
{
   blake512_8way_init( &x13bcd_8way_ctx.blake );
   bmw512_8way_init( &x13bcd_8way_ctx.bmw );
   init_groestl( &x13bcd_8way_ctx.groestl, 64 );
   skein512_8way_init( &x13bcd_8way_ctx.skein );
   jh512_8way_init( &x13bcd_8way_ctx.jh );
   keccak512_8way_init( &x13bcd_8way_ctx.keccak );
   cubehashInit( &x13bcd_8way_ctx.cube, 512, 16, 32 );
   sph_shavite512_init( &x13bcd_8way_ctx.shavite );
   simd_4way_init( &x13bcd_8way_ctx.simd, 512 );
   init_echo( &x13bcd_8way_ctx.echo, 512 );
   sm3_8way_init( &x13bcd_8way_ctx.sm3 );
   hamsi512_8way_init( &x13bcd_8way_ctx.hamsi );
   sph_fugue512_init( &x13bcd_8way_ctx.fugue );
}
/*
 * Compute the x13bcd hash for eight lanes at once.
 *
 * state : output buffer; receives 8 consecutive 32-byte results
 *         (lane i at byte offset 32*i), 256 bytes in total.
 * input : 8x64-bit interleaved data as prepared by scanhash_x13bcd_8way().
 *         Only the 16 bytes per lane following the first 64 are read here;
 *         the first 64 bytes are taken from the thread-local Blake midstate
 *         x13bcd_8way_ctx_mid, which the caller must have set up.
 *
 * Stages that have an 8-way implementation run on the interleaved buffer
 * vhash; the others run lane by lane (or, for simd, four lanes at a time)
 * on the de-interleaved buffers hash0..hash7.
 */
void x13bcd_8way_hash( void *state, const void *input )
{
   uint64_t vhash[8*8] __attribute__ ((aligned (128)));
   uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
   uint64_t hash0[8] __attribute__ ((aligned (64)));
   uint64_t hash1[8] __attribute__ ((aligned (64)));
   uint64_t hash2[8] __attribute__ ((aligned (64)));
   uint64_t hash3[8] __attribute__ ((aligned (64)));
   uint64_t hash4[8] __attribute__ ((aligned (64)));
   uint64_t hash5[8] __attribute__ ((aligned (64)));
   uint64_t hash6[8] __attribute__ ((aligned (64)));
   uint64_t hash7[8] __attribute__ ((aligned (64)));
   unsigned char *out = (unsigned char*)state;
   x13bcd_8way_ctx_holder ctx;

   // Start every call from the pre-initialised template.
   memcpy( &ctx, &x13bcd_8way_ctx, sizeof(x13bcd_8way_ctx) );

   // Blake: resume from the midstate and absorb the last 16 bytes of each
   // lane; 64<<3 skips the 64 already-hashed bytes of all 8 lanes.
   memcpy( &ctx.blake, &x13bcd_8way_ctx_mid, sizeof(x13bcd_8way_ctx_mid) );
   blake512_8way_update( &ctx.blake, input + (64<<3), 16 );
   blake512_8way_close( &ctx.blake, vhash );

   // Bmw
   bmw512_8way_update( &ctx.bmw, vhash, 64 );
   bmw512_8way_close( &ctx.bmw, vhash );

   // Serial
   dintrlv_8x64_512( hash0, hash1, hash2, hash3,
                     hash4, hash5, hash6, hash7, vhash );

   // Groestl: one context, reinitialised between lanes.
   update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
   reinit_groestl( &ctx.groestl );
   update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
   reinit_groestl( &ctx.groestl );
   update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
   reinit_groestl( &ctx.groestl );
   update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
   reinit_groestl( &ctx.groestl );
   update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
   reinit_groestl( &ctx.groestl );
   update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
   reinit_groestl( &ctx.groestl );
   update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
   reinit_groestl( &ctx.groestl );
   update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );

   // Parallel 8way
   intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
                    hash4, hash5, hash6, hash7 );

   // Skein
   skein512_8way_update( &ctx.skein, vhash, 64 );
   skein512_8way_close( &ctx.skein, vhash );

   // JH
   jh512_8way_update( &ctx.jh, vhash, 64 );
   jh512_8way_close( &ctx.jh, vhash );

   // Keccak
   keccak512_8way_update( &ctx.keccak, vhash, 64 );
   keccak512_8way_close( &ctx.keccak, vhash );

   // SM3 parallel 32 bit: re-interleave from 64-bit to 32-bit lanes first.
   // vhash is cleared before the close, presumably because SM3 fills less
   // than the 64 bytes per lane that the following stages consume.
   rintrlv_8x64_8x32( vhashA, vhash, 512 );
   memset( vhash, 0, sizeof vhash );
   sm3_8way_update( &ctx.sm3, vhashA, 64 );
   sm3_8way_close( &ctx.sm3, vhash );
   dintrlv_8x32_512( hash0, hash1, hash2, hash3,
                     hash4, hash5, hash6, hash7, vhash );

   // Cubehash: context restored from the template between lanes.
   cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
   memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1, 64 );
   memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2, 64 );
   memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
   memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash4, (const byte*) hash4, 64 );
   memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash5, (const byte*) hash5, 64 );
   memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash6, (const byte*) hash6, 64 );
   memcpy( &ctx.cube, &x13bcd_8way_ctx.cube, sizeof(cubehashParam) );
   cubehashUpdateDigest( &ctx.cube, (byte*)hash7, (const byte*) hash7, 64 );

   // Shavite: context restored from the template between lanes.
   sph_shavite512( &ctx.shavite, hash0, 64 );
   sph_shavite512_close( &ctx.shavite, hash0 );
   memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
           sizeof(sph_shavite512_context) );
   sph_shavite512( &ctx.shavite, hash1, 64 );
   sph_shavite512_close( &ctx.shavite, hash1 );
   memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
           sizeof(sph_shavite512_context) );
   sph_shavite512( &ctx.shavite, hash2, 64 );
   sph_shavite512_close( &ctx.shavite, hash2 );
   memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
           sizeof(sph_shavite512_context) );
   sph_shavite512( &ctx.shavite, hash3, 64 );
   sph_shavite512_close( &ctx.shavite, hash3 );
   memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
           sizeof(sph_shavite512_context) );
   sph_shavite512( &ctx.shavite, hash4, 64 );
   sph_shavite512_close( &ctx.shavite, hash4 );
   memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
           sizeof(sph_shavite512_context) );
   sph_shavite512( &ctx.shavite, hash5, 64 );
   sph_shavite512_close( &ctx.shavite, hash5 );
   memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
           sizeof(sph_shavite512_context) );
   sph_shavite512( &ctx.shavite, hash6, 64 );
   sph_shavite512_close( &ctx.shavite, hash6 );
   memcpy( &ctx.shavite, &x13bcd_8way_ctx.shavite,
           sizeof(sph_shavite512_context) );
   sph_shavite512( &ctx.shavite, hash7, 64 );
   sph_shavite512_close( &ctx.shavite, hash7 );

   // Simd: only a 4-way implementation is used, so lanes 0-3 and 4-7 are
   // processed as two separate groups with a re-init in between.
   intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 512 );
   simd_4way_update_close( &ctx.simd, vhash, vhash, 512 );
   dintrlv_4x128( hash0, hash1, hash2, hash3, vhash, 512 );
   intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 512 );
   simd_4way_init( &ctx.simd, 512 );
   simd_4way_update_close( &ctx.simd, vhash, vhash, 512 );
   dintrlv_4x128( hash4, hash5, hash6, hash7, vhash, 512 );

   // Echo: context restored from the template between lanes.
   update_final_echo( &ctx.echo, (BitSequence *)hash0,
                      (const BitSequence *) hash0, 512 );
   memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
   update_final_echo( &ctx.echo, (BitSequence *)hash1,
                      (const BitSequence *) hash1, 512 );
   memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
   update_final_echo( &ctx.echo, (BitSequence *)hash2,
                      (const BitSequence *) hash2, 512 );
   memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
   update_final_echo( &ctx.echo, (BitSequence *)hash3,
                      (const BitSequence *) hash3, 512 );
   memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
   update_final_echo( &ctx.echo, (BitSequence *)hash4,
                      (const BitSequence *) hash4, 512 );
   memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
   update_final_echo( &ctx.echo, (BitSequence *)hash5,
                      (const BitSequence *) hash5, 512 );
   memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
   update_final_echo( &ctx.echo, (BitSequence *)hash6,
                      (const BitSequence *) hash6, 512 );
   memcpy( &ctx.echo, &x13bcd_8way_ctx.echo, sizeof(hashState_echo) );
   update_final_echo( &ctx.echo, (BitSequence *)hash7,
                      (const BitSequence *) hash7, 512 );

   // Hamsi parallel 8way
   intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
                    hash4, hash5, hash6, hash7 );
   hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
   hamsi512_8way_close( &ctx.hamsi, vhash );
   dintrlv_8x64_512( hash0, hash1, hash2, hash3,
                     hash4, hash5, hash6, hash7, vhash );

   // Fugue serial.
   // sph_fugue512_close() writes a full 64-byte digest.  Closing straight
   // into state at a 32-byte stride made lane 7 write bytes 224..287 and run
   // past the caller's 256-byte buffer, so each lane is finished in its own
   // 64-byte buffer and only the leading 32 bytes are copied out below.
   sph_fugue512( &ctx.fugue, hash0, 64 );
   sph_fugue512_close( &ctx.fugue, hash0 );
   memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
           sizeof(sph_fugue512_context) );
   sph_fugue512( &ctx.fugue, hash1, 64 );
   sph_fugue512_close( &ctx.fugue, hash1 );
   memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
           sizeof(sph_fugue512_context) );
   sph_fugue512( &ctx.fugue, hash2, 64 );
   sph_fugue512_close( &ctx.fugue, hash2 );
   memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
           sizeof(sph_fugue512_context) );
   sph_fugue512( &ctx.fugue, hash3, 64 );
   sph_fugue512_close( &ctx.fugue, hash3 );
   memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
           sizeof(sph_fugue512_context) );
   sph_fugue512( &ctx.fugue, hash4, 64 );
   sph_fugue512_close( &ctx.fugue, hash4 );
   memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
           sizeof(sph_fugue512_context) );
   sph_fugue512( &ctx.fugue, hash5, 64 );
   sph_fugue512_close( &ctx.fugue, hash5 );
   memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
           sizeof(sph_fugue512_context) );
   sph_fugue512( &ctx.fugue, hash6, 64 );
   sph_fugue512_close( &ctx.fugue, hash6 );
   memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
           sizeof(sph_fugue512_context) );
   sph_fugue512( &ctx.fugue, hash7, 64 );
   sph_fugue512_close( &ctx.fugue, hash7 );

   // Output: the first 32 bytes of every lane's digest, packed back to back.
   memcpy( out,       hash0, 32 );
   memcpy( out +  32, hash1, 32 );
   memcpy( out +  64, hash2, 32 );
   memcpy( out +  96, hash3, 32 );
   memcpy( out + 128, hash4, 32 );
   memcpy( out + 160, hash5, 32 );
   memcpy( out + 192, hash6, 32 );
   memcpy( out + 224, hash7, 32 );
}
/*
 * Scan nonces eight at a time with the 8-lane x13bcd hash.
 *
 * Starts at the nonce stored in work->data[19] and advances in steps of 8
 * until max_nonce - 8 is reached or a restart is flagged for this thread.
 * Lanes whose top hash word passes the quick target check and then
 * fulltest() are submitted through submit_lane_solution(), except in
 * benchmark mode.
 *
 * hashes_done receives the number of nonces covered.  Always returns 0.
 */
int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t hash[8*8] __attribute__ ((aligned (128)));
   uint32_t vdata[24*8] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t last_nonce = max_nonce - 8;
   const uint32_t Htarg = ptarget[7];
   __m512i *noncev = (__m512i*)vdata + 9;   // aligned
   int thr_id = mythr->id;  // thr_id arg is deprecated
   uint32_t nonce = first_nonce;

   // Byte-swap and interleave the 80-byte header for all eight lanes.
   mm512_bswap32_intrlv80_8x64( vdata, pdata );

   // The first 64 bytes are the same for every nonce: hash them once and
   // keep the Blake midstate for x13bcd_8way_hash() to resume from.
   blake512_8way_init( &x13bcd_8way_ctx_mid );
   blake512_8way_update( &x13bcd_8way_ctx_mid, vdata, 64 );

   do
   {
      // Drop nonce .. nonce+7 into the nonce word of the eight lanes.
      *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
              _mm512_set_epi32( nonce+7, 0, nonce+6, 0, nonce+5, 0, nonce+4, 0,
                                nonce+3, 0, nonce+2, 0, nonce+1, 0, nonce, 0 ) ),
              *noncev );

      x13bcd_8way_hash( hash, vdata );
      pdata[19] = nonce;

      for ( int lane = 0; lane < 8; lane++ )
      {
         uint32_t *lane_hash = hash + ( lane << 3 );

         // Cheap reject on the most significant word before the full test.
         if ( lane_hash[7] > Htarg )
            continue;
         if ( !fulltest( lane_hash, ptarget ) || opt_benchmark )
            continue;

         pdata[19] = nonce + lane;
         submit_lane_solution( work, lane_hash, mythr, lane );
      }

      nonce += 8;
   } while ( ( nonce < last_nonce ) && !work_restart[thr_id].restart );

   *hashes_done = nonce - first_nonce;
   return 0;
}
#elif defined(X13BCD_4WAY)
typedef struct { typedef struct {
blake512_4way_context blake; blake512_4way_context blake;
bmw512_4way_context bmw; bmw512_4way_context bmw;
@@ -68,11 +340,11 @@ void x13bcd_4way_hash( void *state, const void *input )
// Blake // Blake
memcpy( &ctx.blake, &x13bcd_ctx_mid, sizeof(x13bcd_ctx_mid) ); memcpy( &ctx.blake, &x13bcd_ctx_mid, sizeof(x13bcd_ctx_mid) );
blake512_4way( &ctx.blake, input + (64<<2), 16 ); blake512_4way_update( &ctx.blake, input + (64<<2), 16 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// Bmw // Bmw
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -91,15 +363,15 @@ void x13bcd_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// Skein // Skein
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// JH // JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// Keccak // Keccak
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -118,7 +390,7 @@ void x13bcd_4way_hash( void *state, const void *input )
uint32_t sm3_hash3[32] __attribute__ ((aligned (32))); uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
memset( sm3_hash3, 0, sizeof sm3_hash3 ); memset( sm3_hash3, 0, sizeof sm3_hash3 );
sm3_4way( &ctx.sm3, vhash, 64 ); sm3_4way_update( &ctx.sm3, vhash, 64 );
sm3_4way_close( &ctx.sm3, sm3_vhash ); sm3_4way_close( &ctx.sm3, sm3_vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
@@ -171,20 +443,23 @@ void x13bcd_4way_hash( void *state, const void *input )
// Hamsi parallel 4x32x2 // Hamsi parallel 4x32x2
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// Fugue serial // Fugue serial
sph_fugue512( &ctx.fugue, hash0, 64 ); sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 ); sph_fugue512_close( &ctx.fugue, hash0 );
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) ); memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, hash1, 64 ); sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 ); sph_fugue512_close( &ctx.fugue, hash1 );
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) ); memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, hash2, 64 ); sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 ); sph_fugue512_close( &ctx.fugue, hash2 );
memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) ); memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, hash3, 64 ); sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 ); sph_fugue512_close( &ctx.fugue, hash3 );
@@ -203,44 +478,33 @@ int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t n = pdata[19]; uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned __m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap32_intrlv80_4x64( vdata, pdata ); mm256_bswap32_intrlv80_4x64( vdata, pdata );
blake512_4way_init( &x13bcd_ctx_mid ); blake512_4way_init( &x13bcd_ctx_mid );
blake512_4way( &x13bcd_ctx_mid, vdata, 64 ); blake512_4way( &x13bcd_ctx_mid, vdata, 64 );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
for ( int m=0; m < 6; m++ ) x13bcd_4way_hash( hash, vdata );
if ( Htarg <= htmax[m] ) pdata[19] = n;
{
uint32_t mask = masks[m];
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x13bcd_4way_hash( hash, vdata ); for ( int i = 0; i < 4; i++ )
pdata[19] = n; if ( (hash+(i<<3))[7] <= Htarg )
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
for ( int i = 0; i < 4; i++ ) {
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) ) pdata[19] = n+i;
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) submit_lane_solution( work, hash+(i<<3), mythr, i );
{ }
pdata[19] = n+i; n += 4;
submit_lane_solution( work, hash+(i<<3), mythr, i ); } while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
} *hashes_done = n - first_nonce;
n += 4;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
return 0; return 0;
} }

View File

@@ -71,13 +71,11 @@ void x13sm3_4way_hash( void *state, const void *input )
// Blake // Blake
memcpy( &ctx.blake, &x13sm3_ctx_mid, sizeof(x13sm3_ctx_mid) ); memcpy( &ctx.blake, &x13sm3_ctx_mid, sizeof(x13sm3_ctx_mid) );
blake512_4way( &ctx.blake, input + (64<<2), 16 ); blake512_4way_update( &ctx.blake, input + (64<<2), 16 );
// blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// Bmw // Bmw
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -96,15 +94,15 @@ void x13sm3_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// Skein // Skein
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// JH // JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// Keccak // Keccak
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// Serial to the end // Serial to the end
@@ -180,13 +178,13 @@ void x13sm3_4way_hash( void *state, const void *input )
uint32_t sm3_hash3[32] __attribute__ ((aligned (32))); uint32_t sm3_hash3[32] __attribute__ ((aligned (32)));
memset( sm3_hash3, 0, sizeof sm3_hash3 ); memset( sm3_hash3, 0, sizeof sm3_hash3 );
sm3_4way( &ctx.sm3, vhash, 64 ); sm3_4way_update( &ctx.sm3, vhash, 64 );
sm3_4way_close( &ctx.sm3, sm3_vhash ); sm3_4way_close( &ctx.sm3, sm3_vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );
// Hamsi parallel 4x32x2 // Hamsi parallel 4x32x2
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

View File

@@ -17,7 +17,11 @@ bool register_x13sm3_algo( algo_gate_t* gate )
bool register_x13bcd_algo( algo_gate_t* gate ) bool register_x13bcd_algo( algo_gate_t* gate )
{ {
#if defined (X13SM3_4WAY) #if defined (X13BCD_8WAY)
init_x13bcd_8way_ctx();
gate->scanhash = (void*)&scanhash_x13bcd_8way;
gate->hash = (void*)&x13bcd_8way_hash;
#elif defined (X13BCD_4WAY)
init_x13bcd_4way_ctx(); init_x13bcd_4way_ctx();
gate->scanhash = (void*)&scanhash_x13bcd_4way; gate->scanhash = (void*)&scanhash_x13bcd_4way;
gate->hash = (void*)&x13bcd_4way_hash; gate->hash = (void*)&x13bcd_4way_hash;
@@ -26,7 +30,7 @@ bool register_x13bcd_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x13bcd; gate->scanhash = (void*)&scanhash_x13bcd;
gate->hash = (void*)&x13bcd_hash; gate->hash = (void*)&x13bcd_hash;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
return true; return true;
}; };

View File

@@ -5,13 +5,11 @@
#include <stdint.h> #include <stdint.h>
#if defined(__AVX2__) && defined(__AES__) #if defined(__AVX2__) && defined(__AES__)
#define X13SM3_4WAY #define X13SM3_4WAY 1
#endif #endif
bool register_x13sm3_algo( algo_gate_t* gate ); bool register_x13sm3_algo( algo_gate_t* gate );
bool register_x13bcd_algo( algo_gate_t* gate );
#if defined(X13SM3_4WAY) #if defined(X13SM3_4WAY)
void x13sm3_4way_hash( void *state, const void *input ); void x13sm3_4way_hash( void *state, const void *input );
@@ -19,18 +17,39 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
void init_x13sm3_4way_ctx(); void init_x13sm3_4way_ctx();
void x13bcd_4way_hash( void *state, const void *input ); #else
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_4way_ctx();
#endif
void x13sm3_hash( void *state, const void *input ); void x13sm3_hash( void *state, const void *input );
int scanhash_x13sm3( struct work *work, uint32_t max_nonce, int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
void init_x13sm3_ctx(); void init_x13sm3_ctx();
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X13BCD_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X13BCD_4WAY 1
#endif
bool register_x13bcd_algo( algo_gate_t* gate );
#if defined(X13BCD_8WAY)
void x13bcd_8way_hash( void *state, const void *input );
int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_8way_ctx();
#elif defined(X13BCD_4WAY)
void x13bcd_4way_hash( void *state, const void *input );
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_4way_ctx();
#else
void x13bcd_hash( void *state, const void *input ); void x13bcd_hash( void *state, const void *input );
int scanhash_x13bcd( struct work *work, uint32_t max_nonce, int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
@@ -38,3 +57,4 @@ void init_x13bcd_ctx();
#endif #endif
#endif

View File

@@ -34,14 +34,14 @@ void polytimos_4way_hash( void *output, const void *input )
poly_4way_context_overlay ctx; poly_4way_context_overlay ctx;
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, input, 80 ); skein512_4way_update( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// Need to convert from 64 bit interleaved to 32 bit interleaved. // Need to convert from 64 bit interleaved to 32 bit interleaved.
uint32_t vhash32[16*4]; uint32_t vhash32[16*4];
rintrlv_4x64_4x32( vhash32, vhash, 512 ); rintrlv_4x64_4x32( vhash32, vhash, 512 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash32, 64 ); shabal512_4way_update( &ctx.shabal, vhash32, 64 );
shabal512_4way_close( &ctx.shabal, vhash32 ); shabal512_4way_close( &ctx.shabal, vhash32 );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash32, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash32, 512 );

View File

@@ -38,7 +38,7 @@ void veltor_4way_hash( void *output, const void *input )
veltor_4way_ctx_holder ctx __attribute__ ((aligned (64))); veltor_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &veltor_4way_ctx, sizeof(veltor_4way_ctx) ); memcpy( &ctx, &veltor_4way_ctx, sizeof(veltor_4way_ctx) );
skein512_4way( &ctx.skein, input, 80 ); skein512_4way_update( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -55,7 +55,7 @@ void veltor_4way_hash( void *output, const void *input )
sph_shavite512_close( &ctx.shavite, hash3 ); sph_shavite512_close( &ctx.shavite, hash3 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );

View File

@@ -325,11 +325,11 @@ void x14_4way_hash( void *state, const void *input )
memcpy( &ctx, &x14_4way_ctx, sizeof(x14_4way_ctx) ); memcpy( &ctx, &x14_4way_ctx, sizeof(x14_4way_ctx) );
// 1 Blake // 1 Blake
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw // 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -348,15 +348,15 @@ void x14_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// 4 Skein // 4 Skein
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// 5 JH // 5 JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// 6 Keccak // 6 Keccak
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// Serial // Serial
@@ -420,7 +420,7 @@ void x14_4way_hash( void *state, const void *input )
// 12 Hamsi parallel 4way 32 bit // 12 Hamsi parallel 4way 32 bit
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

View File

@@ -374,11 +374,11 @@ void x15_4way_hash( void *state, const void *input )
memcpy( &ctx, &x15_4way_ctx, sizeof(x15_4way_ctx) ); memcpy( &ctx, &x15_4way_ctx, sizeof(x15_4way_ctx) );
// 1 Blake // 1 Blake
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw // 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -397,15 +397,15 @@ void x15_4way_hash( void *state, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
// 4 Skein // 4 Skein
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// 5 JH // 5 JH
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// 6 Keccak // 6 Keccak
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// Serial to the end // Serial to the end
@@ -469,7 +469,7 @@ void x15_4way_hash( void *state, const void *input )
// 12 Hamsi parallel 4way 32 bit // 12 Hamsi parallel 4way 32 bit
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

View File

@@ -463,11 +463,11 @@ void x16r_4way_hash( void* output, const void* input )
case BLAKE: case BLAKE:
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
if ( i == 0 ) if ( i == 0 )
blake512_4way( &ctx.blake, input, size ); blake512_4way_update( &ctx.blake, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way( &ctx.blake, vhash, size ); blake512_4way_update( &ctx.blake, vhash, size );
} }
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -475,11 +475,11 @@ void x16r_4way_hash( void* output, const void* input )
case BMW: case BMW:
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
if ( i == 0 ) if ( i == 0 )
bmw512_4way( &ctx.bmw, input, size ); bmw512_4way_update( &ctx.bmw, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way( &ctx.bmw, vhash, size ); bmw512_4way_update( &ctx.bmw, vhash, size );
} }
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -501,11 +501,11 @@ void x16r_4way_hash( void* output, const void* input )
case SKEIN: case SKEIN:
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
if ( i == 0 ) if ( i == 0 )
skein512_4way( &ctx.skein, input, size ); skein512_4way_update( &ctx.skein, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way( &ctx.skein, vhash, size ); skein512_4way_update( &ctx.skein, vhash, size );
} }
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -513,11 +513,11 @@ void x16r_4way_hash( void* output, const void* input )
case JH: case JH:
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
if ( i == 0 ) if ( i == 0 )
jh512_4way( &ctx.jh, input, size ); jh512_4way_update( &ctx.jh, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way( &ctx.jh, vhash, size ); jh512_4way_update( &ctx.jh, vhash, size );
} }
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -525,11 +525,11 @@ void x16r_4way_hash( void* output, const void* input )
case KECCAK: case KECCAK:
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
if ( i == 0 ) if ( i == 0 )
keccak512_4way( &ctx.keccak, input, size ); keccak512_4way_update( &ctx.keccak, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
keccak512_4way( &ctx.keccak, vhash, size ); keccak512_4way_update( &ctx.keccak, vhash, size );
} }
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -599,7 +599,7 @@ void x16r_4way_hash( void* output, const void* input )
case HAMSI: case HAMSI:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, size ); hamsi512_4way_update( &ctx.hamsi, vhash, size );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break; break;
@@ -620,7 +620,7 @@ void x16r_4way_hash( void* output, const void* input )
case SHABAL: case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, size ); shabal512_4way_update( &ctx.shabal, vhash, size );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
break; break;
@@ -641,7 +641,7 @@ void x16r_4way_hash( void* output, const void* input )
case SHA_512: case SHA_512:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, size ); sha512_4way_update( &ctx.sha512, vhash, size );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break; break;

View File

@@ -458,11 +458,11 @@ void x16rt_4way_hash( void* output, const void* input )
case BLAKE: case BLAKE:
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
if ( i == 0 ) if ( i == 0 )
blake512_4way( &ctx.blake, input, size ); blake512_4way_update( &ctx.blake, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way( &ctx.blake, vhash, size ); blake512_4way_update( &ctx.blake, vhash, size );
} }
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -470,11 +470,11 @@ void x16rt_4way_hash( void* output, const void* input )
case BMW: case BMW:
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
if ( i == 0 ) if ( i == 0 )
bmw512_4way( &ctx.bmw, input, size ); bmw512_4way_update( &ctx.bmw, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way( &ctx.bmw, vhash, size ); bmw512_4way_update( &ctx.bmw, vhash, size );
} }
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -496,11 +496,11 @@ void x16rt_4way_hash( void* output, const void* input )
case SKEIN: case SKEIN:
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
if ( i == 0 ) if ( i == 0 )
skein512_4way( &ctx.skein, input, size ); skein512_4way_update( &ctx.skein, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way( &ctx.skein, vhash, size ); skein512_4way_update( &ctx.skein, vhash, size );
} }
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -508,11 +508,11 @@ void x16rt_4way_hash( void* output, const void* input )
case JH: case JH:
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
if ( i == 0 ) if ( i == 0 )
jh512_4way( &ctx.jh, input, size ); jh512_4way_update( &ctx.jh, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way( &ctx.jh, vhash, size ); jh512_4way_update( &ctx.jh, vhash, size );
} }
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -520,11 +520,11 @@ void x16rt_4way_hash( void* output, const void* input )
case KECCAK: case KECCAK:
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
if ( i == 0 ) if ( i == 0 )
keccak512_4way( &ctx.keccak, input, size ); keccak512_4way_update( &ctx.keccak, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
keccak512_4way( &ctx.keccak, vhash, size ); keccak512_4way_update( &ctx.keccak, vhash, size );
} }
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -594,7 +594,7 @@ void x16rt_4way_hash( void* output, const void* input )
case HAMSI: case HAMSI:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, size ); hamsi512_4way_update( &ctx.hamsi, vhash, size );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -615,7 +615,7 @@ void x16rt_4way_hash( void* output, const void* input )
case SHABAL: case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, size ); shabal512_4way_update( &ctx.shabal, vhash, size );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -636,7 +636,7 @@ void x16rt_4way_hash( void* output, const void* input )
case SHA_512: case SHA_512:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, size ); sha512_4way_update( &ctx.sha512, vhash, size );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;

View File

@@ -553,11 +553,11 @@ void x16rv2_4way_hash( void* output, const void* input )
case BLAKE: case BLAKE:
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
if ( i == 0 ) if ( i == 0 )
blake512_4way( &ctx.blake, input, size ); blake512_4way_update( &ctx.blake, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way( &ctx.blake, vhash, size ); blake512_4way_update( &ctx.blake, vhash, size );
} }
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -565,11 +565,11 @@ void x16rv2_4way_hash( void* output, const void* input )
case BMW: case BMW:
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
if ( i == 0 ) if ( i == 0 )
bmw512_4way( &ctx.bmw, input, size ); bmw512_4way_update( &ctx.bmw, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way( &ctx.bmw, vhash, size ); bmw512_4way_update( &ctx.bmw, vhash, size );
} }
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -591,11 +591,11 @@ void x16rv2_4way_hash( void* output, const void* input )
case SKEIN: case SKEIN:
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
if ( i == 0 ) if ( i == 0 )
skein512_4way( &ctx.skein, input, size ); skein512_4way_update( &ctx.skein, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way( &ctx.skein, vhash, size ); skein512_4way_update( &ctx.skein, vhash, size );
} }
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -603,11 +603,11 @@ void x16rv2_4way_hash( void* output, const void* input )
case JH: case JH:
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
if ( i == 0 ) if ( i == 0 )
jh512_4way( &ctx.jh, input, size ); jh512_4way_update( &ctx.jh, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way( &ctx.jh, vhash, size ); jh512_4way_update( &ctx.jh, vhash, size );
} }
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -631,7 +631,7 @@ void x16rv2_4way_hash( void* output, const void* input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -721,7 +721,7 @@ void x16rv2_4way_hash( void* output, const void* input )
case HAMSI: case HAMSI:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, size ); hamsi512_4way_update( &ctx.hamsi, vhash, size );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -742,7 +742,7 @@ void x16rv2_4way_hash( void* output, const void* input )
case SHABAL: case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, size ); shabal512_4way_update( &ctx.shabal, vhash, size );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -779,7 +779,7 @@ void x16rv2_4way_hash( void* output, const void* input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;

View File

@@ -578,11 +578,11 @@ void x21s_4way_hash( void* output, const void* input )
case BLAKE: case BLAKE:
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
if ( i == 0 ) if ( i == 0 )
blake512_4way( &ctx.blake, input, size ); blake512_4way_update( &ctx.blake, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way( &ctx.blake, vhash, size ); blake512_4way_update( &ctx.blake, vhash, size );
} }
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -590,11 +590,11 @@ void x21s_4way_hash( void* output, const void* input )
case BMW: case BMW:
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
if ( i == 0 ) if ( i == 0 )
bmw512_4way( &ctx.bmw, input, size ); bmw512_4way_update( &ctx.bmw, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way( &ctx.bmw, vhash, size ); bmw512_4way_update( &ctx.bmw, vhash, size );
} }
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -616,11 +616,11 @@ void x21s_4way_hash( void* output, const void* input )
case SKEIN: case SKEIN:
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
if ( i == 0 ) if ( i == 0 )
skein512_4way( &ctx.skein, input, size ); skein512_4way_update( &ctx.skein, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way( &ctx.skein, vhash, size ); skein512_4way_update( &ctx.skein, vhash, size );
} }
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -628,11 +628,11 @@ void x21s_4way_hash( void* output, const void* input )
case JH: case JH:
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
if ( i == 0 ) if ( i == 0 )
jh512_4way( &ctx.jh, input, size ); jh512_4way_update( &ctx.jh, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way( &ctx.jh, vhash, size ); jh512_4way_update( &ctx.jh, vhash, size );
} }
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -640,11 +640,11 @@ void x21s_4way_hash( void* output, const void* input )
case KECCAK: case KECCAK:
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
if ( i == 0 ) if ( i == 0 )
keccak512_4way( &ctx.keccak, input, size ); keccak512_4way_update( &ctx.keccak, input, size );
else else
{ {
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
keccak512_4way( &ctx.keccak, vhash, size ); keccak512_4way_update( &ctx.keccak, vhash, size );
} }
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -714,7 +714,7 @@ void x21s_4way_hash( void* output, const void* input )
case HAMSI: case HAMSI:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, size ); hamsi512_4way_update( &ctx.hamsi, vhash, size );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -735,7 +735,7 @@ void x21s_4way_hash( void* output, const void* input )
case SHABAL: case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, size ); shabal512_4way_update( &ctx.shabal, vhash, size );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -756,7 +756,7 @@ void x21s_4way_hash( void* output, const void* input )
case SHA_512: case SHA_512:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 ); intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, size ); sha512_4way_update( &ctx.sha512, vhash, size );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break; break;
@@ -767,7 +767,7 @@ void x21s_4way_hash( void* output, const void* input )
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhash, 64 ); haval256_5_4way_update( &ctx.haval, vhash, 64 );
haval256_5_4way_close( &ctx.haval, vhash ); haval256_5_4way_close( &ctx.haval, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
@@ -831,7 +831,7 @@ void x21s_4way_hash( void* output, const void* input )
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
sha256_4way_init( &ctx.sha256 ); sha256_4way_init( &ctx.sha256 );
sha256_4way( &ctx.sha256, vhash, 64 ); sha256_4way_update( &ctx.sha256, vhash, 64 );
sha256_4way_close( &ctx.sha256, vhash ); sha256_4way_close( &ctx.sha256, vhash );
dintrlv_4x32( output, output+32, output+64,output+96, vhash, 256 ); dintrlv_4x32( output, output+32, output+64,output+96, vhash, 256 );

View File

@@ -1319,7 +1319,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target; const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8; const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned __m512i *noncev = (__m512i*)vdata + 9; // aligned
uint32_t n = first_nonce; uint32_t n = first_nonce;
const int thr_id = mythr->id; const int thr_id = mythr->id;
@@ -1350,8 +1350,6 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
return 0; return 0;
} }
#elif defined(SONOA_4WAY) #elif defined(SONOA_4WAY)
union _sonoa_4way_context_overlay union _sonoa_4way_context_overlay
@@ -1391,11 +1389,11 @@ void sonoa_4way_hash( void *state, const void *input )
// 1 // 1
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1412,15 +1410,15 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -1466,7 +1464,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1483,15 +1481,15 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -1535,13 +1533,13 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
// 3 // 3
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1558,15 +1556,15 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -1610,7 +1608,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1632,7 +1630,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1649,15 +1647,15 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -1701,7 +1699,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1722,13 +1720,13 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
rintrlv_4x32_4x64( vhashB, vhash, 512 ); rintrlv_4x32_4x64( vhashB, vhash, 512 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhashB, 64 ); hamsi512_4way_update( &ctx.hamsi, vhashB, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1758,13 +1756,13 @@ void sonoa_4way_hash( void *state, const void *input )
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 ); rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
rintrlv_4x64_4x32( vhashB, vhash, 512 ); rintrlv_4x64_4x32( vhashB, vhash, 512 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhashB, 64 ); shabal512_4way_update( &ctx.shabal, vhashB, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
@@ -1781,15 +1779,15 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -1833,7 +1831,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1854,7 +1852,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
@@ -1877,7 +1875,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1894,15 +1892,15 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -1946,7 +1944,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -1967,7 +1965,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
@@ -1988,7 +1986,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -2011,7 +2009,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -2028,15 +2026,15 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -2080,7 +2078,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -2101,7 +2099,7 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
@@ -2122,13 +2120,13 @@ void sonoa_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
rintrlv_4x64_4x32( vhashB, vhash, 512 ); rintrlv_4x64_4x32( vhashB, vhash, 512 );
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashB, 64 ); haval256_5_4way_update( &ctx.haval, vhashB, 64 );
haval256_5_4way_close( &ctx.haval, state ); haval256_5_4way_close( &ctx.haval, state );
} }

View File

@@ -360,12 +360,12 @@ void x17_4way_hash( void *state, const void *input )
// 1 Blake parallel 4 way 64 bit // 1 Blake parallel 4 way 64 bit
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw // 2 Bmw
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serialize // Serialize
@@ -386,17 +386,17 @@ void x17_4way_hash( void *state, const void *input )
// 4 Skein parallel 4 way 64 bit // 4 Skein parallel 4 way 64 bit
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
// 5 JH // 5 JH
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
// 6 Keccak // 6 Keccak
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
// 7 Luffa parallel 2 way 128 bit // 7 Luffa parallel 2 way 128 bit
@@ -428,7 +428,6 @@ void x17_4way_hash( void *state, const void *input )
dintrlv_2x128_512( hash0, hash1, vhashA ); dintrlv_2x128_512( hash0, hash1, vhashA );
dintrlv_2x128_512( hash2, hash3, vhashB ); dintrlv_2x128_512( hash2, hash3, vhashB );
// 11 Echo serial // 11 Echo serial
init_echo( &ctx.echo, 512 ); init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0, update_final_echo( &ctx.echo, (BitSequence *)hash0,
@@ -447,7 +446,7 @@ void x17_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
@@ -470,7 +469,7 @@ void x17_4way_hash( void *state, const void *input )
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
@@ -493,14 +492,14 @@ void x17_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
// 17 Haval parallel 32 bit // 17 Haval parallel 32 bit
rintrlv_4x64_4x32( vhashB, vhash, 512 ); rintrlv_4x64_4x32( vhashB, vhash, 512 );
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashB, 64 ); haval256_5_4way_update( &ctx.haval, vhashB, 64 );
haval256_5_4way_close( &ctx.haval, state ); haval256_5_4way_close( &ctx.haval, state );
} }

View File

@@ -569,12 +569,12 @@ void xevan_4way_hash( void *output, const void *input )
// parallel 4 way // parallel 4 way
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close(&ctx.blake, vhash); blake512_4way_close(&ctx.blake, vhash);
memset( &vhash[8<<2], 0, 64<<2 ); memset( &vhash[8<<2], 0, 64<<2 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, dataLen ); bmw512_4way_update( &ctx.bmw, vhash, dataLen );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serial // Serial
@@ -597,15 +597,15 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, dataLen ); skein512_4way_update( &ctx.skein, vhash, dataLen );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, dataLen ); jh512_4way_update( &ctx.jh, vhash, dataLen );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, dataLen ); keccak512_4way_update( &ctx.keccak, vhash, dataLen );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
@@ -649,7 +649,7 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, dataLen ); hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
@@ -671,7 +671,7 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, dataLen ); shabal512_4way_update( &ctx.shabal, vhash, dataLen );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
@@ -693,13 +693,13 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, dataLen ); sha512_4way_update( &ctx.sha512, vhash, dataLen );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 ); rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 );
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashA, dataLen ); haval256_5_4way_update( &ctx.haval, vhashA, dataLen );
haval256_5_4way_close( &ctx.haval, vhashA ); haval256_5_4way_close( &ctx.haval, vhashA );
rintrlv_4x32_4x64( vhash, vhashA, dataLen<<3 ); rintrlv_4x32_4x64( vhash, vhashA, dataLen<<3 );
@@ -707,11 +707,11 @@ void xevan_4way_hash( void *output, const void *input )
memset( &vhash[ 4<<2 ], 0, (dataLen-32) << 2 ); memset( &vhash[ 4<<2 ], 0, (dataLen-32) << 2 );
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, vhash, dataLen ); blake512_4way_update( &ctx.blake, vhash, dataLen );
blake512_4way_close(&ctx.blake, vhash); blake512_4way_close(&ctx.blake, vhash);
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, dataLen ); bmw512_4way_update( &ctx.bmw, vhash, dataLen );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
@@ -732,15 +732,15 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, dataLen ); skein512_4way_update( &ctx.skein, vhash, dataLen );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, dataLen ); jh512_4way_update( &ctx.jh, vhash, dataLen );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, dataLen ); keccak512_4way_update( &ctx.keccak, vhash, dataLen );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
@@ -784,7 +784,7 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, dataLen ); hamsi512_4way_update( &ctx.hamsi, vhash, dataLen );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 ); dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
@@ -805,7 +805,7 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, dataLen ); shabal512_4way_update( &ctx.shabal, vhash, dataLen );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 ); dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
@@ -826,13 +826,13 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 ); intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, dataLen ); sha512_4way_update( &ctx.sha512, vhash, dataLen );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 ); rintrlv_4x64_4x32( vhashA, vhash, dataLen<<3 );
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashA, dataLen ); haval256_5_4way_update( &ctx.haval, vhashA, dataLen );
haval256_5_4way_close( &ctx.haval, output ); haval256_5_4way_close( &ctx.haval, output );
} }

View File

@@ -439,10 +439,8 @@ int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
return 0; return 0;
} }
#elif defined(X22I_4WAY) #elif defined(X22I_4WAY)
union _x22i_4way_ctx_overlay union _x22i_4way_ctx_overlay
{ {
blake512_4way_context blake; blake512_4way_context blake;
@@ -477,8 +475,6 @@ void x22i_4way_hash( void *output, const void *input )
uint64_t vhash[8*4] __attribute__ ((aligned (64))); uint64_t vhash[8*4] __attribute__ ((aligned (64)));
uint64_t vhashA[8*4] __attribute__ ((aligned (64))); uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
uint64_t vhashB[8*4] __attribute__ ((aligned (64))); uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
// unsigned char hash[64 * 4] __attribute__((aligned(64))) = {0};
unsigned char hashA0[64] __attribute__((aligned(64))) = {0}; unsigned char hashA0[64] __attribute__((aligned(64))) = {0};
unsigned char hashA1[64] __attribute__((aligned(32))) = {0}; unsigned char hashA1[64] __attribute__((aligned(32))) = {0};
unsigned char hashA2[64] __attribute__((aligned(32))) = {0}; unsigned char hashA2[64] __attribute__((aligned(32))) = {0};
@@ -486,13 +482,12 @@ void x22i_4way_hash( void *output, const void *input )
x22i_ctx_overlay ctx; x22i_ctx_overlay ctx;
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
@@ -511,15 +506,15 @@ void x22i_4way_hash( void *output, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 ); rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
@@ -560,13 +555,11 @@ void x22i_4way_hash( void *output, const void *input )
update_final_echo ( &ctx.echo, (BitSequence*)hash3, update_final_echo ( &ctx.echo, (BitSequence*)hash3,
(const BitSequence*)hash3, 512 ); (const BitSequence*)hash3, 512 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue ); sph_fugue512_init( &ctx.fugue );
@@ -585,9 +578,8 @@ void x22i_4way_hash( void *output, const void *input )
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8], vhash ); dintrlv_4x32_512( &hash0[8], &hash1[8], &hash2[8], &hash3[8], vhash );
sph_whirlpool_init( &ctx.whirlpool ); sph_whirlpool_init( &ctx.whirlpool );
@@ -606,12 +598,10 @@ void x22i_4way_hash( void *output, const void *input )
intrlv_4x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16] ); intrlv_4x64_512( vhash, &hash0[16], &hash1[16], &hash2[16], &hash3[16] );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash ); dintrlv_4x64_512( &hash0[24], &hash1[24], &hash2[24], &hash3[24], vhash );
// InitializeSWIFFTX();
ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0); ComputeSingleSWIFFTX((unsigned char*)hash0, (unsigned char*)hashA0);
ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1); ComputeSingleSWIFFTX((unsigned char*)hash1, (unsigned char*)hashA1);
ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2); ComputeSingleSWIFFTX((unsigned char*)hash2, (unsigned char*)hashA2);
@@ -622,9 +612,8 @@ void x22i_4way_hash( void *output, const void *input )
memset( vhash, 0, 64*4 ); memset( vhash, 0, 64*4 );
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashA, 64 ); haval256_5_4way_update( &ctx.haval, vhashA, 64 );
haval256_5_4way_close( &ctx.haval, vhash ); haval256_5_4way_close( &ctx.haval, vhash );
dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
memset( hashA0, 0, 64 ); memset( hashA0, 0, 64 );
@@ -675,10 +664,8 @@ void x22i_4way_hash( void *output, const void *input )
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
sha256_4way_init( &ctx.sha256 ); sha256_4way_init( &ctx.sha256 );
sha256_4way( &ctx.sha256, vhash, 64 ); sha256_4way_update( &ctx.sha256, vhash, 64 );
sha256_4way_close( &ctx.sha256, output ); sha256_4way_close( &ctx.sha256, output );
// memcpy(output, hash, 32);
} }

View File

@@ -1,5 +1,9 @@
#include "x22i-gate.h" #include "x22i-gate.h"
// Ryzen has poor AVX2 performance so use SHA over AVX2.
// Intel has AVX512 so use AVX512 over SHA.
// When Ryzen AVX2 improves use AVX2 over SHA.
bool register_x22i_algo( algo_gate_t* gate ) bool register_x22i_algo( algo_gate_t* gate )
{ {
#if defined (X22I_8WAY) #if defined (X22I_8WAY)
@@ -23,17 +27,17 @@ bool register_x25x_algo( algo_gate_t* gate )
#if defined (X25X_8WAY) #if defined (X25X_8WAY)
gate->scanhash = (void*)&scanhash_x25x_8way; gate->scanhash = (void*)&scanhash_x25x_8way;
gate->hash = (void*)&x25x_8way_hash; gate->hash = (void*)&x25x_8way_hash;
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
#elif defined (X25X_4WAY) #elif defined (X25X_4WAY)
gate->scanhash = (void*)&scanhash_x25x_4way; gate->scanhash = (void*)&scanhash_x25x_4way;
gate->hash = (void*)&x25x_4way_hash; gate->hash = (void*)&x25x_4way_hash;
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#else #else
gate->scanhash = (void*)&scanhash_x25x; gate->scanhash = (void*)&scanhash_x25x;
gate->hash = (void*)&x25x_hash; gate->hash = (void*)&x25x_hash;
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT | AVX512_OPT;
#endif #endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT; // gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
return true; return true;
}; };

View File

@@ -34,13 +34,9 @@ int scanhash_x22i( struct work *work, uint32_t max_nonce,
#endif #endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// Big problems with x25x 8 way. It blows up just by increasing the #define X25X_8WAY 1
// buffer sizes and nothing else. It may have to do with accessing 2 dim arrays. #elif defined(__AVX2__) && defined(__AES__)
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// #define X25X_8WAY 1
#if defined(__AVX2__) && defined(__AES__)
#define X25X_4WAY 1 #define X25X_4WAY 1
#endif #endif

View File

@@ -1,7 +1,4 @@
#include "x22i-gate.h" #include "x22i-gate.h"
#if defined(X25X_4WAY)
#include "algo/blake/blake-hash-4way.h" #include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h" #include "algo/bmw/bmw-hash-4way.h"
#include "algo/skein/skein-hash-4way.h" #include "algo/skein/skein-hash-4way.h"
@@ -16,8 +13,11 @@
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/luffa/luffa_for_sse2.h" #include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h" #include "algo/cubehash/cubehash_sse2.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/shavite/sph_shavite.h" #include "algo/shavite/sph_shavite.h"
#include "algo/simd/nist.h" #include "algo/simd/nist.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/fugue/sph_fugue.h" #include "algo/fugue/sph_fugue.h"
#include "algo/whirlpool/sph_whirlpool.h" #include "algo/whirlpool/sph_whirlpool.h"
#include "algo/tiger/sph_tiger.h" #include "algo/tiger/sph_tiger.h"
@@ -27,33 +27,6 @@
#include "algo/panama/sph_panama.h" #include "algo/panama/sph_panama.h"
#include "algo/lanehash/lane.h" #include "algo/lanehash/lane.h"
union _x25x_4way_ctx_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_groestl groestl;
hashState_echo echo;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
haval256_5_4way_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
sha256_4way_context sha256;
sph_panama_context panama;
blake2s_4way_state blake2s;
};
typedef union _x25x_4way_ctx_overlay x25x_4way_ctx_overlay;
void x25x_shuffle( void *hash ) void x25x_shuffle( void *hash )
{ {
// Simple shuffle algorithm, instead of just reversing // Simple shuffle algorithm, instead of just reversing
@@ -81,28 +54,544 @@ void x25x_shuffle( void *hash )
#undef X25X_SHUFFLE_ROUNDS #undef X25X_SHUFFLE_ROUNDS
} }
void x25x_4way_hash( void *output, const void *input ) #if defined(X25X_8WAY)
// Scratch context shared by every hashing stage of x25x_8way_hash.
// All members occupy the same storage, so only one context is valid at a
// time: each stage must run its own init on the member it uses before
// updating it, and nothing carries over from the previous stage.
union _x25x_8way_ctx_overlay
{
blake512_8way_context blake;
bmw512_8way_context bmw;
hashState_groestl groestl;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cube_4way_context cube;
sph_shavite512_context shavite;
simd_4way_context simd;
hashState_echo echo;
hamsi512_8way_context hamsi;
sph_fugue512_context fugue;
shabal512_8way_context shabal;
sph_whirlpool_context whirlpool;
sha512_8way_context sha512;
haval256_5_8way_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
sha256_8way_context sha256;
sph_panama_context panama;
blake2s_8way_state blake2s;
};
// Convenience name used when declaring the local context in x25x_8way_hash.
typedef union _x25x_8way_ctx_overlay x25x_8way_ctx_overlay;
void x25x_8way_hash( void *output, const void *input )
{
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
unsigned char hash0[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash1[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash2[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash3[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash4[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash5[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash6[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash7[25][64] __attribute__((aligned(64))) = {0};
unsigned char vhashX[24][64*8] __attribute__ ((aligned (64)));
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
x25x_8way_ctx_overlay ctx __attribute__ ((aligned (64)));
blake512_8way_init( &ctx.blake );
blake512_8way_update( &ctx.blake, input, 80 );
blake512_8way_close( &ctx.blake, vhash );
dintrlv_8x64_512( hash0[0], hash1[0], hash2[0], hash3[0],
hash4[0], hash5[0], hash6[0], hash7[0], vhash );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhash );
dintrlv_8x64_512( hash0[1], hash1[1], hash2[1], hash3[1],
hash4[1], hash5[1], hash6[1], hash7[1], vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0[2],
(const char*)hash0[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1[2],
(const char*)hash1[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2[2],
(const char*)hash2[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
(const char*)hash3[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash4[2],
(const char*)hash4[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash5[2],
(const char*)hash5[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash6[2],
(const char*)hash6[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash7[2],
(const char*)hash7[1], 512 );
intrlv_8x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2],
hash4[2], hash5[2], hash6[2], hash7[2] );
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
dintrlv_8x64_512( hash0[3], hash1[3], hash2[3], hash3[3],
hash4[3], hash5[3], hash6[3], hash7[3], vhash );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
jh512_8way_close( &ctx.jh, vhash );
dintrlv_8x64_512( hash0[4], hash1[4], hash2[4], hash3[4],
hash4[4], hash5[4], hash6[4], hash7[4], vhash );
keccak512_8way_init( &ctx.keccak );
keccak512_8way_update( &ctx.keccak, vhash, 64 );
keccak512_8way_close( &ctx.keccak, vhash );
dintrlv_8x64_512( hash0[5], hash1[5], hash2[5], hash3[5],
hash4[5], hash5[5], hash6[5], hash7[5], vhash );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
dintrlv_4x128_512( hash0[6], hash1[6], hash2[6], hash3[6], vhashA );
dintrlv_4x128_512( hash4[6], hash5[6], hash6[6], hash7[6], vhashB );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
dintrlv_4x128_512( hash0[7], hash1[7], hash2[7], hash3[7], vhashA );
dintrlv_4x128_512( hash4[7], hash5[7], hash6[7], hash7[7], vhashB );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
sph_shavite512_close(&ctx.shavite, hash0[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash1[7], 64);
sph_shavite512_close(&ctx.shavite, hash1[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash2[7], 64);
sph_shavite512_close(&ctx.shavite, hash2[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
sph_shavite512_close(&ctx.shavite, hash3[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash4[7], 64);
sph_shavite512_close(&ctx.shavite, hash4[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash5[7], 64);
sph_shavite512_close(&ctx.shavite, hash5[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash6[7], 64);
sph_shavite512_close(&ctx.shavite, hash6[8]);
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash7[7], 64);
sph_shavite512_close(&ctx.shavite, hash7[8]);
intrlv_4x128_512( vhashA, hash0[8], hash1[8], hash2[8], hash3[8] );
intrlv_4x128_512( vhashB, hash4[8], hash5[8], hash6[8], hash7[8] );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
dintrlv_4x128_512( hash0[9], hash1[9], hash2[9], hash3[9], vhashA );
dintrlv_4x128_512( hash4[9], hash5[9], hash6[9], hash7[9], vhashB );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0[10],
(const BitSequence*)hash0[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1[10],
(const BitSequence*)hash1[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2[10],
(const BitSequence*)hash2[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3[10],
(const BitSequence*)hash3[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash4[10],
(const BitSequence*)hash4[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash5[10],
(const BitSequence*)hash5[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash6[10],
(const BitSequence*)hash6[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash7[10],
(const BitSequence*)hash7[9], 512 );
intrlv_8x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10],
hash4[10], hash5[10], hash6[10], hash7[10] );
hamsi512_8way_init( &ctx.hamsi );
hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
hamsi512_8way_close( &ctx.hamsi, vhash );
dintrlv_8x64_512( hash0[11], hash1[11], hash2[11], hash3[11],
hash4[11], hash5[11], hash6[11], hash7[11], vhash );
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64);
sph_fugue512_close(&ctx.fugue, hash0[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64);
sph_fugue512_close(&ctx.fugue, hash1[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash2[11], 64);
sph_fugue512_close(&ctx.fugue, hash2[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash3[11], 64);
sph_fugue512_close(&ctx.fugue, hash3[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash4[11], 64);
sph_fugue512_close(&ctx.fugue, hash4[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash5[11], 64);
sph_fugue512_close(&ctx.fugue, hash5[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash6[11], 64);
sph_fugue512_close(&ctx.fugue, hash6[12]);
sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash7[11], 64);
sph_fugue512_close(&ctx.fugue, hash7[12]);
intrlv_8x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12],
hash4[12], hash5[12], hash6[12], hash7[12] );
shabal512_8way_init( &ctx.shabal );
shabal512_8way_update( &ctx.shabal, vhash, 64 );
shabal512_8way_close( &ctx.shabal, vhash );
dintrlv_8x32_512( hash0[13], hash1[13], hash2[13], hash3[13],
hash4[13], hash5[13], hash6[13], hash7[13], vhash );
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash0[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash0[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash1[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash1[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash2[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash2[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash3[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash3[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash4[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash4[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash5[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash5[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash6[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash6[14]);
sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash7[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash7[14]);
intrlv_8x64_512( vhash, hash0[14], hash1[14], hash2[14], hash3[14],
hash4[14], hash5[14], hash6[14], hash7[14] );
sha512_8way_init( &ctx.sha512 );
sha512_8way_update( &ctx.sha512, vhash, 64 );
sha512_8way_close( &ctx.sha512, vhash );
dintrlv_8x64_512( hash0[15], hash1[15], hash2[15], hash3[15],
hash4[15], hash5[15], hash6[15], hash7[15], vhash );
ComputeSingleSWIFFTX((unsigned char*)hash0[12], (unsigned char*)hash0[16]);
ComputeSingleSWIFFTX((unsigned char*)hash1[12], (unsigned char*)hash1[16]);
ComputeSingleSWIFFTX((unsigned char*)hash2[12], (unsigned char*)hash2[16]);
ComputeSingleSWIFFTX((unsigned char*)hash3[12], (unsigned char*)hash3[16]);
ComputeSingleSWIFFTX((unsigned char*)hash4[12], (unsigned char*)hash4[16]);
ComputeSingleSWIFFTX((unsigned char*)hash5[12], (unsigned char*)hash5[16]);
ComputeSingleSWIFFTX((unsigned char*)hash6[12], (unsigned char*)hash6[16]);
ComputeSingleSWIFFTX((unsigned char*)hash7[12], (unsigned char*)hash7[16]);
intrlv_8x32_512( vhashA, hash0[16], hash1[16], hash2[16], hash3[16],
hash4[16], hash5[16], hash6[16], hash7[16] );
memset( vhash, 0, 64*8 );
haval256_5_8way_init( &ctx.haval );
haval256_5_8way_update( &ctx.haval, vhashA, 64 );
haval256_5_8way_close( &ctx.haval, vhash );
dintrlv_8x32_512( hash0[17], hash1[17], hash2[17], hash3[17],
hash4[17], hash5[17], hash6[17], hash7[17], vhash );
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash0[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash0[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash1[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash1[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash2[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash2[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash3[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash3[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash4[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash4[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash5[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash5[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash6[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash6[18]);
sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash7[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash7[18]);
intrlv_2x256( vhash, hash0[18], hash1[18], 256 );
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash0[19], hash1[19], vhash, 256 );
intrlv_2x256( vhash, hash2[18], hash3[18], 256 );
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash2[19], hash3[19], vhash, 256 );
intrlv_2x256( vhash, hash4[18], hash5[18], 256 );
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash4[19], hash5[19], vhash, 256 );
intrlv_2x256( vhash, hash6[18], hash7[18], 256 );
LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash6[19], hash7[19], vhash, 256 );
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash0[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash0[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash1[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash1[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash2[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash2[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash3[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash3[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash4[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash4[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash5[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash5[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash6[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash6[20]);
sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash7[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash7[20]);
intrlv_8x32_512( vhashA, hash0[20], hash1[20], hash2[20], hash3[20],
hash4[20], hash5[20], hash6[20], hash7[20] );
sha256_8way_init( &ctx.sha256 );
sha256_8way_update( &ctx.sha256, vhashA, 64 );
sha256_8way_close( &ctx.sha256, vhash );
dintrlv_8x32_512( hash0[21], hash1[21], hash2[21], hash3[21],
hash4[21], hash5[21], hash6[21], hash7[21], vhash );
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash0[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash0[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash1[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash1[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash2[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash2[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash3[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash3[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash4[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash4[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash5[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash5[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash6[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash6[22]);
sph_panama_init(&ctx.panama);
sph_panama (&ctx.panama, (const void*) hash7[21], 64 );
sph_panama_close(&ctx.panama, (void*) hash7[22]);
laneHash(512, (const BitSequence*)hash0[22], 512, (BitSequence*)hash0[23]);
laneHash(512, (const BitSequence*)hash1[22], 512, (BitSequence*)hash1[23]);
laneHash(512, (const BitSequence*)hash2[22], 512, (BitSequence*)hash2[23]);
laneHash(512, (const BitSequence*)hash3[22], 512, (BitSequence*)hash3[23]);
laneHash(512, (const BitSequence*)hash4[22], 512, (BitSequence*)hash5[23]);
laneHash(512, (const BitSequence*)hash5[22], 512, (BitSequence*)hash5[23]);
laneHash(512, (const BitSequence*)hash6[22], 512, (BitSequence*)hash6[23]);
laneHash(512, (const BitSequence*)hash7[22], 512, (BitSequence*)hash7[23]);
x25x_shuffle( hash0 );
x25x_shuffle( hash1 );
x25x_shuffle( hash2 );
x25x_shuffle( hash3 );
x25x_shuffle( hash4 );
x25x_shuffle( hash5 );
x25x_shuffle( hash6 );
x25x_shuffle( hash7 );
intrlv_8x32_512( vhashX[ 0], hash0[ 0], hash1[ 0], hash2[ 0], hash3[ 0],
hash4[ 0], hash5[ 0], hash6[ 0], hash7[ 0] );
intrlv_8x32_512( vhashX[ 1], hash0[ 1], hash1[ 1], hash2[ 1], hash3[ 1],
hash4[ 1], hash5[ 1], hash6[ 1], hash7[ 1] );
intrlv_8x32_512( vhashX[ 2], hash0[ 2], hash1[ 2], hash2[ 2], hash3[ 2],
hash4[ 2], hash5[ 2], hash6[ 2], hash7[ 2] );
intrlv_8x32_512( vhashX[ 3], hash0[ 3], hash1[ 3], hash2[ 3], hash3[ 3],
hash4[ 3], hash5[ 3], hash6[ 3], hash7[ 3] );
intrlv_8x32_512( vhashX[ 4], hash0[ 4], hash1[ 4], hash2[ 4], hash3[ 4],
hash4[ 4], hash5[ 4], hash6[ 4], hash7[ 4] );
intrlv_8x32_512( vhashX[ 5], hash0[ 5], hash1[ 5], hash2[ 5], hash3[ 5],
hash4[ 5], hash5[ 5], hash6[ 5], hash7[ 5] );
intrlv_8x32_512( vhashX[ 6], hash0[ 6], hash1[ 6], hash2[ 6], hash3[ 6],
hash4[ 6], hash5[ 6], hash6[ 6], hash7[ 6] );
intrlv_8x32_512( vhashX[ 7], hash0[ 7], hash1[ 7], hash2[ 7], hash3[ 7],
hash4[ 7], hash5[ 7], hash6[ 7], hash7[ 7] );
intrlv_8x32_512( vhashX[ 8], hash0[ 8], hash1[ 8], hash2[ 8], hash3[ 8],
hash4[ 8], hash5[ 8], hash6[ 8], hash7[ 8] );
intrlv_8x32_512( vhashX[ 9], hash0[ 9], hash1[ 9], hash2[ 9], hash3[ 9],
hash4[ 9], hash5[ 9], hash6[ 9], hash7[ 9] );
intrlv_8x32_512( vhashX[10], hash0[10], hash1[10], hash2[10], hash3[10],
hash4[10], hash5[10], hash6[10], hash7[10] );
intrlv_8x32_512( vhashX[11], hash0[11], hash1[11], hash2[11], hash3[11],
hash4[11], hash5[11], hash6[11], hash7[11] );
intrlv_8x32_512( vhashX[12], hash0[12], hash1[12], hash2[12], hash3[12],
hash4[12], hash5[12], hash6[12], hash7[12] );
intrlv_8x32_512( vhashX[13], hash0[13], hash1[13], hash2[13], hash3[13],
hash4[13], hash5[13], hash6[13], hash7[13] );
intrlv_8x32_512( vhashX[14], hash0[14], hash1[14], hash2[14], hash3[14],
hash4[14], hash5[14], hash6[14], hash7[14] );
intrlv_8x32_512( vhashX[15], hash0[15], hash1[15], hash2[15], hash3[15],
hash4[15], hash5[15], hash6[15], hash7[15] );
intrlv_8x32_512( vhashX[16], hash0[16], hash1[16], hash2[16], hash3[16],
hash4[16], hash5[16], hash6[16], hash7[16] );
intrlv_8x32_512( vhashX[17], hash0[17], hash1[17], hash2[17], hash3[17],
hash4[17], hash5[17], hash6[17], hash7[17] );
intrlv_8x32_512( vhashX[18], hash0[18], hash1[18], hash2[18], hash3[18],
hash4[18], hash5[18], hash6[18], hash7[18] );
intrlv_8x32_512( vhashX[19], hash0[19], hash1[19], hash2[19], hash3[19],
hash4[19], hash5[19], hash6[19], hash7[19] );
intrlv_8x32_512( vhashX[20], hash0[20], hash1[20], hash2[20], hash3[20],
hash4[20], hash5[20], hash6[20], hash7[20] );
intrlv_8x32_512( vhashX[21], hash0[21], hash1[21], hash2[21], hash3[21],
hash4[21], hash5[21], hash6[21], hash7[21] );
intrlv_8x32_512( vhashX[22], hash0[22], hash1[22], hash2[22], hash3[22],
hash4[22], hash5[22], hash6[22], hash7[22] );
intrlv_8x32_512( vhashX[23], hash0[23], hash1[23], hash2[23], hash3[23],
hash4[23], hash5[23], hash6[23], hash7[23] );
blake2s_8way_init( &ctx.blake2s, 32 );
blake2s_8way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
}
/*
 * Scan a nonce range for x25x solutions, eight nonces per iteration.
 *
 * work         block header in work->data (nonce at word 19) and the
 *              target in work->target.
 * max_nonce    exclusive upper bound of this thread's nonce range.
 * hashes_done  out: number of nonces tested by this call.
 * mythr        thread info; mythr->id selects the work_restart flag.
 *
 * Returns 0 always; solutions are reported through submit_lane_solution.
 */
int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t hash[8*16] __attribute__ ((aligned (128)));
   uint32_t vdata[24*8] __attribute__ ((aligned (64)));
   uint32_t lane_hash[8] __attribute__ ((aligned (64)));
   // Word 7 of each lane's digest in the 8x32 interleaved output.
   uint32_t *hash7 = &(hash[7<<3]);
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   __m512i *noncev = (__m512i*)vdata + 9;   // aligned
   uint32_t n = first_nonce;
   // Each iteration consumes 8 nonces (n .. n+7), so stop 8 short of
   // max_nonce; stopping only 4 short lets the last batch run past the
   // end of the range assigned to this thread.
   const uint32_t last_nonce = max_nonce - 8;
   const int thr_id = mythr->id;
   const uint32_t Htarg = ptarget[7];

   if (opt_benchmark)
      ((uint32_t*)ptarget)[7] = 0x08ff;

   InitializeSWIFFTX();

   mm512_bswap32_intrlv80_8x64( vdata, pdata );

   do
   {
      // Insert the eight byte-swapped nonces into the interleaved header.
      *noncev = mm512_intrlv_blend_32( mm512_bswap_32(
                _mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
                                  n+3, 0, n+2, 0, n+1, 0, n,   0 ) ), *noncev );

      x25x_8way_hash( hash, vdata );

      // Cheap pre-filter on the top word, full test only on candidates.
      for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
      {
         extr_lane_8x32( lane_hash, hash, lane, 256 );
         if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
         {
            pdata[19] = n + lane;
            submit_lane_solution( work, lane_hash, mythr, lane );
         }
      }
      n += 8;
   } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );

   *hashes_done = n - first_nonce;
   return 0;
}
#elif defined(X25X_4WAY)
// Scratch context for x25x_4way_hash, which declares a single instance and
// reuses it for each hashing stage.  All members occupy the same storage,
// so a stage must run its own init on the member it uses before updating
// it; no state survives from one stage to the next.
union _x25x_4way_ctx_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_groestl groestl;
hashState_echo echo;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
haval256_5_4way_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
sha256_4way_context sha256;
sph_panama_context panama;
blake2s_4way_state blake2s;
};
// Convenience name used when declaring the local context in x25x_4way_hash.
typedef union _x25x_4way_ctx_overlay x25x_4way_ctx_overlay;
void x25x_4way_hash( void *output, const void *input )
{
uint64_t vhash[8*4] __attribute__ ((aligned (128)));
unsigned char hash0[25][64] __attribute__((aligned(64))) = {0}; unsigned char hash0[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash1[25][64] __attribute__((aligned(64))) = {0}; unsigned char hash1[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash2[25][64] __attribute__((aligned(64))) = {0}; unsigned char hash2[25][64] __attribute__((aligned(64))) = {0};
unsigned char hash3[25][64] __attribute__((aligned(64))) = {0}; unsigned char hash3[25][64] __attribute__((aligned(64))) = {0};
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
// Doubling the size of vhashX breaks everything. It may have something
// to do with accessing arrays: vhashX vs vhashX[0] vs &vhash[0].
// Changing notation did seem to allow the larger buffer but still resulted
// in problems further along.
// unsigned char vhashX[24][64*8] __attribute__ ((aligned (64)));
unsigned char vhashX[24][64*4] __attribute__ ((aligned (64))); unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64))); x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
blake512_4way_init( &ctx.blake ); blake512_4way_init( &ctx.blake );
blake512_4way( &ctx.blake, input, 80 ); blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash ); blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash ); dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash ); dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
@@ -118,24 +607,24 @@ void x25x_4way_hash( void *output, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3[2], update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
(const char*)hash3[1], 512 ); (const char*)hash3[1], 512 );
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] ); intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash ); skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash ); dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
jh512_4way_init( &ctx.jh ); jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 ); jh512_4way_update( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash ); jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64_512( hash0[4], hash1[4], hash2[4], hash3[4], vhash ); dintrlv_4x64_512( hash0[4], hash1[4], hash2[4], hash3[4], vhash );
keccak512_4way_init( &ctx.keccak ); keccak512_4way_init( &ctx.keccak );
keccak512_4way( &ctx.keccak, vhash, 64 ); keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash ); keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash ); dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
init_luffa( &ctx.luffa, 512 ); init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6], update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6],
(const BitSequence*)hash0[5], 64 ); (const BitSequence*)hash0[5], 64 );
@@ -162,9 +651,9 @@ void x25x_4way_hash( void *output, const void *input )
cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7], cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7],
(const byte*)hash3[6], 64 ); (const byte*)hash3[6], 64 );
sph_shavite512_init(&ctx.shavite); sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64); sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
sph_shavite512_close(&ctx.shavite, hash0[8]); sph_shavite512_close(&ctx.shavite, hash0[8]);
sph_shavite512_init(&ctx.shavite); sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash1[7], 64); sph_shavite512(&ctx.shavite, (const void*) hash1[7], 64);
sph_shavite512_close(&ctx.shavite, hash1[8]); sph_shavite512_close(&ctx.shavite, hash1[8]);
@@ -204,13 +693,13 @@ void x25x_4way_hash( void *output, const void *input )
intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] ); intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64_512( hash0[11], hash1[11], hash2[11], hash3[11], vhash ); dintrlv_4x64_512( hash0[11], hash1[11], hash2[11], hash3[11], vhash );
sph_fugue512_init(&ctx.fugue); sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64); sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64);
sph_fugue512_close(&ctx.fugue, hash0[12]); sph_fugue512_close(&ctx.fugue, hash0[12]);
sph_fugue512_init(&ctx.fugue); sph_fugue512_init(&ctx.fugue);
sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64); sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64);
sph_fugue512_close(&ctx.fugue, hash1[12]); sph_fugue512_close(&ctx.fugue, hash1[12]);
@@ -224,13 +713,13 @@ void x25x_4way_hash( void *output, const void *input )
intrlv_4x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12] ); intrlv_4x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12] );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way_update( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32_512( hash0[13], hash1[13], hash2[13], hash3[13], vhash ); dintrlv_4x32_512( hash0[13], hash1[13], hash2[13], hash3[13], vhash );
sph_whirlpool_init(&ctx.whirlpool); sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash0[13], 64); sph_whirlpool (&ctx.whirlpool, (const void*) hash0[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash0[14]); sph_whirlpool_close(&ctx.whirlpool, hash0[14]);
sph_whirlpool_init(&ctx.whirlpool); sph_whirlpool_init(&ctx.whirlpool);
sph_whirlpool (&ctx.whirlpool, (const void*) hash1[13], 64); sph_whirlpool (&ctx.whirlpool, (const void*) hash1[13], 64);
sph_whirlpool_close(&ctx.whirlpool, hash1[14]); sph_whirlpool_close(&ctx.whirlpool, hash1[14]);
@@ -244,11 +733,10 @@ void x25x_4way_hash( void *output, const void *input )
intrlv_4x64_512( vhash, hash0[14], hash1[14], hash2[14], hash3[14] ); intrlv_4x64_512( vhash, hash0[14], hash1[14], hash2[14], hash3[14] );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way_update( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64_512( hash0[15], hash1[15], hash2[15], hash3[15], vhash ); dintrlv_4x64_512( hash0[15], hash1[15], hash2[15], hash3[15], vhash );
ComputeSingleSWIFFTX((unsigned char*)hash0[12], (unsigned char*)hash0[16]); ComputeSingleSWIFFTX((unsigned char*)hash0[12], (unsigned char*)hash0[16]);
ComputeSingleSWIFFTX((unsigned char*)hash1[12], (unsigned char*)hash1[16]); ComputeSingleSWIFFTX((unsigned char*)hash1[12], (unsigned char*)hash1[16]);
ComputeSingleSWIFFTX((unsigned char*)hash2[12], (unsigned char*)hash2[16]); ComputeSingleSWIFFTX((unsigned char*)hash2[12], (unsigned char*)hash2[16]);
@@ -257,15 +745,15 @@ void x25x_4way_hash( void *output, const void *input )
intrlv_4x32_512( vhashX[0], hash0[16], hash1[16], hash2[16], hash3[16] ); intrlv_4x32_512( vhashX[0], hash0[16], hash1[16], hash2[16], hash3[16] );
memset( vhash, 0, 64*4 ); memset( vhash, 0, 64*4 );
haval256_5_4way_init( &ctx.haval ); haval256_5_4way_init( &ctx.haval );
haval256_5_4way( &ctx.haval, vhashX[0], 64 ); haval256_5_4way_update( &ctx.haval, vhashX[0], 64 );
haval256_5_4way_close( &ctx.haval, vhash ); haval256_5_4way_close( &ctx.haval, vhash );
dintrlv_4x32_512( hash0[17], hash1[17], hash2[17], hash3[17], vhash ); dintrlv_4x32_512( hash0[17], hash1[17], hash2[17], hash3[17], vhash );
sph_tiger_init(&ctx.tiger); sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash0[17], 64); sph_tiger (&ctx.tiger, (const void*) hash0[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash0[18]); sph_tiger_close(&ctx.tiger, (void*) hash0[18]);
sph_tiger_init(&ctx.tiger); sph_tiger_init(&ctx.tiger);
sph_tiger (&ctx.tiger, (const void*) hash1[17], 64); sph_tiger (&ctx.tiger, (const void*) hash1[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash1[18]); sph_tiger_close(&ctx.tiger, (void*) hash1[18]);
@@ -276,7 +764,7 @@ void x25x_4way_hash( void *output, const void *input )
sph_tiger (&ctx.tiger, (const void*) hash3[17], 64); sph_tiger (&ctx.tiger, (const void*) hash3[17], 64);
sph_tiger_close(&ctx.tiger, (void*) hash3[18]); sph_tiger_close(&ctx.tiger, (void*) hash3[18]);
LYRA2RE( (void*)hash0[19], 32, (const void*)hash0[18], 32, LYRA2RE( (void*)hash0[19], 32, (const void*)hash0[18], 32,
(const void*)hash0[18], 32, 1, 4, 4 ); (const void*)hash0[18], 32, 1, 4, 4 );
LYRA2RE( (void*)hash1[19], 32, (const void*)hash1[18], 32, LYRA2RE( (void*)hash1[19], 32, (const void*)hash1[18], 32,
(const void*)hash1[18], 32, 1, 4, 4 ); (const void*)hash1[18], 32, 1, 4, 4 );
@@ -285,9 +773,9 @@ void x25x_4way_hash( void *output, const void *input )
LYRA2RE( (void*)hash3[19], 32, (const void*)hash3[18], 32, LYRA2RE( (void*)hash3[19], 32, (const void*)hash3[18], 32,
(const void*)hash3[18], 32, 1, 4, 4 ); (const void*)hash3[18], 32, 1, 4, 4 );
sph_gost512_init(&ctx.gost); sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash0[19], 64); sph_gost512 (&ctx.gost, (const void*) hash0[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash0[20]); sph_gost512_close(&ctx.gost, (void*) hash0[20]);
sph_gost512_init(&ctx.gost); sph_gost512_init(&ctx.gost);
sph_gost512 (&ctx.gost, (const void*) hash1[19], 64); sph_gost512 (&ctx.gost, (const void*) hash1[19], 64);
sph_gost512_close(&ctx.gost, (void*) hash1[20]); sph_gost512_close(&ctx.gost, (void*) hash1[20]);
@@ -302,7 +790,7 @@ void x25x_4way_hash( void *output, const void *input )
memset( vhash, 0, 64*4 ); memset( vhash, 0, 64*4 );
sha256_4way_init( &ctx.sha256 ); sha256_4way_init( &ctx.sha256 );
sha256_4way( &ctx.sha256, vhashX[0], 64 ); sha256_4way_update( &ctx.sha256, vhashX[0], 64 );
sha256_4way_close( &ctx.sha256, vhash ); sha256_4way_close( &ctx.sha256, vhash );
dintrlv_4x32_512( hash0[21], hash1[21], hash2[21], hash3[21], vhash ); dintrlv_4x32_512( hash0[21], hash1[21], hash2[21], hash3[21], vhash );
@@ -356,20 +844,12 @@ void x25x_4way_hash( void *output, const void *input )
blake2s_4way_init( &ctx.blake2s, 32 ); blake2s_4way_init( &ctx.blake2s, 32 );
blake2s_4way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 ); blake2s_4way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
/*
dintrlv_4x32( hash0[24], hash1[24], hash2[24], hash3[24], vhash, 256 );
memcpy(output, hash0[24], 32);
memcpy(output+32, hash1[24], 32);
memcpy(output+64, hash2[24], 32);
memcpy(output+96, hash3[24], 32);
*/
} }
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce, int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t hash[4*16] __attribute__ ((aligned (64))); uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
@@ -401,17 +881,8 @@ int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
{ {
pdata[19] = n + lane; pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane ); submit_lane_solution( work, lane_hash, mythr, lane );
} }
} }
/*
for ( int i = 0; i < 4; i++ )
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
if( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
*/
n += 4; n += 4;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) ); } while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );

View File

@@ -4,7 +4,7 @@
# during develpment. However the information contained may provide compilation # during develpment. However the information contained may provide compilation
# tips to users. # tips to users.
rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen rm cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null
make distclean || echo clean make distclean || echo clean
rm -f config.status rm -f config.status

27
build-avx2.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
#
# Linux build of cpuminer configured with "-O3 -march=haswell -maes -Wall".
# Cleans the tree, regenerates the build system, configures with curl
# support, builds with 4 parallel jobs and strips the resulting binary.

# Disabled hand-off to the Windows (MinGW) build script:
#if [ "$OS" = "Windows_NT" ]; then
#    ./mingw64.sh
#    exit 0
#fi

# Start from a clean tree. A failing distclean (e.g. nothing configured yet)
# is only reported, it does not stop the build.
if ! make distclean; then
    echo clean
fi
rm -f config.status

# Regenerate configure; a failure here is likewise reported and ignored.
if ! ./autogen.sh; then
    echo done
fi

# Alternative flag sets, kept for reference.
# Ubuntu 10.04 (gcc 4.4)
# extracflags="-O3 -march=native -Wall -D_REENTRANT -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants -fbranch-target-load-optimize2 -fsched2-use-superblocks -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-labels=16"
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl

# Active configuration.
CFLAGS="-O3 -march=haswell -maes -Wall" ./configure --with-curl

make -j 4

# Remove all symbols from the final executable.
strip -s cpuminer

10
clean-all.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
#
# Remove all the targeted cpuminer executables (Linux and Windows builds),
# then run "make distclean".
#
# rm -f is used so that binaries which were never built are skipped silently.
# Plain rm reports each missing file on stderr, and a "> /dev/null"
# redirection only covers stdout, so it does not hide those messages.

rm -f cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen
rm -f cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-aes-avx.exe cpuminer-aes-sse42.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-sse2.exe cpuminer-zen.exe
make distclean

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.10.6. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.10.7.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.10.6' PACKAGE_VERSION='3.10.7'
PACKAGE_STRING='cpuminer-opt 3.10.6' PACKAGE_STRING='cpuminer-opt 3.10.7'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.10.6 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.10.7 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.10.6:";; short | recursive ) echo "Configuration of cpuminer-opt 3.10.7:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.10.6 cpuminer-opt configure 3.10.7
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.10.6, which was It was created by cpuminer-opt $as_me 3.10.7, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.10.6' VERSION='3.10.7'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.10.6, which was This file was extended by cpuminer-opt $as_me 3.10.7, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.10.6 cpuminer-opt config.status 3.10.7
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.10.6]) AC_INIT([cpuminer-opt], [3.10.7])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -27,6 +27,9 @@ ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac #sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
# make release directory and copy selected DLLs. # make release directory and copy selected DLLs.
rm -rf release > /dev/null
mkdir release mkdir release
cp README.txt release/ cp README.txt release/
cp README.md release/ cp README.md release/
@@ -35,10 +38,6 @@ cp $MINGW_LIB/zlib1.dll release/
cp $MINGW_LIB/libwinpthread-1.dll release/ cp $MINGW_LIB/libwinpthread-1.dll release/
cp $GCC_MINGW_LIB/libstdc++-6.dll release/ cp $GCC_MINGW_LIB/libstdc++-6.dll release/
cp $GCC_MINGW_LIB/libgcc_s_seh-1.dll release/ cp $GCC_MINGW_LIB/libgcc_s_seh-1.dll release/
#cp /usr/x86_64-w64-mingw32/lib/zlib1.dll release/
#cp /usr/x86_64-w64-mingw32/lib/libwinpthread-1.dll release/
#cp /usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libstdc++-6.dll release/
#cp /usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libgcc_s_seh-1.dll release/
cp $LOCAL_LIB/openssl/libcrypto-1_1-x64.dll release/ cp $LOCAL_LIB/openssl/libcrypto-1_1-x64.dll release/
cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/