This commit is contained in:
Jay D Dee
2024-05-20 23:08:50 -04:00
parent 4f930574cc
commit 042d13d1e1
129 changed files with 835 additions and 538 deletions

View File

@@ -1611,7 +1611,7 @@ void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
///////////////////////////////////////
//
@@ -2617,7 +2617,7 @@ blake32_8way_close_le( blake_8way_small_context *sc, unsigned ub, unsigned n,
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
//Blake-256 16 way AVX512

View File

@@ -147,7 +147,7 @@ void blake256r8_8way_close(void *cc, void *dst);
#define blake256r8_8x32_update blake256r14_8way_update
#define blake256r8_8x32_close blake256r14_8way_close
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
///////////////////////////////////
//

View File

@@ -226,7 +226,7 @@ static const uint8_t sigma[12][16] =
#define Mx_(n) Mx__(n)
#define Mx__(n) M ## n
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
#define B2B8W_G(a, b, c, d, x, y) \
{ \

View File

@@ -15,7 +15,7 @@
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
typedef struct ALIGN( 64 ) {
__m512i b[16]; // input buffer

View File

@@ -3,7 +3,7 @@
#include <stdint.h>
#include "blake2b-hash.h"
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
#define BLAKE2B_8WAY
#elif defined(__AVX2__)
#define BLAKE2B_4WAY

View File

@@ -497,7 +497,7 @@ int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
#endif // __AVX2__
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
// Blake2s-256 16 way

View File

@@ -29,20 +29,20 @@
#define ALIGN(x) __attribute__((aligned(x)))
#endif
typedef struct __blake2s_nway_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint8_t node_offset[6];// 14
uint8_t node_depth; // 15
uint8_t inner_length; // 16
// uint8_t reserved[0];
uint8_t salt[8]; // 24
uint8_t personal[8]; // 32
} blake2s_nway_param;
typedef struct __blake2s_nway_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint8_t node_offset[6];// 14
uint8_t node_depth; // 15
uint8_t inner_length; // 16
// uint8_t reserved[0];
uint8_t salt[8]; // 24
uint8_t personal[8]; // 32
} blake2s_nway_param;
typedef struct ALIGN( 64 ) __blake2s_4way_state
{
@@ -67,7 +67,7 @@ int blake2s_4way_full_blocks( blake2s_4way_state *S, void *out,
typedef struct ALIGN( 64 ) __blake2s_8way_state
{
__m256i h[8];
uint8_t buf[ 32 * 8 ];
uint8_t buf[ 64 * 8 ];
uint32_t t[2];
uint32_t f[2];
size_t buflen;
@@ -83,12 +83,12 @@ int blake2s_8way_full_blocks( blake2s_8way_state *S, void *out,
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
typedef struct ALIGN( 64 ) __blake2s_16way_state
{
__m512i h[8];
uint8_t buf[ 32 * 16 ];
uint8_t buf[ 64 * 16 ];
uint32_t t[2];
uint32_t f[2];
size_t buflen;

View File

@@ -3,7 +3,7 @@
#include <string.h>
#include <stdint.h>
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
#define BLAKE2S_16WAY
#elif defined(__AVX2__)
#define BLAKE2S_8WAY

View File

@@ -349,16 +349,16 @@ void blake512_transform( uint64_t *H, const uint64_t *buf, const uint64_t T0,
Va = v128_add64( Va, v128_add64( Vb, \
v128_set64( CBx( r, Sd ) ^ Mx( r, Sc ), \
CBx( r, Sb ) ^ Mx( r, Sa ) ) ) ); \
Vd = v128_ror64( v128_xor( Vd, Va ), 32 ); \
Vd = v128_ror64xor( Vd, Va, 32 ); \
Vc = v128_add64( Vc, Vd ); \
Vb = v128_ror64( v128_xor( Vb, Vc ), 25 ); \
Vb = v128_ror64xor( Vb, Vc, 25 ); \
\
Va = v128_add64( Va, v128_add64( Vb, \
v128_set64( CBx( r, Sc ) ^ Mx( r, Sd ), \
CBx( r, Sa ) ^ Mx( r, Sb ) ) ) ); \
Vd = v128_ror64( v128_xor( Vd, Va ), 16 ); \
Vd = v128_ror64xor( Vd, Va, 16 ); \
Vc = v128_add64( Vc, Vd ); \
Vb = v128_ror64( v128_xor( Vb, Vc ), 11 ); \
Vb = v128_ror64xor( Vb, Vc, 11 ); \
}
#define BLAKE512_ROUND( R ) \
@@ -559,7 +559,7 @@ void blake512_full( blake512_context *sc, void *dst, const void *data,
#if defined(__AVX2__)
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
////////////////////////////////////
//
@@ -1887,13 +1887,13 @@ blake512_4x64_close(void *cc, void *dst)
#define GB_2X64( m0, m1, c0, c1, a, b, c, d ) \
{ \
a = v128_add64( v128_add64( v128_xor( v128_64( c1 ), m0 ), b ), a ); \
d = v128_ror64( v128_xor( d, a ), 32 ); \
d = v128_ror64xor( d, a, 32 ); \
c = v128_add64( c, d ); \
b = v128_ror64( v128_xor( b, c ), 25 ); \
b = v128_ror64xor( b, c, 25 ); \
a = v128_add64( v128_add64( v128_xor( v128_64( c0 ), m1 ), b ), a ); \
d = v128_ror64( v128_xor( d, a ), 16 ); \
d = v128_ror64xor( d, a, 16 ); \
c = v128_add64( c, d ); \
b = v128_ror64( v128_xor( b, c ), 11 ); \
b = v128_ror64xor( b, c, 11 ); \
}
#define ROUND_B_2X64(r) \
@@ -2054,9 +2054,9 @@ void blake512_2x64_prehash_part1_le( blake_2x64_big_context *sc,
// G4 skip nonce
V0 = v128_add64( v128_add64( v128_xor( v128_64( CB9 ), sc->buf[ 8] ), V5 ),
V0 );
VF = v128_ror64( v128_xor( VF, V0 ), 32 );
VF = v128_ror64xor( VF, V0, 32 );
VA = v128_add64( VA, VF );
V5 = v128_ror64( v128_xor( V5, VA ), 25 );
V5 = v128_ror64xor( V5, VA, 25 );
V0 = v128_add64( V0, V5 );
GB_2X64( sc->buf[10], sc->buf[11], CBA, CBB, V1, V6, VB, VC );
@@ -2137,9 +2137,9 @@ void blake512_2x64_prehash_part2_le( blake_2x64_big_context *sc, void *hash,
// finish round 0, with the nonce now available
V0 = v128_add64( V0, v128_xor( v128_64( CB8 ), M9 ) );
VF = v128_ror64( v128_xor( VF, V0 ), 16 );
VF = v128_ror64xor( VF, V0, 16 );
VA = v128_add64( VA, VF );
V5 = v128_ror64( v128_xor( V5, VA ), 11 );
V5 = v128_ror64xor( V5, VA, 11 );
// Round 1
// G0
@@ -2147,34 +2147,34 @@ void blake512_2x64_prehash_part2_le( blake_2x64_big_context *sc, void *hash,
// G1
V1 = v128_add64( V1, V5 );
VD = v128_ror64( v128_xor( VD, V1 ), 32 );
VD = v128_ror64xor( VD, V1, 32 );
V9 = v128_add64( V9, VD );
V5 = v128_ror64( v128_xor( V5, V9 ), 25 );
V5 = v128_ror64xor( V5, V9, 25 );
V1 = v128_add64( V1, v128_add64( v128_xor( v128_64( CBx(1,2) ), Mx(1,3) ),
V5 ) );
VD = v128_ror64( v128_xor( VD, V1 ), 16 );
VD = v128_ror64xor( VD, V1, 16 );
V9 = v128_add64( V9, VD );
V5 = v128_ror64( v128_xor( V5, V9 ), 11 );
V5 = v128_ror64xor( V5, V9, 11 );
// G2
V2 = v128_add64( V2, v128_xor( v128_64( CBF ), M9 ) );
VE = v128_ror64( v128_xor( VE, V2 ), 32 );
VE = v128_ror64xor( VE, V2, 32 );
VA = v128_add64( VA, VE );
V6 = v128_ror64( v128_xor( V6, VA ), 25 );
V6 = v128_ror64xor( V6, VA, 25 );
V2 = v128_add64( V2, v128_add64( v128_xor( v128_64( CB9 ), MF ), V6 ) );
VE = v128_ror64( v128_xor( VE, V2 ), 16 );
VE = v128_ror64xor( VE, V2, 16 );
VA = v128_add64( VA, VE );
V6 = v128_ror64( v128_xor( V6, VA ), 11 );
V6 = v128_ror64xor( V6, VA, 11 );
// G3
VF = v128_ror64( v128_xor( VF, V3 ), 32 );
VF = v128_ror64xor( VF, V3, 32 );
VB = v128_add64( VB, VF );
V7 = v128_ror64( v128_xor( V7, VB ), 25 );
V7 = v128_ror64xor( V7, VB, 25 );
V3 = v128_add64( V3, v128_add64( v128_xor( v128_64( CBx(1, 6) ), Mx(1, 7) ),
V7 ) );
VF = v128_ror64( v128_xor( VF, V3 ), 16 );
VF = v128_ror64xor( VF, V3, 16 );
VB = v128_add64( VB, VF );
V7 = v128_ror64( v128_xor( V7, VB ), 11 );
V7 = v128_ror64xor( V7, VB, 11 );
// G4, G5, G6, G7
GB_2X64(Mx(1, 8), Mx(1, 9), CBx(1, 8), CBx(1, 9), V0, V5, VA, VF);

View File

@@ -92,7 +92,7 @@ void blake512_4x64_final_le( blake_4x64_big_context *sc, void *hash,
#define blake512_4way_prehash_le blake512_4x64_prehash_le
#define blake512_4way_final_le blake512_4x64_final_le
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
////////////////////////////
//

View File

@@ -4,7 +4,7 @@
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#if defined(SIMD512)
#define BLAKECOIN_16WAY
#elif defined(__AVX2__)
#define BLAKECOIN_8WAY

View File

@@ -101,15 +101,15 @@
{ \
Va = v128_add64( Va, v128_add64( Vb, \
v128_set64( m[ sigmaR[ Sc ] ], m[ sigmaR[ Sa ] ] ) ) ); \
Vd = v128_ror64( v128_xor( Vd, Va ), 32 ); \
Vd = v128_ror64xor( Vd, Va, 32 ); \
Vc = v128_add64( Vc, Vd ); \
Vb = v128_ror64( v128_xor( Vb, Vc ), 24 ); \
Vb = v128_ror64xor( Vb, Vc, 24 ); \
\
Va = v128_add64( Va, v128_add64( Vb, \
v128_set64( m[ sigmaR[ Sd ] ], m[ sigmaR[ Sb ] ] ) ) ); \
Vd = v128_ror64( v128_xor( Vd, Va ), 16 ); \
Vd = v128_ror64xor( Vd, Va, 16 ); \
Vc = v128_add64( Vc, Vd ); \
Vb = v128_ror64( v128_xor( Vb, Vc ), 63 ); \
Vb = v128_ror64xor( Vb, Vc, 63 ); \
}
#define BLAKE2B_ROUND( R ) \