v3.7.4

2026-07-14 19:06:50 +00:00 · 2021-09-29 17:31:16 -04:00
parent 9b905fccc8
commit 2cd1507c2e
80 changed files with 8145 additions and 2097 deletions
--- a/algo/lyra2/allium-4way.c
+++ b/algo/lyra2/allium-4way.c
@@ -16,7 +16,7 @@
 typedef struct {
   blake256_16way_context     blake;
   keccak256_8way_context    keccak;
-   cube_4way_context          cube;
+   cube_4way_2buf_context    cube;
   skein256_8way_context     skein;
 #if defined(__VAES__)
   groestl256_4way_context groestl;
@@ -30,13 +30,7 @@ static __thread allium_16way_ctx_holder allium_16way_ctx;
 bool init_allium_16way_ctx()
 {
   keccak256_8way_init( &allium_16way_ctx.keccak );
-   cube_4way_init( &allium_16way_ctx.cube, 256, 16, 32 );
   skein256_8way_init( &allium_16way_ctx.skein );
-#if defined(__VAES__)
-   groestl256_4way_init( &allium_16way_ctx.groestl, 32 );
-#else
-   init_groestl256( &allium_16way_ctx.groestl, 32 );
-#endif
   return true;
 }

@@ -111,12 +105,11 @@ void allium_16way_hash( void *state, const void *input )
   intrlv_2x256( vhash, hash14, hash15, 256 );
   LYRA2RE_2WAY( vhash, 32, vhash, 32, 1, 8, 8 );
   dintrlv_2x256( hash14, hash15, vhash, 256 );
-  
+
   intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 );
   intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 );

-   cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
-   cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
+   cube_4way_2buf_full( &ctx.cube, vhashA, vhashB, 256, vhashA, vhashB, 32 );

   dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
   dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
@@ -124,8 +117,7 @@ void allium_16way_hash( void *state, const void *input )
   intrlv_4x128( vhashA, hash8, hash9, hash10, hash11, 256 );
   intrlv_4x128( vhashB, hash12, hash13, hash14, hash15, 256 );

-   cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
-   cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
+   cube_4way_2buf_full( &ctx.cube, vhashA, vhashB, 256, vhashA, vhashB, 32 );

   dintrlv_4x128( hash8, hash9, hash10, hash11, vhashA, 256 );
   dintrlv_4x128( hash12, hash13, hash14, hash15, vhashB, 256 );
@@ -255,7 +247,7 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
 typedef struct {
   blake256_8way_context     blake;
   keccak256_4way_context    keccak;
-   cubehashParam             cube;
+   cube_2way_context         cube;
   skein256_4way_context     skein;
 #if defined(__VAES__)
   groestl256_2way_context   groestl;
@@ -269,13 +261,7 @@ static __thread allium_8way_ctx_holder allium_8way_ctx;
 bool init_allium_8way_ctx()
 {
   keccak256_4way_init( &allium_8way_ctx.keccak );
-   cubehashInit( &allium_8way_ctx.cube, 256, 16, 32 );
   skein256_4way_init( &allium_8way_ctx.skein );
-#if defined(__VAES__)
-   groestl256_2way_init( &allium_8way_ctx.groestl, 32 );
-#else
-   init_groestl256( &allium_8way_ctx.groestl, 32 );
-#endif
   return true;
 }

@@ -320,21 +306,20 @@ void allium_8way_hash( void *hash, const void *input )
   LYRA2RE( hash6, 32, hash6, 32, hash6, 32, 1, 8, 8 );
   LYRA2RE( hash7, 32, hash7, 32, hash7, 32, 1, 8, 8 );

-   cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, 32 );
-   cubehashInit( &ctx.cube, 256, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*)hash1, 32 );
-   cubehashInit( &ctx.cube, 256, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*)hash2, 32 );
-   cubehashInit( &ctx.cube, 256, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, 32 );
-   cubehashInit( &ctx.cube, 256, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hash4, (const byte*)hash4, 32 );
-   cubehashInit( &ctx.cube, 256, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hash5, (const byte*)hash5, 32 );
-   cubehashInit( &ctx.cube, 256, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hash6, (const byte*)hash6, 32 );
-   cubehashInit( &ctx.cube, 256, 16, 32 );
-   cubehashUpdateDigest( &ctx.cube, (byte*)hash7, (const byte*)hash7, 32 );
+
+   intrlv_2x128( vhashA, hash0, hash1, 256 );
+   intrlv_2x128( vhashB, hash2, hash3, 256 );
+   cube_2way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
+   cube_2way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
+   dintrlv_2x128( hash0, hash1, vhashA, 256 );
+   dintrlv_2x128( hash2, hash3, vhashB, 256 );
+
+   intrlv_2x128( vhashA, hash4, hash5, 256 );
+   intrlv_2x128( vhashB, hash6, hash7, 256 );
+   cube_2way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
+   cube_2way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
+   dintrlv_2x128( hash4, hash5, vhashA, 256 );
+   dintrlv_2x128( hash6, hash7, vhashB, 256 );

   LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
   LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
--- a/algo/lyra2/sponge.h
+++ b/algo/lyra2/sponge.h
@@ -66,13 +66,13 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){

 #define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
   G2W_4X64( s0, s1, s2, s3 ); \
-   s1 = mm512_ror256_64( s1); \
+   s3 = mm512_shufll256_64( s3 ); \
+   s1 = mm512_shuflr256_64( s1); \
   s2 = mm512_swap256_128( s2 ); \
-   s3 = mm512_rol256_64( s3 ); \
   G2W_4X64( s0, s1, s2, s3 ); \
-   s1 = mm512_rol256_64( s1 ); \
-   s2 = mm512_swap256_128( s2 ); \
-   s3 = mm512_ror256_64( s3 );
+   s3 = mm512_shuflr256_64( s3 ); \
+   s1 = mm512_shufll256_64( s1 ); \
+   s2 = mm512_swap256_128( s2 ); 

 #define LYRA_12_ROUNDS_2WAY_AVX512( s0, s1, s2, s3 ) \
   LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
@@ -107,13 +107,13 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){

 #define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   G_4X64( s0, s1, s2, s3 ); \
-   s1 = mm256_ror_1x64( s1); \
+   s3 = mm256_shufll_64( s3 ); \
+   s1 = mm256_shuflr_64( s1); \
   s2 = mm256_swap_128( s2 ); \
-   s3 = mm256_rol_1x64( s3 ); \
   G_4X64( s0, s1, s2, s3 ); \
-   s1 = mm256_rol_1x64( s1 ); \
-   s2 = mm256_swap_128( s2 ); \
-   s3 = mm256_ror_1x64( s3 );
+   s3 = mm256_shuflr_64( s3 ); \
+   s1 = mm256_shufll_64( s1 ); \
+   s2 = mm256_swap_128( s2 );

 #define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
@@ -148,14 +148,14 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
 #define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-   mm128_ror256_64( s2, s3 ); \
+   mm128_vrol256_64( s6, s7 ); \
+   mm128_vror256_64( s2, s3 ); \
   mm128_swap256_128( s4, s5 ); \
-   mm128_rol256_64( s6, s7 ); \
   G_2X64( s0, s2, s4, s6 ); \
   G_2X64( s1, s3, s5, s7 ); \
-   mm128_rol256_64( s2, s3 ); \
-   mm128_swap256_128( s4, s5 ); \
-   mm128_ror256_64( s6, s7 );
+   mm128_vror256_64( s6, s7 ); \
+   mm128_vrol256_64( s2, s3 ); \
+   mm128_swap256_128( s4, s5 );

 #define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
   LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \