This commit is contained in:
Jay D Dee
2019-12-17 00:57:35 -05:00
parent a17ff6f189
commit d741f1c9a9
51 changed files with 5473 additions and 911 deletions

View File

@@ -65,14 +65,14 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
b = mm512_ror_64( _mm512_xor_si512( b, c ), 63 );
#define LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm512_ror_1x64( s1); \
s2 = mm512_swap128_256( s2 ); \
s3 = mm512_rol1x64_256( s3 ); \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm512_rol1x64_256( s1 ); \
s2 = mm512_swap128_256( s2 ); \
s3 = mm512_ror1x64_256( s3 );
G2W_4X64( s0, s1, s2, s3 ); \
s1 = mm512_ror256_64( s1); \
s2 = mm512_swap256_128( s2 ); \
s3 = mm512_rol256_64( s3 ); \
G2W_4X64( s0, s1, s2, s3 ); \
s1 = mm512_rol256_64( s1 ); \
s2 = mm512_swap256_128( s2 ); \
s3 = mm512_ror256_64( s3 );
#define LYRA_12_ROUNDS_2WAY_AVX512( s0, s1, s2, s3 ) \
LYRA_ROUND_2WAY_AVX512( s0, s1, s2, s3 ) \
@@ -148,14 +148,14 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm128_ror1x64_256( s2, s3 ); \
mm128_swap128_256( s4, s5 ); \
mm128_rol1x64_256( s6, s7 ); \
mm128_ror256_64( s2, s3 ); \
mm128_swap256_128( s4, s5 ); \
mm128_rol256_64( s6, s7 ); \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm128_rol1x64_256( s2, s3 ); \
mm128_swap128_256( s4, s5 ); \
mm128_ror1x64_256( s6, s7 );
mm128_rol256_64( s2, s3 ); \
mm128_swap256_128( s4, s5 ); \
mm128_ror256_64( s6, s7 );
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
@@ -220,7 +220,23 @@ void reducedDuplexRow1_2way( uint64_t *state, uint64_t *rowIn,
uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRowSetup_2way( uint64_t *state, uint64_t *rowIn,
uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols );
void reducedDuplexRow_2way(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn0, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
/*
void reducedDuplexRow_2way( uint64_t *state, uint64_t *rowIn,
uint64_t *rowInOut0, uint64_t *rowInOut1,
uint64_t *rowOut, uint64_t nCols);
*/
union _povly
{
__m512i *v512;
__m256i *v256;
uint64_t *u64;
};
typedef union _povly povly;
void reducedDuplexRow_2way( uint64_t *state, povly matrix, uint64_t rowIn,
uint64_t rowInOut0, uint64_t rowInOut1,
uint64_t rowOut, uint64_t nCols);
#endif