mirror of https://github.com/JayDDee/cpuminer-opt.git (synced 2025-09-17 23:44:27 +00:00)
v3.4.8 release
@@ -23,42 +23,34 @@
#define SPONGE_H_

#include <stdint.h>
#include "avxdefs.h"

/* Blake2b IV Array */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif

/*Blake2b IV Array*/
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};

/* Blake2b's rotation */
static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
#ifdef _MSC_VER
    return _rotr64(w, c);
#else
    return ( w >> c ) | ( w << ( 64 - c ) );
#endif

/*Blake2b's rotation*/
static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
    return ( w >> c ) | ( w << ( 64 - c ) );
}

#if defined __AVX2__
// only available with avx2

// rotate each uint64 c bits
// returns __m256i
#define mm256_rotr_64(w,c) _mm256_or_si256(_mm256_srli_epi64(w, c), \
                                           _mm256_slli_epi64(w, 64 - c))
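
// Hedged example (assumed test snippet, not part of sponge.h): mm256_rotr_64 is a
// lane-wise rotate, so every 64-bit lane should match the scalar rotr64() above.
static inline int mm256_rotr_64_matches_scalar( void )
{
   uint64_t in[4] = { 1, 2, 3, 0x8000000000000000ULL }, out[4];
   __m256i v = _mm256_loadu_si256( (const __m256i*)in );
   _mm256_storeu_si256( (__m256i*)out, mm256_rotr_64( v, 63 ) );
   int ok = 1;
   for ( int i = 0; i < 4; i++ )
      ok &= ( out[i] == rotr64( in[i], 63 ) );   // 1 if vector and scalar agree
   return ok;
}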

// Rotate 4 uint64 (256 bits) by one uint64 (64 bits)
// returns __m256i
#define mm256_rotl256_1x64(s) _mm256_permute4x64_epi64( s, 0x39 )
#define mm256_rotr256_1x64(s) _mm256_permute4x64_epi64( s, 0x93 )

// swap hi and lo 128 bits in 256 bit vector
// returns __m256i
#define mm256_swap128(s) _mm256_permute2f128_si256( s, s, 1 )
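
// Lane map (hedged illustration, not part of sponge.h): with 64-bit lanes
// {a0,a1,a2,a3} held low-to-high in a __m256i,
//    mm256_rotl256_1x64(s) -> {a1,a2,a3,a0}   (permute selector 0x39)
//    mm256_rotr256_1x64(s) -> {a3,a0,a1,a2}   (permute selector 0x93)
//    mm256_swap128(s)      -> {a2,a3,a0,a1}   (128-bit halves exchanged)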

// init vectors from memory
// returns void, updates defines and inits implicit args a, b, c, d
#define LYRA_INIT_AVX2 \
@@ -88,15 +80,29 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
   c = _mm256_add_epi64( c, d ); \
   b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );

#define LYRA_ROUND_AVX2 \
   G_4X64( a[0], a[1], a[2], a[3] ); \
   a[1] = mm256_rotl256_1x64( a[1] ); \
   a[2] = mm256_swap128( a[2] ); \
   a[3] = mm256_rotr256_1x64( a[3] ); \
   G_4X64( a[0], a[1], a[2], a[3] ); \
   a[1] = mm256_rotr256_1x64( a[1] ); \
   a[2] = mm256_swap128( a[2] ); \
   a[3] = mm256_rotl256_1x64( a[3] );

#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   G_4X64( s0, s1, s2, s3 ); \
   s1 = mm256_rotl256_1x64( s1 ); \
   s2 = mm256_swap128( s2 ); \
   s3 = mm256_rotr256_1x64( s3 ); \
   G_4X64( s0, s1, s2, s3 ); \
   s1 = mm256_rotr256_1x64( s1 ); \
   s2 = mm256_swap128( s2 ); \
   s3 = mm256_rotl256_1x64( s3 );

#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
   LYRA_ROUND_AVX2( s0, s1, s2, s3 )
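
// Hedged usage sketch (assumed helper, not code from this commit): with AVX2 the
// 16-word sponge state is held in four __m256i registers and permuted in place.
static inline void permute_state_avx2( uint64_t state[16] )
{
   __m256i s0 = _mm256_loadu_si256( (const __m256i*)&state[ 0] );
   __m256i s1 = _mm256_loadu_si256( (const __m256i*)&state[ 4] );
   __m256i s2 = _mm256_loadu_si256( (const __m256i*)&state[ 8] );
   __m256i s3 = _mm256_loadu_si256( (const __m256i*)&state[12] );
   LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 );
   _mm256_storeu_si256( (__m256i*)&state[ 0], s0 );
   _mm256_storeu_si256( (__m256i*)&state[ 4], s1 );
   _mm256_storeu_si256( (__m256i*)&state[ 8], s2 );
   _mm256_storeu_si256( (__m256i*)&state[12], s3 );
}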

#else
// only available with avx
@@ -148,6 +154,7 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {

#endif // AVX2

/*
#if defined __AVX__
// can coexist with AVX2

@@ -161,7 +168,7 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
#define mm128_swap128(s0, s1) s0 = _mm_xor_si128(s0, s1); \
                              s1 = _mm_xor_si128(s0, s1); \
                              s0 = _mm_xor_si128(s0, s1);


// swap uint64 in 128 bit source vector, equivalent of rotating 128 bits by
// 64 bits (8 bytes)
// __m128i
@@ -193,30 +200,33 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
} while(0)

#endif // AVX
*/

/* Blake2b's G function */
#define G(r,i,a,b,c,d) do { \
   a = a + b; \
   d = rotr64(d ^ a, 32); \
   c = c + d; \
   b = rotr64(b ^ c, 24); \
   a = a + b; \
   d = rotr64(d ^ a, 16); \
   c = c + d; \
   b = rotr64(b ^ c, 63); \
   } while(0)

// Scalar
//Blake2b's G function
#define G(r,i,a,b,c,d) \
   do { \
    a = a + b; \
    d = rotr64(d ^ a, 32); \
    c = c + d; \
    b = rotr64(b ^ c, 24); \
    a = a + b; \
    d = rotr64(d ^ a, 16); \
    c = c + d; \
    b = rotr64(b ^ c, 63); \
   } while(0)

/*One Round of the Blake2b's compression function*/
#define ROUND_LYRA(r) \
   G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
   G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
   G(r,2,v[ 2],v[ 6],v[10],v[14]); \
   G(r,3,v[ 3],v[ 7],v[11],v[15]); \
   G(r,4,v[ 0],v[ 5],v[10],v[15]); \
   G(r,5,v[ 1],v[ 6],v[11],v[12]); \
   G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
   G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
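
// Hedged sketch (assumed helper, mirrors the reference Lyra2 sponge): the full
// scalar permutation applies ROUND_LYRA twelve times to a 16-word state v.
static inline void blake2b_lyra_scalar( uint64_t v[16] )
{
   for ( int r = 0; r < 12; r++ )
   {
      ROUND_LYRA( r );
   }
}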


//---- Housekeeping
@@ -224,18 +234,31 @@ void initState(uint64_t state[/*16*/]);

//---- Squeezes
void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
void reducedSqueezeRow0(uint64_t* state, uint64_t* row, const uint32_t nCols);
void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols);

//---- Absorbs
void absorbBlock(uint64_t *state, const uint64_t *in);
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);

//---- Duplexes
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, const uint32_t nCols);
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols);
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols);
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);

//---- Misc
void printArray(unsigned char *array, unsigned int size, char *name);
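
// Hedged call-order sketch (assumption, follows the reference Lyra2 flow; the
// names wholeMatrix and rowLenInt64 are hypothetical): how the sponge primitives
// above are typically chained during the setup phase.
static inline void lyra2_setup_sketch( uint64_t *state, uint64_t *wholeMatrix,
                                       uint64_t nCols, uint64_t rowLenInt64 )
{
   initState( state );                              // seed the state with blake2b_IV
   // absorbBlockBlake2Safe( state, block );        // repeat for each padded input block
   reducedSqueezeRow0( state, wholeMatrix, nCols ); // squeeze row 0 of the memory matrix
   reducedDuplexRow1( state, wholeMatrix,           // duplex row 0 into row 1
                      wholeMatrix + rowLenInt64, nCols );
}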

////////////////////////////////////////////////////////////////////////////////////////////////


////TESTS////
//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
/////////////


#endif /* SPONGE_H_ */