mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
1321ac474c |
@@ -289,7 +289,7 @@ cpuminer_SOURCES = \
|
||||
algo/yescrypt/yescrypt-best.c \
|
||||
algo/yespower/yespower-gate.c \
|
||||
algo/yespower/yespower-blake2b.c \
|
||||
algo/yespower/crypto/blake2b-yp.c \
|
||||
algo/yespower/crypto/hmac-blake2b.c \
|
||||
algo/yespower/yescrypt-r8g.c \
|
||||
algo/yespower/yespower-opt.c
|
||||
|
||||
|
@@ -65,6 +65,15 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.20.1
|
||||
|
||||
sph_blake2b optimized 1-way SSSE3 & AVX2.
|
||||
Removed duplicate Blake2b used by Power2b algo, will now use optimized sph_blake2b.
|
||||
Removed imprecise hash & target display from rejected share log.
|
||||
Share and target difficulty is now displayed only for low diificulty shares.
|
||||
Updated configure.ac to check for AVX512 asm support.
|
||||
Small optimization to Lyra2 SSE2.
|
||||
|
||||
v3.20.0
|
||||
|
||||
#375 Fixed segfault in algos using Groestl VAES due to use of uninitialized data.
|
||||
|
@@ -52,6 +52,180 @@ static const uint8_t sigma[12][16] =
|
||||
};
|
||||
|
||||
|
||||
#define Z00 0
|
||||
#define Z01 1
|
||||
#define Z02 2
|
||||
#define Z03 3
|
||||
#define Z04 4
|
||||
#define Z05 5
|
||||
#define Z06 6
|
||||
#define Z07 7
|
||||
#define Z08 8
|
||||
#define Z09 9
|
||||
#define Z0A A
|
||||
#define Z0B B
|
||||
#define Z0C C
|
||||
#define Z0D D
|
||||
#define Z0E E
|
||||
#define Z0F F
|
||||
|
||||
#define Z10 E
|
||||
#define Z11 A
|
||||
#define Z12 4
|
||||
#define Z13 8
|
||||
#define Z14 9
|
||||
#define Z15 F
|
||||
#define Z16 D
|
||||
#define Z17 6
|
||||
#define Z18 1
|
||||
#define Z19 C
|
||||
#define Z1A 0
|
||||
#define Z1B 2
|
||||
#define Z1C B
|
||||
#define Z1D 7
|
||||
#define Z1E 5
|
||||
#define Z1F 3
|
||||
|
||||
#define Z20 B
|
||||
#define Z21 8
|
||||
#define Z22 C
|
||||
#define Z23 0
|
||||
#define Z24 5
|
||||
#define Z25 2
|
||||
#define Z26 F
|
||||
#define Z27 D
|
||||
#define Z28 A
|
||||
#define Z29 E
|
||||
#define Z2A 3
|
||||
#define Z2B 6
|
||||
#define Z2C 7
|
||||
#define Z2D 1
|
||||
#define Z2E 9
|
||||
#define Z2F 4
|
||||
|
||||
#define Z30 7
|
||||
#define Z31 9
|
||||
#define Z32 3
|
||||
#define Z33 1
|
||||
#define Z34 D
|
||||
#define Z35 C
|
||||
#define Z36 B
|
||||
#define Z37 E
|
||||
#define Z38 2
|
||||
#define Z39 6
|
||||
#define Z3A 5
|
||||
#define Z3B A
|
||||
#define Z3C 4
|
||||
#define Z3D 0
|
||||
#define Z3E F
|
||||
#define Z3F 8
|
||||
|
||||
#define Z40 9
|
||||
#define Z41 0
|
||||
#define Z42 5
|
||||
#define Z43 7
|
||||
#define Z44 2
|
||||
#define Z45 4
|
||||
#define Z46 A
|
||||
#define Z47 F
|
||||
#define Z48 E
|
||||
#define Z49 1
|
||||
#define Z4A B
|
||||
#define Z4B C
|
||||
#define Z4C 6
|
||||
#define Z4D 8
|
||||
#define Z4E 3
|
||||
#define Z4F D
|
||||
|
||||
#define Z50 2
|
||||
#define Z51 C
|
||||
#define Z52 6
|
||||
#define Z53 A
|
||||
#define Z54 0
|
||||
#define Z55 B
|
||||
#define Z56 8
|
||||
#define Z57 3
|
||||
#define Z58 4
|
||||
#define Z59 D
|
||||
#define Z5A 7
|
||||
#define Z5B 5
|
||||
#define Z5C F
|
||||
#define Z5D E
|
||||
#define Z5E 1
|
||||
#define Z5F 9
|
||||
|
||||
#define Z60 C
|
||||
#define Z61 5
|
||||
#define Z62 1
|
||||
#define Z63 F
|
||||
#define Z64 E
|
||||
#define Z65 D
|
||||
#define Z66 4
|
||||
#define Z67 A
|
||||
#define Z68 0
|
||||
#define Z69 7
|
||||
#define Z6A 6
|
||||
#define Z6B 3
|
||||
#define Z6C 9
|
||||
#define Z6D 2
|
||||
#define Z6E 8
|
||||
#define Z6F B
|
||||
|
||||
#define Z70 D
|
||||
#define Z71 B
|
||||
#define Z72 7
|
||||
#define Z73 E
|
||||
#define Z74 C
|
||||
#define Z75 1
|
||||
#define Z76 3
|
||||
#define Z77 9
|
||||
#define Z78 5
|
||||
#define Z79 0
|
||||
#define Z7A F
|
||||
#define Z7B 4
|
||||
#define Z7C 8
|
||||
#define Z7D 6
|
||||
#define Z7E 2
|
||||
#define Z7F A
|
||||
|
||||
#define Z80 6
|
||||
#define Z81 F
|
||||
#define Z82 E
|
||||
#define Z83 9
|
||||
#define Z84 B
|
||||
#define Z85 3
|
||||
#define Z86 0
|
||||
#define Z87 8
|
||||
#define Z88 C
|
||||
#define Z89 2
|
||||
#define Z8A D
|
||||
#define Z8B 7
|
||||
#define Z8C 1
|
||||
#define Z8D 4
|
||||
#define Z8E A
|
||||
#define Z8F 5
|
||||
|
||||
#define Z90 A
|
||||
#define Z91 2
|
||||
#define Z92 8
|
||||
#define Z93 4
|
||||
#define Z94 7
|
||||
#define Z95 6
|
||||
#define Z96 1
|
||||
#define Z97 5
|
||||
#define Z98 F
|
||||
#define Z99 B
|
||||
#define Z9A 9
|
||||
#define Z9B E
|
||||
#define Z9C 3
|
||||
#define Z9D C
|
||||
#define Z9E D
|
||||
#define Z9F 0
|
||||
|
||||
#define Mx(r, i) Mx_(Z ## r ## i)
|
||||
#define Mx_(n) Mx__(n)
|
||||
#define Mx__(n) M ## n
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
#define B2B8W_G(a, b, c, d, x, y) \
|
||||
|
@@ -757,7 +757,6 @@ void blake512_8way_final_le( blake_8way_big_context *sc, void *hash,
|
||||
GB_8WAY(Mx(1, C), Mx(1, D), CBx(1, C), CBx(1, D), V2, V7, V8, VD);
|
||||
GB_8WAY(Mx(1, E), Mx(1, F), CBx(1, E), CBx(1, F), V3, V4, V9, VE);
|
||||
|
||||
|
||||
// remaining rounds
|
||||
ROUND_B_8WAY(2);
|
||||
ROUND_B_8WAY(3);
|
||||
|
@@ -30,16 +30,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "simd-utils.h"
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "sph_blake2b.h"
|
||||
|
||||
// Cyclic right rotation.
|
||||
|
||||
#ifndef ROTR64
|
||||
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
|
||||
#endif
|
||||
|
||||
// Little-endian byte access.
|
||||
|
||||
#define B2B_GET64(p) \
|
||||
@@ -54,45 +48,131 @@
|
||||
|
||||
// G Mixing function.
|
||||
|
||||
#define B2B_G(a, b, c, d, x, y) { \
|
||||
v[a] = v[a] + v[b] + x; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 32); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 24); \
|
||||
v[a] = v[a] + v[b] + y; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 16); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 63); }
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#define BLAKE2B_G( R, Sa, Sb, Sc, Sd, Na, Nb ) \
|
||||
{ \
|
||||
V[0] = _mm256_add_epi64( V[0], _mm256_add_epi64( V[1], \
|
||||
_mm256_set_epi64x( m[ sigma[R][Sd] ], m[ sigma[R][Sc] ], \
|
||||
m[ sigma[R][Sb] ], m[ sigma[R][Sa] ] ) ) ); \
|
||||
V[3] = mm256_ror_64( _mm256_xor_si256( V[3], V[0] ), Na ); \
|
||||
V[2] = _mm256_add_epi64( V[2], V[3] ); \
|
||||
V[1] = mm256_ror_64( _mm256_xor_si256( V[1], V[2] ), Nb ); \
|
||||
}
|
||||
|
||||
#define BLAKE2B_ROUND( R ) \
|
||||
{ \
|
||||
__m256i *V = (__m256i*)v; \
|
||||
BLAKE2B_G( R, 0, 2, 4, 6, 32, 24 ); \
|
||||
BLAKE2B_G( R, 1, 3, 5, 7, 16, 63 ); \
|
||||
V[3] = mm256_shufll_64( V[3] ); \
|
||||
V[2] = mm256_swap_128( V[2] ); \
|
||||
V[1] = mm256_shuflr_64( V[1] ); \
|
||||
BLAKE2B_G( R, 8, 10, 12, 14, 32, 24 ); \
|
||||
BLAKE2B_G( R, 9, 11, 13, 15, 16, 63 ); \
|
||||
V[3] = mm256_shuflr_64( V[3] ); \
|
||||
V[2] = mm256_swap_128( V[2] ); \
|
||||
V[1] = mm256_shufll_64( V[1] ); \
|
||||
}
|
||||
|
||||
#elif defined(__SSSE3__)
|
||||
|
||||
#define BLAKE2B_G( R, Va, Vb, Vc, Vd, Sa, Sb, Na, Nb ) \
|
||||
{ \
|
||||
Va = _mm_add_epi64( Va, _mm_add_epi64( Vb, \
|
||||
_mm_set_epi64x( m[ sigma[R][Sb] ], m[ sigma[R][Sa] ] ) ) ); \
|
||||
Vd = mm128_ror_64( _mm_xor_si128( Vd, Va ), Na ); \
|
||||
Vc = _mm_add_epi64( Vc, Vd ); \
|
||||
Vb = mm128_ror_64( _mm_xor_si128( Vb, Vc ), Nb ); \
|
||||
}
|
||||
|
||||
#define BLAKE2B_ROUND( R ) \
|
||||
{ \
|
||||
__m128i *V = (__m128i*)v; \
|
||||
__m128i V2, V3, V6, V7; \
|
||||
BLAKE2B_G( R, V[0], V[2], V[4], V[6], 0, 2, 32, 24 ); \
|
||||
BLAKE2B_G( R, V[0], V[2], V[4], V[6], 1, 3, 16, 63 ); \
|
||||
BLAKE2B_G( R, V[1], V[3], V[5], V[7], 4, 6, 32, 24 ); \
|
||||
BLAKE2B_G( R, V[1], V[3], V[5], V[7], 5, 7, 16, 63 ); \
|
||||
V2 = mm128_shufl2r_64( V[2], V[3] ); \
|
||||
V3 = mm128_shufl2r_64( V[3], V[2] ); \
|
||||
V6 = mm128_shufl2l_64( V[6], V[7] ); \
|
||||
V7 = mm128_shufl2l_64( V[7], V[6] ); \
|
||||
BLAKE2B_G( R, V[0], V2, V[5], V6, 8, 10, 32, 24 ); \
|
||||
BLAKE2B_G( R, V[0], V2, V[5], V6, 9, 11, 16, 63 ); \
|
||||
BLAKE2B_G( R, V[1], V3, V[4], V7, 12, 14, 32, 24 ); \
|
||||
BLAKE2B_G( R, V[1], V3, V[4], V7, 13, 15, 16, 63 ); \
|
||||
V[2] = mm128_shufl2l_64( V2, V3 ); \
|
||||
V[3] = mm128_shufl2l_64( V3, V2 ); \
|
||||
V[6] = mm128_shufl2r_64( V6, V7 ); \
|
||||
V[7] = mm128_shufl2r_64( V7, V6 ); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef ROTR64
|
||||
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
|
||||
#endif
|
||||
|
||||
#define BLAKE2B_G( R, Va, Vb, Vc, Vd, Sa, Sb ) \
|
||||
{ \
|
||||
Va = Va + Vb + m[ sigma[R][Sa] ]; \
|
||||
Vd = ROTR64( Vd ^ Va, 32 ); \
|
||||
Vc = Vc + Vd; \
|
||||
Vb = ROTR64( Vb ^ Vc, 24 ); \
|
||||
Va = Va + Vb + m[ sigma[R][Sb] ]; \
|
||||
Vd = ROTR64( Vd ^ Va, 16 ); \
|
||||
Vc = Vc + Vd; \
|
||||
Vb = ROTR64( Vb ^ Vc, 63 ); \
|
||||
}
|
||||
|
||||
#define BLAKE2B_ROUND( R ) \
|
||||
{ \
|
||||
BLAKE2B_G( R, v[ 0], v[ 4], v[ 8], v[12], 0, 1 ); \
|
||||
BLAKE2B_G( R, v[ 1], v[ 5], v[ 9], v[13], 2, 3 ); \
|
||||
BLAKE2B_G( R, v[ 2], v[ 6], v[10], v[14], 4, 5 ); \
|
||||
BLAKE2B_G( R, v[ 3], v[ 7], v[11], v[15], 6, 7 ); \
|
||||
BLAKE2B_G( R, v[ 0], v[ 5], v[10], v[15], 8, 9 ); \
|
||||
BLAKE2B_G( R, v[ 1], v[ 6], v[11], v[12], 10, 11 ); \
|
||||
BLAKE2B_G( R, v[ 2], v[ 7], v[ 8], v[13], 12, 13 ); \
|
||||
BLAKE2B_G( R, v[ 3], v[ 4], v[ 9], v[14], 14, 15 ); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Initialization Vector.
|
||||
|
||||
static const uint64_t blake2b_iv[8] = {
|
||||
static const uint64_t blake2b_iv[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
|
||||
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
|
||||
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
|
||||
};
|
||||
|
||||
static const uint8_t sigma[12][16] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
|
||||
// Compression function. "last" flag indicates last block.
|
||||
|
||||
static void blake2b_compress( sph_blake2b_ctx *ctx, int last )
|
||||
{
|
||||
const uint8_t sigma[12][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
int i;
|
||||
uint64_t v[16], m[16];
|
||||
uint64_t v[16] __attribute__ ((aligned (32)));
|
||||
uint64_t m[16] __attribute__ ((aligned (32)));
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++) { // init work variables
|
||||
v[i] = ctx->h[i];
|
||||
@@ -106,16 +186,8 @@ static void blake2b_compress( sph_blake2b_ctx *ctx, int last )
|
||||
for (i = 0; i < 16; i++) // get little-endian words
|
||||
m[i] = B2B_GET64(&ctx->b[8 * i]);
|
||||
|
||||
for (i = 0; i < 12; i++) { // twelve rounds
|
||||
B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
|
||||
B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
|
||||
B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
|
||||
B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
|
||||
B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
|
||||
B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
|
||||
B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
|
||||
B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
|
||||
}
|
||||
for (i = 0; i < 12; i++)
|
||||
BLAKE2B_ROUND( i );
|
||||
|
||||
for( i = 0; i < 8; ++i )
|
||||
ctx->h[i] ^= v[i] ^ v[i + 8];
|
||||
|
@@ -150,12 +150,10 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
mm128_vrol256_64( s6, s7 ); \
|
||||
mm128_vror256_64( s2, s3 ); \
|
||||
mm128_swap256_128( s4, s5 ); \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
G_2X64( s0, s2, s5, s6 ); \
|
||||
G_2X64( s1, s3, s4, s7 ); \
|
||||
mm128_vror256_64( s6, s7 ); \
|
||||
mm128_vrol256_64( s2, s3 ); \
|
||||
mm128_swap256_128( s4, s5 );
|
||||
mm128_vrol256_64( s2, s3 );
|
||||
|
||||
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
|
@@ -1,323 +0,0 @@
|
||||
/*
|
||||
* Copyright 2009 Colin Percival, 2014 savale
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "simd-utils.h"
|
||||
#include <algo/yespower/crypto/sph_types.h>
|
||||
#include "blake2b-yp.h"
|
||||
|
||||
// Cyclic right rotation.
|
||||
//#ifndef ROTR64
|
||||
//#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
|
||||
//#endif
|
||||
|
||||
#define ROTR64(x, y) ror64( x, y )
|
||||
|
||||
// Little-endian byte access.
|
||||
#define B2B_GET64(p) \
|
||||
(((uint64_t) ((uint8_t *) (p))[0]) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[7]) << 56))
|
||||
|
||||
// G Mixing function.
|
||||
#define B2B_G(a, b, c, d, x, y) { \
|
||||
v[a] = v[a] + v[b] + x; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 32); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 24); \
|
||||
v[a] = v[a] + v[b] + y; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 16); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 63); }
|
||||
|
||||
// Initialization Vector.
|
||||
static const uint64_t blake2b_iv[8] = {
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
|
||||
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
|
||||
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
|
||||
};
|
||||
|
||||
// Compression function. "last" flag indicates last block.
|
||||
static void blake2b_compress(blake2b_yp_ctx *ctx, int last)
|
||||
{
|
||||
const uint8_t sigma[12][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
int i;
|
||||
uint64_t v[16], m[16];
|
||||
|
||||
// init work variables
|
||||
for (i = 0; i < 8; i++) {
|
||||
v[i] = ctx->h[i];
|
||||
v[i + 8] = blake2b_iv[i];
|
||||
}
|
||||
|
||||
v[12] ^= ctx->t[0]; // low 64 bits of offset
|
||||
v[13] ^= ctx->t[1]; // high 64 bits
|
||||
|
||||
// last block flag set ?
|
||||
if (last) {
|
||||
v[14] = ~v[14];
|
||||
}
|
||||
|
||||
// get little-endian words
|
||||
for (i = 0; i < 16; i++) {
|
||||
m[i] = B2B_GET64(&ctx->b[8 * i]);
|
||||
}
|
||||
|
||||
// twelve rounds
|
||||
for (i = 0; i < 12; i++) {
|
||||
B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
|
||||
B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
|
||||
B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
|
||||
B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
|
||||
B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
|
||||
B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
|
||||
B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
|
||||
B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i) {
|
||||
ctx->h[i] ^= v[i] ^ v[i + 8];
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the hashing context "ctx" with optional key "key".
|
||||
// 1 <= outlen <= 64 gives the digest size in bytes.
|
||||
// Secret key (also <= 64 bytes) is optional (keylen = 0).
|
||||
int blake2b_yp_init(blake2b_yp_ctx *ctx, size_t outlen,
|
||||
const void *key, size_t keylen) // (keylen=0: no key)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
// illegal parameters
|
||||
if (outlen == 0 || outlen > 64 || keylen > 64) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// state, "param block"
|
||||
for (i = 0; i < 8; i++) {
|
||||
ctx->h[i] = blake2b_iv[i];
|
||||
}
|
||||
|
||||
ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;
|
||||
|
||||
ctx->t[0] = 0; // input count low word
|
||||
ctx->t[1] = 0; // input count high word
|
||||
ctx->c = 0; // pointer within buffer
|
||||
ctx->outlen = outlen;
|
||||
|
||||
// zero input block
|
||||
for (i = keylen; i < 128; i++) {
|
||||
ctx->b[i] = 0;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
blake2b_yp_update(ctx, key, keylen);
|
||||
ctx->c = 128; // at the end
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Add "inlen" bytes from "in" into the hash.
|
||||
void blake2b_yp_update(blake2b_yp_ctx *ctx,
|
||||
const void *in, size_t inlen) // data bytes
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < inlen; i++) {
|
||||
if (ctx->c == 128) { // buffer full ?
|
||||
ctx->t[0] += ctx->c; // add counters
|
||||
if (ctx->t[0] < ctx->c) // carry overflow ?
|
||||
ctx->t[1]++; // high word
|
||||
blake2b_compress(ctx, 0); // compress (not last)
|
||||
ctx->c = 0; // counter to zero
|
||||
}
|
||||
ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the message digest (size given in init).
|
||||
// Result placed in "out".
|
||||
void blake2b_yp_final(blake2b_yp_ctx *ctx, void *out)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
ctx->t[0] += ctx->c; // mark last block offset
|
||||
// carry overflow
|
||||
if (ctx->t[0] < ctx->c) {
|
||||
ctx->t[1]++; // high word
|
||||
}
|
||||
|
||||
// fill up with zeros
|
||||
while (ctx->c < 128) {
|
||||
ctx->b[ctx->c++] = 0;
|
||||
}
|
||||
|
||||
blake2b_compress(ctx, 1); // final block flag = 1
|
||||
|
||||
// little endian convert and store
|
||||
for (i = 0; i < ctx->outlen; i++) {
|
||||
((uint8_t *) out)[i] =
|
||||
(ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
// inlen = number of bytes
|
||||
void blake2b_yp_hash(void *out, const void *in, size_t inlen) {
|
||||
blake2b_yp_ctx ctx;
|
||||
blake2b_yp_init(&ctx, 32, NULL, 0);
|
||||
blake2b_yp_update(&ctx, in, inlen);
|
||||
blake2b_yp_final(&ctx, out);
|
||||
}
|
||||
|
||||
// // keylen = number of bytes
|
||||
void hmac_blake2b_yp_init(hmac_yp_ctx *hctx, const void *_key, size_t keylen) {
|
||||
const uint8_t *key = _key;
|
||||
uint8_t keyhash[32];
|
||||
uint8_t pad[64];
|
||||
uint64_t i;
|
||||
|
||||
if (keylen > 64) {
|
||||
blake2b_yp_hash(keyhash, key, keylen);
|
||||
key = keyhash;
|
||||
keylen = 32;
|
||||
}
|
||||
|
||||
blake2b_yp_init(&hctx->inner, 32, NULL, 0);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
|
||||
blake2b_yp_update(&hctx->inner, pad, 64);
|
||||
blake2b_yp_init(&hctx->outer, 32, NULL, 0);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
|
||||
blake2b_yp_update(&hctx->outer, pad, 64);
|
||||
memset(keyhash, 0, 32);
|
||||
}
|
||||
|
||||
// datalen = number of bits
|
||||
void hmac_blake2b_yp_update(hmac_yp_ctx *hctx, const void *data, size_t datalen) {
|
||||
// update the inner state
|
||||
blake2b_yp_update(&hctx->inner, data, datalen);
|
||||
}
|
||||
|
||||
void hmac_blake2b_yp_final(hmac_yp_ctx *hctx, uint8_t *digest) {
|
||||
uint8_t ihash[32];
|
||||
blake2b_yp_final(&hctx->inner, ihash);
|
||||
blake2b_yp_update(&hctx->outer, ihash, 32);
|
||||
blake2b_yp_final(&hctx->outer, digest);
|
||||
memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
// // keylen = number of bytes; inlen = number of bytes
|
||||
void hmac_blake2b_yp_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen) {
|
||||
hmac_yp_ctx hctx;
|
||||
hmac_blake2b_yp_init(&hctx, key, keylen);
|
||||
hmac_blake2b_yp_update(&hctx, in, inlen);
|
||||
hmac_blake2b_yp_final(&hctx, out);
|
||||
}
|
||||
|
||||
void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
hmac_yp_ctx PShctx, hctx;
|
||||
size_t i;
|
||||
uint32_t ivec;
|
||||
uint8_t U[32];
|
||||
uint8_t T[32];
|
||||
uint64_t j;
|
||||
int k;
|
||||
size_t clen;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
hmac_blake2b_yp_init(&PShctx, passwd, passwdlen);
|
||||
hmac_blake2b_yp_update(&PShctx, salt, saltlen);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
ivec = bswap_32( i+1 );
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(hmac_yp_ctx));
|
||||
hmac_blake2b_yp_update(&hctx, &ivec, 4);
|
||||
hmac_blake2b_yp_final(&hctx, U);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(T, U, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
hmac_blake2b_yp_init(&hctx, passwd, passwdlen);
|
||||
hmac_blake2b_yp_update(&hctx, U, 32);
|
||||
hmac_blake2b_yp_final(&hctx, U);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++) {
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32) {
|
||||
clen = 32;
|
||||
}
|
||||
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean PShctx, since we never called _Final on it. */
|
||||
memset(&PShctx, 0, sizeof(hmac_yp_ctx));
|
||||
}
|
@@ -1,42 +0,0 @@
|
||||
#pragma once
|
||||
#ifndef __BLAKE2B_H__
|
||||
#define __BLAKE2B_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER) || defined(__x86_64__) || defined(__x86__)
|
||||
#define NATIVE_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
// state context
|
||||
typedef struct {
|
||||
uint8_t b[128]; // input buffer
|
||||
uint64_t h[8]; // chained state
|
||||
uint64_t t[2]; // total number of bytes
|
||||
size_t c; // pointer for b[]
|
||||
size_t outlen; // digest size
|
||||
} blake2b_yp_ctx;
|
||||
|
||||
typedef struct {
|
||||
blake2b_yp_ctx inner;
|
||||
blake2b_yp_ctx outer;
|
||||
} hmac_yp_ctx;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int blake2b_yp_init(blake2b_yp_ctx *ctx, size_t outlen, const void *key, size_t keylen);
|
||||
void blake2b_yp_update(blake2b_yp_ctx *ctx, const void *in, size_t inlen);
|
||||
void blake2b_yp_final(blake2b_yp_ctx *ctx, void *out);
|
||||
void blake2b_yp_hash(void *out, const void *in, size_t inlen);
|
||||
void hmac_blake2b_yp_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen);
|
||||
void pbkdf2_blake2b_yp(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
150
algo/yespower/crypto/hmac-blake2b.c
Normal file
150
algo/yespower/crypto/hmac-blake2b.c
Normal file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* Copyright 2009 Colin Percival, 2014 savale
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "simd-utils.h"
|
||||
#include "hmac-blake2b.h"
|
||||
|
||||
// keylen = number of bytes
|
||||
void hmac_blake2b_init( hmac_blake2b_ctx *hctx, const void *_key,
|
||||
size_t keylen )
|
||||
{
|
||||
const uint8_t *key = _key;
|
||||
uint8_t keyhash[32];
|
||||
uint8_t pad[64];
|
||||
uint64_t i;
|
||||
|
||||
if (keylen > 64)
|
||||
{
|
||||
sph_blake2b_ctx ctx;
|
||||
sph_blake2b_init( &ctx, 32, NULL, 0 );
|
||||
sph_blake2b_update( &ctx, key, keylen );
|
||||
sph_blake2b_final( &ctx, keyhash );
|
||||
key = keyhash;
|
||||
keylen = 32;
|
||||
}
|
||||
|
||||
sph_blake2b_init( &hctx->inner, 32, NULL, 0 );
|
||||
memset( pad, 0x36, 64 );
|
||||
for ( i = 0; i < keylen; ++i )
|
||||
pad[i] ^= key[i];
|
||||
|
||||
sph_blake2b_update( &hctx->inner, pad, 64 );
|
||||
sph_blake2b_init( &hctx->outer, 32, NULL, 0 );
|
||||
memset( pad, 0x5c, 64 );
|
||||
for ( i = 0; i < keylen; ++i )
|
||||
pad[i] ^= key[i];
|
||||
|
||||
sph_blake2b_update( &hctx->outer, pad, 64 );
|
||||
memset( keyhash, 0, 32 );
|
||||
}
|
||||
|
||||
// datalen = number of bits
|
||||
void hmac_blake2b_update( hmac_blake2b_ctx *hctx, const void *data,
|
||||
size_t datalen )
|
||||
{
|
||||
// update the inner state
|
||||
sph_blake2b_update( &hctx->inner, data, datalen );
|
||||
}
|
||||
|
||||
void hmac_blake2b_final( hmac_blake2b_ctx *hctx, uint8_t *digest )
|
||||
{
|
||||
uint8_t ihash[32];
|
||||
sph_blake2b_final( &hctx->inner, ihash );
|
||||
sph_blake2b_update( &hctx->outer, ihash, 32 );
|
||||
sph_blake2b_final( &hctx->outer, digest );
|
||||
memset( ihash, 0, 32 );
|
||||
}
|
||||
|
||||
// // keylen = number of bytes; inlen = number of bytes
|
||||
void hmac_blake2b_hash( void *out, const void *key, size_t keylen,
|
||||
const void *in, size_t inlen )
|
||||
{
|
||||
hmac_blake2b_ctx hctx;
|
||||
hmac_blake2b_init( &hctx, key, keylen );
|
||||
hmac_blake2b_update( &hctx, in, inlen );
|
||||
hmac_blake2b_final( &hctx, out );
|
||||
}
|
||||
|
||||
void pbkdf2_blake2b( const uint8_t *passwd, size_t passwdlen,
|
||||
const uint8_t *salt, size_t saltlen, uint64_t c,
|
||||
uint8_t *buf, size_t dkLen )
|
||||
{
|
||||
hmac_blake2b_ctx PShctx, hctx;
|
||||
size_t i;
|
||||
uint32_t ivec;
|
||||
uint8_t U[32];
|
||||
uint8_t T[32];
|
||||
uint64_t j;
|
||||
int k;
|
||||
size_t clen;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
hmac_blake2b_init( &PShctx, passwd, passwdlen );
|
||||
hmac_blake2b_update( &PShctx, salt, saltlen );
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for ( i = 0; i * 32 < dkLen; i++ )
|
||||
{
|
||||
/* Generate INT(i + 1). */
|
||||
ivec = bswap_32( i+1 );
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy( &hctx, &PShctx, sizeof(hmac_blake2b_ctx) );
|
||||
hmac_blake2b_update( &hctx, &ivec, 4 );
|
||||
hmac_blake2b_final( &hctx, U );
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy( T, U, 32 );
|
||||
|
||||
for ( j = 2; j <= c; j++ )
|
||||
{
|
||||
/* Compute U_j. */
|
||||
hmac_blake2b_init( &hctx, passwd, passwdlen );
|
||||
hmac_blake2b_update( &hctx, U, 32 );
|
||||
hmac_blake2b_final( &hctx, U );
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for ( k = 0; k < 32; k++ )
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
|
||||
memcpy( &buf[i * 32], T, clen );
|
||||
}
|
||||
|
||||
/* Clean PShctx, since we never called _Final on it. */
|
||||
memset( &PShctx, 0, sizeof(hmac_blake2b_ctx) );
|
||||
}
|
34
algo/yespower/crypto/hmac-blake2b.h
Normal file
34
algo/yespower/crypto/hmac-blake2b.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
#ifndef __HMAC_BLAKE2B_H__
|
||||
#define __HMAC_BLAKE2B_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "algo/blake/sph_blake2b.h"
|
||||
|
||||
#if defined(_MSC_VER) || defined(__x86_64__) || defined(__x86__)
|
||||
#define NATIVE_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
sph_blake2b_ctx inner;
|
||||
sph_blake2b_ctx outer;
|
||||
} hmac_blake2b_ctx;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void hmac_blake2b_hash( void *out, const void *key, size_t keylen,
|
||||
const void *in, size_t inlen );
|
||||
|
||||
void pbkdf2_blake2b( const uint8_t * passwd, size_t passwdlen,
|
||||
const uint8_t * salt, size_t saltlen, uint64_t c,
|
||||
uint8_t * buf, size_t dkLen );
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@@ -95,7 +95,7 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "crypto/blake2b-yp.h"
|
||||
#include "crypto/hmac-blake2b.h"
|
||||
#include "yespower.h"
|
||||
|
||||
#ifdef __unix__
|
||||
@@ -1136,6 +1136,7 @@ int yespower_b2b(yespower_local_t *local,
|
||||
salsa20_blk_t *V, *XY;
|
||||
pwxform_ctx_t ctx;
|
||||
uint8_t init_hash[32];
|
||||
sph_blake2b_ctx blake2b_ctx;
|
||||
|
||||
/* Sanity-check parameters */
|
||||
if ((N < 1024 || N > 512 * 1024 || r < 8 || r > 32 ||
|
||||
@@ -1167,7 +1168,9 @@ int yespower_b2b(yespower_local_t *local,
|
||||
ctx.S0 = S;
|
||||
ctx.S1 = S + Swidth_to_Sbytes1(Swidth);
|
||||
|
||||
blake2b_yp_hash(init_hash, src, srclen);
|
||||
sph_blake2b_init( &blake2b_ctx, 32, NULL, 0 );
|
||||
sph_blake2b_update( &blake2b_ctx, src, srclen );
|
||||
sph_blake2b_final( &blake2b_ctx, init_hash );
|
||||
|
||||
ctx.S2 = S + 2 * Swidth_to_Sbytes1(Swidth);
|
||||
ctx.w = 0;
|
||||
@@ -1181,7 +1184,7 @@ int yespower_b2b(yespower_local_t *local,
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
pbkdf2_blake2b_yp(init_hash, sizeof(init_hash), src, srclen, 1, B, 128);
|
||||
pbkdf2_blake2b(init_hash, sizeof(init_hash), src, srclen, 1, B, 128);
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
@@ -1190,7 +1193,7 @@ int yespower_b2b(yespower_local_t *local,
|
||||
|
||||
if ( work_restart[thrid].restart ) return false;
|
||||
|
||||
hmac_blake2b_yp_hash((uint8_t *)dst, B + B_size - 64, 64, init_hash, sizeof(init_hash));
|
||||
hmac_blake2b_hash((uint8_t *)dst, B + B_size - 64, 64, init_hash, sizeof(init_hash));
|
||||
|
||||
/* Success! */
|
||||
return 1;
|
||||
|
@@ -249,7 +249,7 @@ bool register_power2b_algo( algo_gate_t* gate )
|
||||
applog( LOG_NOTICE,"Key= \"%s\"", yespower_params.pers );
|
||||
applog( LOG_NOTICE,"Key length= %d\n", yespower_params.perslen );
|
||||
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower_b2b;
|
||||
gate->hash = (void*)&yespower_b2b_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
|
48
configure
vendored
48
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.20.0.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.20.1.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.20.0'
|
||||
PACKAGE_STRING='cpuminer-opt 3.20.0'
|
||||
PACKAGE_VERSION='3.20.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.20.1'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.20.0 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.20.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.20.0:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.20.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.20.0
|
||||
cpuminer-opt configure 3.20.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.20.0, which was
|
||||
It was created by cpuminer-opt $as_me 3.20.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.20.0'
|
||||
VERSION='3.20.1'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -5820,6 +5820,34 @@ $as_echo "#define USE_AVX2 1" >>confdefs.h
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
|
||||
$as_echo "yes" >&6; }
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX512 code" >&5
|
||||
$as_echo_n "checking whether we can compile AVX512 code... " >&6; }
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
asm ("vpaddd %zmm0, %zmm1, %zmm2{%k1}");
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
|
||||
$as_echo "#define USE_AVX512 1" >>confdefs.h
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
|
||||
$as_echo "yes" >&6; }
|
||||
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||||
$as_echo "no" >&6; }
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX512 instruction set." >&5
|
||||
$as_echo "$as_me: WARNING: The assembler does not support the AVX512 instruction set." >&2;}
|
||||
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||||
@@ -6690,7 +6718,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.20.0, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.20.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6784,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.20.0
|
||||
cpuminer-opt config.status 3.20.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
10
configure.ac
10
configure.ac
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.20.0])
|
||||
AC_INIT([cpuminer-opt], [3.20.1])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
@@ -93,6 +93,14 @@ then
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %ymm0, %ymm1, %ymm2");])],
|
||||
AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_MSG_CHECKING(whether we can compile AVX512 code)
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %zmm0, %zmm1, %zmm2{%k1}");])],
|
||||
AC_DEFINE(USE_AVX512, 1, [Define to 1 if AVX512 assembly is available.])
|
||||
AC_MSG_RESULT(yes)
|
||||
,
|
||||
AC_MSG_RESULT(no)
|
||||
AC_MSG_WARN([The assembler does not support the AVX512 instruction set.])
|
||||
)
|
||||
,
|
||||
AC_MSG_RESULT(no)
|
||||
AC_MSG_WARN([The assembler does not support the AVX2 instruction set.])
|
||||
|
24
cpu-miner.c
24
cpu-miner.c
@@ -1300,6 +1300,7 @@ static int share_result( int result, struct work *work,
|
||||
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
|
||||
bres, CL_N, share_time, latency );
|
||||
|
||||
/*
|
||||
if ( unlikely( opt_debug || !result || solved ) )
|
||||
{
|
||||
if ( have_stratum )
|
||||
@@ -1309,14 +1310,27 @@ static int share_result( int result, struct work *work,
|
||||
applog2( LOG_INFO, "Diff %.5g, Block %d",
|
||||
my_stats.share_diff, work ? work->height : last_block_height );
|
||||
}
|
||||
*/
|
||||
|
||||
if ( unlikely( !( opt_quiet || result || stale ) ) )
|
||||
{
|
||||
uint32_t str[8];
|
||||
uint32_t *targ;
|
||||
// uint32_t str[8];
|
||||
// uint32_t *targ;
|
||||
|
||||
if ( reason ) applog( LOG_MINR, "Reject reason: %s", reason );
|
||||
|
||||
if ( reason ) applog2( LOG_MINR, "Reject reason: %s", reason );
|
||||
{
|
||||
// The exact hash is not avaiable here, it's just an imprecise
|
||||
// approximation calculated from the share difficulty. It's useless
|
||||
// for anything other than low diff rejects. Until and unless a
|
||||
// solution is implemented to make the hash and targets avaiable
|
||||
// don't bother displaying them. In the meantime display the diff for
|
||||
// low diff rejects.
|
||||
|
||||
if ( strstr( reason, "difficulty" ) )
|
||||
applog2( LOG_MINR, "Share diff: %.5g, Target: %.5g",
|
||||
my_stats.share_diff, my_stats.target_diff );
|
||||
|
||||
/*
|
||||
diff_to_hash( str, my_stats.share_diff );
|
||||
applog2( LOG_INFO, "Hash: %08x%08x%08x%08x%08x%08x", str[7], str[6],
|
||||
str[5], str[4], str[3],str[2], str[1], str[0] );
|
||||
@@ -1330,6 +1344,8 @@ static int share_result( int result, struct work *work,
|
||||
}
|
||||
applog2( LOG_INFO, "Target: %08x%08x%08x%08x%08x%08x", targ[7], targ[6],
|
||||
targ[5], targ[4], targ[3], targ[2], targ[1], targ[0] );
|
||||
*/
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@@ -546,14 +546,13 @@ static inline void mm128_block_bswap_32( __m128i *d, const __m128i *s )
|
||||
|
||||
|
||||
// Two input shuffle-rotate.
|
||||
// Concatenate v1 & v2 and rotate as one 256 bit vector.
|
||||
// Continue to use vror/vrol for now to avoid confusion with
|
||||
// shufl2r/shufl2l function macros available with AVX512.
|
||||
// Concatenate v1 & v2 and bit rotate as one 256 bit vector.
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
// Function macro with two inputs and one output, inputs are preserved.
|
||||
// Two input functions are not available without SSSE3. Use procedure
|
||||
// Function macros with two inputs and one output, inputs are preserved.
|
||||
// Returns the high 128 bits, ie updated v1.
|
||||
// These two-input functions are not available without SSSE3. Use procedure
|
||||
// macros below instead.
|
||||
|
||||
#define mm128_shufl2r_64( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
|
||||
@@ -568,12 +567,9 @@ static inline void mm128_block_bswap_32( __m128i *d, const __m128i *s )
|
||||
#define mm128_shufl2r_8( v1, v2 ) _mm_alignr_epi8( v2, v1, 8 )
|
||||
#define mm128_shufl2l_8( v1, v2 ) _mm_alignr_epi8( v1, v2, 8 )
|
||||
|
||||
// Procedure macros with 2 inputs and 2 outputs, inputs args are overwritten.
|
||||
|
||||
// These macros retain the vrol/vror name for now to avoid
|
||||
// confusion with the shufl2r/shuffle2l function macros above.
|
||||
// These may be renamed to something like shufl2r2 for 2 nputs and
|
||||
// 2 outputs, ie SHUFfLe 2 inputs Right with 2 outputs.
|
||||
// Procedure macros with 2 inputs and 2 outputs, input args are overwritten.
|
||||
// Deprecated for SSSE3 and above, they exist for SSSE3 only for compatibility
|
||||
// with existing code. The function macros above can be used more effciently.
|
||||
|
||||
#define mm128_vror256_64( v1, v2 ) \
|
||||
do { \
|
||||
|
Reference in New Issue
Block a user