mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.8.0
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
#include "blake-gate.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (BLAKE_4WAY)
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
|
@@ -7,6 +7,7 @@ int64_t blake_get_max64 ()
|
||||
|
||||
bool register_blake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blake_get_max64;
|
||||
//#if defined (__AVX2__) && defined (FOUR_WAY)
|
||||
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
@@ -14,7 +15,6 @@ bool register_blake_algo( algo_gate_t* gate )
|
||||
// gate->hash = (void*)&blakehash_8way;
|
||||
#if defined(BLAKE_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_blake_4way;
|
||||
gate->hash = (void*)&blakehash_4way;
|
||||
#else
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#if defined(__AVX2__)
|
||||
#define BLAKE_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -78,6 +78,8 @@ static const sph_u64 IV512[8] = {
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
|
||||
|
||||
// Blake-256 4 & 8 way, Blake-512 4way
|
||||
|
||||
static const unsigned sigma[16][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
@@ -273,6 +275,8 @@ static const unsigned sigma[16][16] = {
|
||||
#define Mx_(n) Mx__(n)
|
||||
#define Mx__(n) M ## n
|
||||
|
||||
// Blake-256 4 & 8 way
|
||||
|
||||
#define CSx(r, i) CSx_(Z ## r ## i)
|
||||
#define CSx_(n) CSx__(n)
|
||||
#define CSx__(n) CS ## n
|
||||
@@ -311,6 +315,8 @@ static const sph_u32 CS[16] = {
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#define CBx(r, i) CBx_(Z ## r ## i)
|
||||
#define CBx_(n) CBx__(n)
|
||||
#define CBx__(n) CB ## n
|
||||
@@ -401,6 +407,35 @@ do { \
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// BLAKE256 8 WAY
|
||||
|
||||
#define GS_8WAY( m0, m1, c0, c1, a, b, c, d ) \
|
||||
do { \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set1_epi32( c1 ), m0 ), b ), a ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set1_epi32( c0 ), m1 ), b ), a ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_S_8WAY(r) do { \
|
||||
GS_8WAY(Mx(r, 0), Mx(r, 1), CSx(r, 0), CSx(r, 1), V0, V4, V8, VC); \
|
||||
GS_8WAY(Mx(r, 2), Mx(r, 3), CSx(r, 2), CSx(r, 3), V1, V5, V9, VD); \
|
||||
GS_8WAY(Mx(r, 4), Mx(r, 5), CSx(r, 4), CSx(r, 5), V2, V6, VA, VE); \
|
||||
GS_8WAY(Mx(r, 6), Mx(r, 7), CSx(r, 6), CSx(r, 7), V3, V7, VB, VF); \
|
||||
GS_8WAY(Mx(r, 8), Mx(r, 9), CSx(r, 8), CSx(r, 9), V0, V5, VA, VF); \
|
||||
GS_8WAY(Mx(r, A), Mx(r, B), CSx(r, A), CSx(r, B), V1, V6, VB, VC); \
|
||||
GS_8WAY(Mx(r, C), Mx(r, D), CSx(r, C), CSx(r, D), V2, V7, V8, VD); \
|
||||
GS_8WAY(Mx(r, E), Mx(r, F), CSx(r, E), CSx(r, F), V3, V4, V9, VE); \
|
||||
} while (0)
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#define GB_4WAY(m0, m1, c0, c1, a, b, c, d) do { \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
@@ -627,6 +662,125 @@ do { \
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// Blake-256 8 way
|
||||
|
||||
#define DECL_STATE32_8WAY \
|
||||
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
|
||||
__m256i S0, S1, S2, S3; \
|
||||
sph_u32 T0, T1;
|
||||
|
||||
#define READ_STATE32_8WAY(state) \
|
||||
do { \
|
||||
H0 = (state)->H[0]; \
|
||||
H1 = (state)->H[1]; \
|
||||
H2 = (state)->H[2]; \
|
||||
H3 = (state)->H[3]; \
|
||||
H4 = (state)->H[4]; \
|
||||
H5 = (state)->H[5]; \
|
||||
H6 = (state)->H[6]; \
|
||||
H7 = (state)->H[7]; \
|
||||
S0 = (state)->S[0]; \
|
||||
S1 = (state)->S[1]; \
|
||||
S2 = (state)->S[2]; \
|
||||
S3 = (state)->S[3]; \
|
||||
T0 = (state)->T0; \
|
||||
T1 = (state)->T1; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE32_8WAY(state) \
|
||||
do { \
|
||||
(state)->H[0] = H0; \
|
||||
(state)->H[1] = H1; \
|
||||
(state)->H[2] = H2; \
|
||||
(state)->H[3] = H3; \
|
||||
(state)->H[4] = H4; \
|
||||
(state)->H[5] = H5; \
|
||||
(state)->H[6] = H6; \
|
||||
(state)->H[7] = H7; \
|
||||
(state)->S[0] = S0; \
|
||||
(state)->S[1] = S1; \
|
||||
(state)->S[2] = S2; \
|
||||
(state)->S[3] = S3; \
|
||||
(state)->T0 = T0; \
|
||||
(state)->T1 = T1; \
|
||||
} while (0)
|
||||
|
||||
#define COMPRESS32_8WAY( rounds ) \
|
||||
do { \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set1_epi32( CS0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set1_epi32( CS1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set1_epi32( CS2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set1_epi32( CS3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi32( T0 ), _mm256_set1_epi32( CS4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi32( T0 ), _mm256_set1_epi32( CS5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS7 ) ); \
|
||||
M0 = mm256_byteswap_32( * buf ); \
|
||||
M1 = mm256_byteswap_32( *(buf+1) ); \
|
||||
M2 = mm256_byteswap_32( *(buf+2) ); \
|
||||
M3 = mm256_byteswap_32( *(buf+3) ); \
|
||||
M4 = mm256_byteswap_32( *(buf+4) ); \
|
||||
M5 = mm256_byteswap_32( *(buf+5) ); \
|
||||
M6 = mm256_byteswap_32( *(buf+6) ); \
|
||||
M7 = mm256_byteswap_32( *(buf+7) ); \
|
||||
M8 = mm256_byteswap_32( *(buf+8) ); \
|
||||
M9 = mm256_byteswap_32( *(buf+9) ); \
|
||||
MA = mm256_byteswap_32( *(buf+10) ); \
|
||||
MB = mm256_byteswap_32( *(buf+11) ); \
|
||||
MC = mm256_byteswap_32( *(buf+12) ); \
|
||||
MD = mm256_byteswap_32( *(buf+13) ); \
|
||||
ME = mm256_byteswap_32( *(buf+14) ); \
|
||||
MF = mm256_byteswap_32( *(buf+15) ); \
|
||||
ROUND_S_8WAY(0); \
|
||||
ROUND_S_8WAY(1); \
|
||||
ROUND_S_8WAY(2); \
|
||||
ROUND_S_8WAY(3); \
|
||||
ROUND_S_8WAY(4); \
|
||||
ROUND_S_8WAY(5); \
|
||||
ROUND_S_8WAY(6); \
|
||||
ROUND_S_8WAY(7); \
|
||||
if (rounds == 14) \
|
||||
{ \
|
||||
ROUND_S_8WAY(8); \
|
||||
ROUND_S_8WAY(9); \
|
||||
ROUND_S_8WAY(0); \
|
||||
ROUND_S_8WAY(1); \
|
||||
ROUND_S_8WAY(2); \
|
||||
ROUND_S_8WAY(3); \
|
||||
} \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( V8, V0 ), \
|
||||
S0 ), H0 ); \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( V9, V1 ), \
|
||||
S1 ), H1 ); \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VA, V2 ), \
|
||||
S2 ), H2 ); \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VB, V3 ), \
|
||||
S3 ), H3 ); \
|
||||
H4 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VC, V4 ), \
|
||||
S0 ), H4 ); \
|
||||
H5 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VD, V5 ), \
|
||||
S1 ), H5 ); \
|
||||
H6 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VE, V6 ), \
|
||||
S2 ), H6 ); \
|
||||
H7 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VF, V7 ), \
|
||||
S3 ), H7 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#define DECL_STATE64_4WAY \
|
||||
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
|
||||
__m256i S0, S1, S2, S3; \
|
||||
@@ -813,7 +967,7 @@ do { \
|
||||
|
||||
#endif
|
||||
|
||||
static const sph_u32 salt_zero_small[4] = { 0, 0, 0, 0 };
|
||||
static const sph_u32 salt_zero_4way_small[4] = { 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
blake32_4way_init( blake_4way_small_context *sc, const sph_u32 *iv,
|
||||
@@ -934,6 +1088,129 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// Blake-256 8 way
|
||||
|
||||
static const sph_u32 salt_zero_8way_small[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
blake32_8way_init( blake_8way_small_context *sc, const sph_u32 *iv,
|
||||
const sph_u32 *salt, int rounds )
|
||||
{
|
||||
int i;
|
||||
for ( i = 0; i < 8; i++ )
|
||||
sc->H[i] = _mm256_set1_epi32( iv[i] );
|
||||
for ( i = 0; i < 4; i++ )
|
||||
sc->S[i] = _mm256_set1_epi32( salt[i] );
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
sc->rounds = rounds;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_8way( blake_8way_small_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf;
|
||||
size_t ptr;
|
||||
const int buf_size = 64; // number of elements, sizeof/4
|
||||
DECL_STATE32_8WAY
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if ( len < buf_size - ptr )
|
||||
{
|
||||
memcpy_256( buf + (ptr>>2), vdata, len>>2 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE32_8WAY(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = buf_size - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy_256( buf + (ptr>>2), vdata, clen>>2 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>2);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
|
||||
T1 = SPH_T32(T1 + 1);
|
||||
COMPRESS32_8WAY( sc->rounds );
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE32_8WAY(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
union {
|
||||
__m256i buf[16];
|
||||
sph_u32 dummy;
|
||||
} u;
|
||||
size_t ptr, k;
|
||||
unsigned bit_len;
|
||||
sph_u32 th, tl;
|
||||
__m256i *out;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
u.buf[ptr>>2] = _mm256_set1_epi32( 0x80 );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
|
||||
if ( ptr == 0 )
|
||||
{
|
||||
sc->T0 = SPH_C32(0xFFFFFE00UL);
|
||||
sc->T1 = SPH_C32(0xFFFFFFFFUL);
|
||||
}
|
||||
else if ( sc->T0 == 0 )
|
||||
{
|
||||
sc->T0 = SPH_C32(0xFFFFFE00UL) + bit_len;
|
||||
sc->T1 = SPH_T32(sc->T1 - 1);
|
||||
}
|
||||
else
|
||||
sc->T0 -= 512 - bit_len;
|
||||
|
||||
if ( ptr <= 52 )
|
||||
{
|
||||
memset_zero_256( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm256_or_si256( u.buf[52>>2],
|
||||
_mm256_set1_epi32( 0x01000000UL ) );
|
||||
*(u.buf+(56>>2)) = mm256_byteswap_32( _mm256_set1_epi32( th ) );
|
||||
*(u.buf+(60>>2)) = mm256_byteswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_256( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
|
||||
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
|
||||
sc->T0 = SPH_C32(0xFFFFFE00UL);
|
||||
sc->T1 = SPH_C32(0xFFFFFFFFUL);
|
||||
memset_zero_256( u.buf, 56>>2 );
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[52>>2] = _mm256_set1_epi32( 0x01000000UL );
|
||||
*(u.buf+(56>>2)) = mm256_byteswap_32( _mm256_set1_epi32( th ) );
|
||||
*(u.buf+(60>>2)) = mm256_byteswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, u.buf, 64 );
|
||||
}
|
||||
out = (__m256i*)dst;
|
||||
for ( k = 0; k < out_size_w32; k++ )
|
||||
out[k] = mm256_byteswap_32( sc->H[k] );
|
||||
}
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
@@ -1065,11 +1342,13 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
|
||||
#endif
|
||||
|
||||
// Blake-256 4 way & 8 way
|
||||
|
||||
// default 14 rounds, backward copatibility
|
||||
void
|
||||
blake256_4way_init(void *cc)
|
||||
{
|
||||
blake32_4way_init( cc, IV256, salt_zero_small, 14 );
|
||||
blake32_4way_init( cc, IV256, salt_zero_4way_small, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1084,10 +1363,31 @@ blake256_4way_close(void *cc, void *dst)
|
||||
blake32_4way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
// 14 rounds blake, decred
|
||||
#if defined(__AVX2__)
|
||||
void
|
||||
blake256_8way_init(void *cc)
|
||||
{
|
||||
blake32_8way_init( cc, IV256, salt_zero_8way_small, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256_8way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_8way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256_8way_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_8way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// 14 rounds Blake, Decred
|
||||
void blake256r14_4way_init(void *cc)
|
||||
{
|
||||
blake32_4way_init( cc, IV256, salt_zero_small, 14 );
|
||||
blake32_4way_init( cc, IV256, salt_zero_4way_small, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1102,10 +1402,31 @@ blake256r14_4way_close(void *cc, void *dst)
|
||||
blake32_4way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
// 8 rounds blakecoin, vanilla
|
||||
#if defined(__AVX2__)
|
||||
|
||||
void blake256r14_8way_init(void *cc)
|
||||
{
|
||||
blake32_8way_init( cc, IV256, salt_zero_8way_small, 14 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256r14_8way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_8way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r14_8way_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_8way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// 8 rounds Blakecoin, Vanilla
|
||||
void blake256r8_4way_init(void *cc)
|
||||
{
|
||||
blake32_4way_init( cc, IV256, salt_zero_small, 8 );
|
||||
blake32_4way_init( cc, IV256, salt_zero_4way_small, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1122,6 +1443,29 @@ blake256r8_4way_close(void *cc, void *dst)
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
void blake256r8_8way_init(void *cc)
|
||||
{
|
||||
blake32_8way_init( cc, IV256, salt_zero_8way_small, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_8way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32_8way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake256r8_8way_close(void *cc, void *dst)
|
||||
{
|
||||
blake32_8way_close(cc, 0, 0, dst, 8);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
void
|
||||
blake512_4way_init(void *cc)
|
||||
{
|
||||
|
@@ -51,6 +51,11 @@ extern "C"{
|
||||
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
// With AVX only Blake-256 4 way is available.
|
||||
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
|
||||
|
||||
// Blake-256 4 way
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[16] __attribute__ ((aligned (64)));
|
||||
__m128i H[8];
|
||||
@@ -80,6 +85,37 @@ void blake256r8_4way_close(void *cc, void *dst);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
// Blake-256 8 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
__m256i H[8];
|
||||
__m256i S[4];
|
||||
size_t ptr;
|
||||
sph_u32 T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blake_8way_small_context;
|
||||
|
||||
// Default 14 rounds
|
||||
typedef blake_8way_small_context blake256_8way_context;
|
||||
void blake256_8way_init(void *cc);
|
||||
void blake256_8way(void *cc, const void *data, size_t len);
|
||||
void blake256_8way_close(void *cc, void *dst);
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_8way_small_context blake256r14_8way_context;
|
||||
void blake256r14_8way_init(void *cc);
|
||||
void blake256r14_8way(void *cc, const void *data, size_t len);
|
||||
void blake256r14_8way_close(void *cc, void *dst);
|
||||
|
||||
// 8 rounds, blakecoin, vanilla
|
||||
typedef blake_8way_small_context blake256r8_8way_context;
|
||||
void blake256r8_8way_init(void *cc);
|
||||
void blake256r8_8way(void *cc, const void *data, size_t len);
|
||||
void blake256r8_8way_close(void *cc, void *dst);
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
__m256i H[8];
|
||||
|
@@ -3,7 +3,7 @@
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "crypto/blake2s.h"
|
||||
#include "sph-blake2s.h"
|
||||
|
||||
static __thread blake2s_state s_midstate;
|
||||
static __thread blake2s_state s_ctx;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "blakecoin-gate.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
|
@@ -15,13 +15,13 @@ void bc4w_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
//
|
||||
|
||||
// if ( have_stratum && ( *nonceptr >= *end_nonce_ptr ) )
|
||||
// algo_gate.stratum_gen_work( &stratum, g_work );
|
||||
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
|| ( *nonceptr >= *end_nonce_ptr )
|
||||
|| ( work->job_id != g_work->job_id ) && clean_job )
|
||||
|| ( ( work->job_id != g_work->job_id ) && clean_job ) )
|
||||
/*
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|
||||
@@ -47,7 +47,6 @@ bool register_vanilla_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKECOIN_4WAY)
|
||||
// four_way_not_tested();
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_blakecoin_4way;
|
||||
gate->hash = (void*)&blakecoin_4way_hash;
|
||||
// gate->get_new_work = (void*)&bc4w_get_new_work;
|
||||
@@ -57,7 +56,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
// blakecoin_init( &blake_init_ctx );
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT | FOUR_WAY_OPT;
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#if defined(__AVX2__)
|
||||
#define BLAKECOIN_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -145,15 +145,13 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(DECRED_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->scanhash = (void*)&scanhash_decred_4way;
|
||||
gate->hash = (void*)&decred_hash_4way;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_decred;
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
|
@@ -18,7 +18,7 @@
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#if defined(__AVX2__)
|
||||
#define DECRED_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "pentablake-gate.h"
|
||||
|
||||
#ifdef __AVX2__
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
@@ -9,7 +9,7 @@ bool register_pentablake_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_pentablake;
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
#endif
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__)
|
||||
#if defined(__AVX2__)
|
||||
#define PENTABLAKE_4WAY
|
||||
#endif
|
||||
|
||||
|
378
algo/blake/sph-blake2s.c
Normal file
378
algo/blake/sph-blake2s.c
Normal file
@@ -0,0 +1,378 @@
|
||||
/**
|
||||
* BLAKE2 reference source code package - reference C implementations
|
||||
*
|
||||
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all copyright
|
||||
* and related and neighboring rights to this software to the public domain
|
||||
* worldwide. This software is distributed without any warranty.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "sph-blake2s.h"
|
||||
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
||||
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
|
||||
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
|
||||
};
|
||||
|
||||
static const uint8_t blake2s_sigma[10][16] =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||||
};
|
||||
|
||||
static inline int blake2s_set_lastnode( blake2s_state *S )
|
||||
{
|
||||
S->f[1] = ~0U;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_clear_lastnode( blake2s_state *S )
|
||||
{
|
||||
S->f[1] = 0U;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Some helper functions, not necessarily useful */
|
||||
static inline int blake2s_set_lastblock( blake2s_state *S )
|
||||
{
|
||||
if( S->last_node ) blake2s_set_lastnode( S );
|
||||
|
||||
S->f[0] = ~0U;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_clear_lastblock( blake2s_state *S )
|
||||
{
|
||||
if( S->last_node ) blake2s_clear_lastnode( S );
|
||||
|
||||
S->f[0] = 0U;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
|
||||
{
|
||||
S->t[0] += inc;
|
||||
S->t[1] += ( S->t[0] < inc );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parameter-related functions
|
||||
static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
|
||||
{
|
||||
P->digest_length = digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
|
||||
{
|
||||
P->fanout = fanout;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
|
||||
{
|
||||
P->depth = depth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
|
||||
{
|
||||
store32( &P->leaf_length, leaf_length );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
|
||||
{
|
||||
store48( P->node_offset, node_offset );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
|
||||
{
|
||||
P->node_depth = node_depth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
|
||||
{
|
||||
P->inner_length = inner_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
|
||||
{
|
||||
memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
|
||||
{
|
||||
memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blake2s_init0( blake2s_state *S )
|
||||
{
|
||||
memset( S, 0, sizeof( blake2s_state ) );
|
||||
|
||||
for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* init2 xors IV with input parameter block */
|
||||
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
|
||||
{
|
||||
blake2s_init0( S );
|
||||
uint32_t *p = ( uint32_t * )( P );
|
||||
|
||||
/* IV XOR ParamBlock */
|
||||
for( size_t i = 0; i < 8; ++i )
|
||||
S->h[i] ^= load32( &p[i] );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Sequential blake2s initialization
|
||||
int blake2s_init( blake2s_state *S, const uint8_t outlen )
|
||||
{
|
||||
blake2s_param P[1];
|
||||
|
||||
/* Move interval verification here? */
|
||||
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
|
||||
|
||||
P->digest_length = outlen;
|
||||
P->key_length = 0;
|
||||
P->fanout = 1;
|
||||
P->depth = 1;
|
||||
store32( &P->leaf_length, 0 );
|
||||
store48( &P->node_offset, 0 );
|
||||
P->node_depth = 0;
|
||||
P->inner_length = 0;
|
||||
// memset(P->reserved, 0, sizeof(P->reserved) );
|
||||
memset( P->salt, 0, sizeof( P->salt ) );
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
return blake2s_init_param( S, P );
|
||||
}
|
||||
|
||||
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
|
||||
{
|
||||
blake2s_param P[1];
|
||||
|
||||
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
|
||||
|
||||
if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
|
||||
|
||||
P->digest_length = outlen;
|
||||
P->key_length = keylen;
|
||||
P->fanout = 1;
|
||||
P->depth = 1;
|
||||
store32( &P->leaf_length, 0 );
|
||||
store48( &P->node_offset, 0 );
|
||||
P->node_depth = 0;
|
||||
P->inner_length = 0;
|
||||
// memset(P->reserved, 0, sizeof(P->reserved) );
|
||||
memset( P->salt, 0, sizeof( P->salt ) );
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
|
||||
if( blake2s_init_param( S, P ) < 0 ) return -1;
|
||||
|
||||
{
|
||||
uint8_t block[BLAKE2S_BLOCKBYTES];
|
||||
memset( block, 0, BLAKE2S_BLOCKBYTES );
|
||||
memcpy( block, key, keylen );
|
||||
blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
|
||||
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
|
||||
{
|
||||
uint32_t m[16];
|
||||
uint32_t v[16];
|
||||
|
||||
for( size_t i = 0; i < 16; ++i )
|
||||
m[i] = load32( block + i * sizeof( m[i] ) );
|
||||
|
||||
for( size_t i = 0; i < 8; ++i )
|
||||
v[i] = S->h[i];
|
||||
|
||||
v[ 8] = blake2s_IV[0];
|
||||
v[ 9] = blake2s_IV[1];
|
||||
v[10] = blake2s_IV[2];
|
||||
v[11] = blake2s_IV[3];
|
||||
v[12] = S->t[0] ^ blake2s_IV[4];
|
||||
v[13] = S->t[1] ^ blake2s_IV[5];
|
||||
v[14] = S->f[0] ^ blake2s_IV[6];
|
||||
v[15] = S->f[1] ^ blake2s_IV[7];
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
|
||||
d = SPH_ROTR32(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = SPH_ROTR32(b ^ c, 12); \
|
||||
a = a + b + m[blake2s_sigma[r][2*i+1]]; \
|
||||
d = SPH_ROTR32(d ^ a, 8); \
|
||||
c = c + d; \
|
||||
b = SPH_ROTR32(b ^ c, 7); \
|
||||
} while(0)
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
||||
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
||||
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
||||
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
||||
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while(0)
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
ROUND( 4 );
|
||||
ROUND( 5 );
|
||||
ROUND( 6 );
|
||||
ROUND( 7 );
|
||||
ROUND( 8 );
|
||||
ROUND( 9 );
|
||||
|
||||
for( size_t i = 0; i < 8; ++i )
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen )
|
||||
{
|
||||
while( inlen > 0 )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
|
||||
|
||||
if( inlen > fill )
|
||||
{
|
||||
memcpy( S->buf + left, in, fill ); // Fill buffer
|
||||
S->buflen += fill;
|
||||
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
|
||||
blake2s_compress( S, S->buf ); // Compress
|
||||
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
|
||||
S->buflen -= BLAKE2S_BLOCKBYTES;
|
||||
in += fill;
|
||||
inlen -= fill;
|
||||
}
|
||||
else // inlen <= fill
|
||||
{
|
||||
memcpy(S->buf + left, in, (size_t) inlen);
|
||||
S->buflen += (size_t) inlen; // Be lazy, do not compress
|
||||
in += inlen;
|
||||
inlen -= inlen;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen )
|
||||
{
|
||||
uint8_t buffer[BLAKE2S_OUTBYTES];
|
||||
|
||||
if( S->buflen > BLAKE2S_BLOCKBYTES )
|
||||
{
|
||||
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
|
||||
blake2s_compress( S, S->buf );
|
||||
S->buflen -= BLAKE2S_BLOCKBYTES;
|
||||
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
|
||||
}
|
||||
|
||||
blake2s_increment_counter( S, ( uint32_t )S->buflen );
|
||||
blake2s_set_lastblock( S );
|
||||
memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
|
||||
blake2s_compress( S, S->buf );
|
||||
|
||||
for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
|
||||
store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
|
||||
|
||||
memcpy( out, buffer, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
|
||||
{
|
||||
blake2s_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( blake2s_init( S, outlen ) < 0 ) return -1;
|
||||
}
|
||||
|
||||
blake2s_update( S, ( uint8_t * )in, inlen );
|
||||
blake2s_final( S, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(BLAKE2S_SELFTEST)
|
||||
#include <string.h>
|
||||
#include "blake2-kat.h" /* test data not included */
|
||||
int main( int argc, char **argv )
|
||||
{
|
||||
uint8_t key[BLAKE2S_KEYBYTES];
|
||||
uint8_t buf[KAT_LENGTH];
|
||||
|
||||
for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
|
||||
key[i] = ( uint8_t )i;
|
||||
|
||||
for( size_t i = 0; i < KAT_LENGTH; ++i )
|
||||
buf[i] = ( uint8_t )i;
|
||||
|
||||
for( size_t i = 0; i < KAT_LENGTH; ++i )
|
||||
{
|
||||
uint8_t hash[BLAKE2S_OUTBYTES];
|
||||
blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES );
|
||||
|
||||
if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
|
||||
{
|
||||
puts( "error" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
puts( "ok" );
|
||||
return 0;
|
||||
}
|
||||
#endif
|
150
algo/blake/sph-blake2s.h
Normal file
150
algo/blake/sph-blake2s.h
Normal file
@@ -0,0 +1,150 @@
|
||||
/**
|
||||
* BLAKE2 reference source code package - reference C implementations
|
||||
*
|
||||
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all copyright
|
||||
* and related and neighboring rights to this software to the public domain
|
||||
* worldwide. This software is distributed without any warranty.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef __BLAKE2_H__
|
||||
#define __BLAKE2_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <inttypes.h>
|
||||
#define inline __inline
|
||||
#define ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
|
||||
/* blake2-impl.h */
|
||||
|
||||
static inline uint32_t load32(const void *src)
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *(uint32_t *)(src);
|
||||
#else
|
||||
const uint8_t *p = (uint8_t *)src;
|
||||
uint32_t w = *p++;
|
||||
w |= (uint32_t)(*p++) << 8;
|
||||
w |= (uint32_t)(*p++) << 16;
|
||||
w |= (uint32_t)(*p++) << 24;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void store32(void *dst, uint32_t w)
|
||||
{
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
*(uint32_t *)(dst) = w;
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uint64_t load48(const void *src)
|
||||
{
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
return w;
|
||||
}
|
||||
|
||||
static inline void store48(void *dst, uint64_t w)
|
||||
{
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w; w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
}
|
||||
|
||||
/* prevents compiler optimizing out memset() */
|
||||
static inline void secure_zero_memory(void *v, size_t n)
|
||||
{
|
||||
volatile uint8_t *p = ( volatile uint8_t * )v;
|
||||
|
||||
while( n-- ) *p++ = 0;
|
||||
}
|
||||
|
||||
/* blake2.h */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum blake2s_constant
|
||||
{
|
||||
BLAKE2S_BLOCKBYTES = 64,
|
||||
BLAKE2S_OUTBYTES = 32,
|
||||
BLAKE2S_KEYBYTES = 32,
|
||||
BLAKE2S_SALTBYTES = 8,
|
||||
BLAKE2S_PERSONALBYTES = 8
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2s_param
|
||||
{
|
||||
uint8_t digest_length; // 1
|
||||
uint8_t key_length; // 2
|
||||
uint8_t fanout; // 3
|
||||
uint8_t depth; // 4
|
||||
uint32_t leaf_length; // 8
|
||||
uint8_t node_offset[6];// 14
|
||||
uint8_t node_depth; // 15
|
||||
uint8_t inner_length; // 16
|
||||
// uint8_t reserved[0];
|
||||
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
|
||||
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
|
||||
} blake2s_param;
|
||||
|
||||
ALIGN( 64 ) typedef struct __blake2s_state
|
||||
{
|
||||
uint32_t h[8];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
uint8_t buf[2 * BLAKE2S_BLOCKBYTES];
|
||||
size_t buflen;
|
||||
uint8_t last_node;
|
||||
} blake2s_state ;
|
||||
#pragma pack(pop)
|
||||
|
||||
int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] );
|
||||
|
||||
// Streaming API
|
||||
int blake2s_init( blake2s_state *S, const uint8_t outlen );
|
||||
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
|
||||
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
|
||||
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen );
|
||||
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen );
|
||||
|
||||
// Simple API
|
||||
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
// Direct Hash Mining Helpers
|
||||
#define blake2s_salt32(out, in, inlen, key32) blake2s(out, in, key32, 32, inlen, 32) /* neoscrypt */
|
||||
#define blake2s_simple(out, in, inlen) blake2s(out, in, NULL, 32, inlen, 0)
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user