This commit is contained in:
Jay D Dee
2018-01-23 21:02:16 -05:00
parent a90d75b8f5
commit ad2275f74a
121 changed files with 4662 additions and 467 deletions

View File

@@ -1,6 +1,6 @@
#include "blake-gate.h"
#if defined (__AVX__)
#if defined (BLAKE_4WAY)
#include "blake-hash-4way.h"
#include <string.h>

View File

@@ -7,6 +7,7 @@ int64_t blake_get_max64 ()
bool register_blake_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&blake_get_max64;
//#if defined (__AVX2__) && defined (FOUR_WAY)
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
@@ -14,7 +15,6 @@ bool register_blake_algo( algo_gate_t* gate )
// gate->hash = (void*)&blakehash_8way;
#if defined(BLAKE_4WAY)
four_way_not_tested();
gate->optimizations = FOUR_WAY_OPT;
gate->scanhash = (void*)&scanhash_blake_4way;
gate->hash = (void*)&blakehash_4way;
#else

View File

@@ -4,7 +4,7 @@
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(FOUR_WAY) && defined(__AVX__)
#if defined(__AVX2__)
#define BLAKE_4WAY
#endif

View File

@@ -78,6 +78,8 @@ static const sph_u64 IV512[8] = {
#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
// Blake-256 4 & 8 way, Blake-512 4way
static const unsigned sigma[16][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
@@ -273,6 +275,8 @@ static const unsigned sigma[16][16] = {
#define Mx_(n) Mx__(n)
#define Mx__(n) M ## n
// Blake-256 4 & 8 way
#define CSx(r, i) CSx_(Z ## r ## i)
#define CSx_(n) CSx__(n)
#define CSx__(n) CS ## n
@@ -311,6 +315,8 @@ static const sph_u32 CS[16] = {
#if defined(__AVX2__)
// Blake-512 4 way
#define CBx(r, i) CBx_(Z ## r ## i)
#define CBx_(n) CBx__(n)
#define CBx__(n) CB ## n
@@ -401,6 +407,35 @@ do { \
#if defined (__AVX2__)
// BLAKE256 8 WAY
#define GS_8WAY( m0, m1, c0, c1, a, b, c, d ) \
do { \
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
_mm256_set1_epi32( c1 ), m0 ), b ), a ); \
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
c = _mm256_add_epi32( c, d ); \
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
_mm256_set1_epi32( c0 ), m1 ), b ), a ); \
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
c = _mm256_add_epi32( c, d ); \
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
} while (0)
#define ROUND_S_8WAY(r) do { \
GS_8WAY(Mx(r, 0), Mx(r, 1), CSx(r, 0), CSx(r, 1), V0, V4, V8, VC); \
GS_8WAY(Mx(r, 2), Mx(r, 3), CSx(r, 2), CSx(r, 3), V1, V5, V9, VD); \
GS_8WAY(Mx(r, 4), Mx(r, 5), CSx(r, 4), CSx(r, 5), V2, V6, VA, VE); \
GS_8WAY(Mx(r, 6), Mx(r, 7), CSx(r, 6), CSx(r, 7), V3, V7, VB, VF); \
GS_8WAY(Mx(r, 8), Mx(r, 9), CSx(r, 8), CSx(r, 9), V0, V5, VA, VF); \
GS_8WAY(Mx(r, A), Mx(r, B), CSx(r, A), CSx(r, B), V1, V6, VB, VC); \
GS_8WAY(Mx(r, C), Mx(r, D), CSx(r, C), CSx(r, D), V2, V7, V8, VD); \
GS_8WAY(Mx(r, E), Mx(r, F), CSx(r, E), CSx(r, F), V3, V4, V9, VE); \
} while (0)
// Blake-512 4 way
#define GB_4WAY(m0, m1, c0, c1, a, b, c, d) do { \
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
_mm256_set_epi64x( c1, c1, c1, c1 ), m0 ), b ), a ); \
@@ -627,6 +662,125 @@ do { \
#if defined (__AVX2__)
// Blake-256 8 way
#define DECL_STATE32_8WAY \
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
__m256i S0, S1, S2, S3; \
sph_u32 T0, T1;
#define READ_STATE32_8WAY(state) \
do { \
H0 = (state)->H[0]; \
H1 = (state)->H[1]; \
H2 = (state)->H[2]; \
H3 = (state)->H[3]; \
H4 = (state)->H[4]; \
H5 = (state)->H[5]; \
H6 = (state)->H[6]; \
H7 = (state)->H[7]; \
S0 = (state)->S[0]; \
S1 = (state)->S[1]; \
S2 = (state)->S[2]; \
S3 = (state)->S[3]; \
T0 = (state)->T0; \
T1 = (state)->T1; \
} while (0)
#define WRITE_STATE32_8WAY(state) \
do { \
(state)->H[0] = H0; \
(state)->H[1] = H1; \
(state)->H[2] = H2; \
(state)->H[3] = H3; \
(state)->H[4] = H4; \
(state)->H[5] = H5; \
(state)->H[6] = H6; \
(state)->H[7] = H7; \
(state)->S[0] = S0; \
(state)->S[1] = S1; \
(state)->S[2] = S2; \
(state)->S[3] = S3; \
(state)->T0 = T0; \
(state)->T1 = T1; \
} while (0)
#define COMPRESS32_8WAY( rounds ) \
do { \
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
V0 = H0; \
V1 = H1; \
V2 = H2; \
V3 = H3; \
V4 = H4; \
V5 = H5; \
V6 = H6; \
V7 = H7; \
V8 = _mm256_xor_si256( S0, _mm256_set1_epi32( CS0 ) ); \
V9 = _mm256_xor_si256( S1, _mm256_set1_epi32( CS1 ) ); \
VA = _mm256_xor_si256( S2, _mm256_set1_epi32( CS2 ) ); \
VB = _mm256_xor_si256( S3, _mm256_set1_epi32( CS3 ) ); \
VC = _mm256_xor_si256( _mm256_set1_epi32( T0 ), _mm256_set1_epi32( CS4 ) ); \
VD = _mm256_xor_si256( _mm256_set1_epi32( T0 ), _mm256_set1_epi32( CS5 ) ); \
VE = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS6 ) ); \
VF = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS7 ) ); \
M0 = mm256_byteswap_32( * buf ); \
M1 = mm256_byteswap_32( *(buf+1) ); \
M2 = mm256_byteswap_32( *(buf+2) ); \
M3 = mm256_byteswap_32( *(buf+3) ); \
M4 = mm256_byteswap_32( *(buf+4) ); \
M5 = mm256_byteswap_32( *(buf+5) ); \
M6 = mm256_byteswap_32( *(buf+6) ); \
M7 = mm256_byteswap_32( *(buf+7) ); \
M8 = mm256_byteswap_32( *(buf+8) ); \
M9 = mm256_byteswap_32( *(buf+9) ); \
MA = mm256_byteswap_32( *(buf+10) ); \
MB = mm256_byteswap_32( *(buf+11) ); \
MC = mm256_byteswap_32( *(buf+12) ); \
MD = mm256_byteswap_32( *(buf+13) ); \
ME = mm256_byteswap_32( *(buf+14) ); \
MF = mm256_byteswap_32( *(buf+15) ); \
ROUND_S_8WAY(0); \
ROUND_S_8WAY(1); \
ROUND_S_8WAY(2); \
ROUND_S_8WAY(3); \
ROUND_S_8WAY(4); \
ROUND_S_8WAY(5); \
ROUND_S_8WAY(6); \
ROUND_S_8WAY(7); \
if (rounds == 14) \
{ \
ROUND_S_8WAY(8); \
ROUND_S_8WAY(9); \
ROUND_S_8WAY(0); \
ROUND_S_8WAY(1); \
ROUND_S_8WAY(2); \
ROUND_S_8WAY(3); \
} \
H0 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( V8, V0 ), \
S0 ), H0 ); \
H1 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( V9, V1 ), \
S1 ), H1 ); \
H2 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VA, V2 ), \
S2 ), H2 ); \
H3 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VB, V3 ), \
S3 ), H3 ); \
H4 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VC, V4 ), \
S0 ), H4 ); \
H5 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VD, V5 ), \
S1 ), H5 ); \
H6 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VE, V6 ), \
S2 ), H6 ); \
H7 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VF, V7 ), \
S3 ), H7 ); \
} while (0)
// Blake-512 4 way
#define DECL_STATE64_4WAY \
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
__m256i S0, S1, S2, S3; \
@@ -813,7 +967,7 @@ do { \
#endif
static const sph_u32 salt_zero_small[4] = { 0, 0, 0, 0 };
static const sph_u32 salt_zero_4way_small[4] = { 0, 0, 0, 0 };
static void
blake32_4way_init( blake_4way_small_context *sc, const sph_u32 *iv,
@@ -934,6 +1088,129 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
#if defined (__AVX2__)
// Blake-256 8 way
static const sph_u32 salt_zero_8way_small[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
static void
blake32_8way_init( blake_8way_small_context *sc, const sph_u32 *iv,
const sph_u32 *salt, int rounds )
{
int i;
for ( i = 0; i < 8; i++ )
sc->H[i] = _mm256_set1_epi32( iv[i] );
for ( i = 0; i < 4; i++ )
sc->S[i] = _mm256_set1_epi32( salt[i] );
sc->T0 = sc->T1 = 0;
sc->ptr = 0;
sc->rounds = rounds;
}
static void
blake32_8way( blake_8way_small_context *sc, const void *data, size_t len )
{
__m256i *vdata = (__m256i*)data;
__m256i *buf;
size_t ptr;
const int buf_size = 64; // number of elements, sizeof/4
DECL_STATE32_8WAY
buf = sc->buf;
ptr = sc->ptr;
if ( len < buf_size - ptr )
{
memcpy_256( buf + (ptr>>2), vdata, len>>2 );
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE32_8WAY(sc);
while ( len > 0 )
{
size_t clen;
clen = buf_size - ptr;
if (clen > len)
clen = len;
memcpy_256( buf + (ptr>>2), vdata, clen>>2 );
ptr += clen;
vdata += (clen>>2);
len -= clen;
if ( ptr == buf_size )
{
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
T1 = SPH_T32(T1 + 1);
COMPRESS32_8WAY( sc->rounds );
ptr = 0;
}
}
WRITE_STATE32_8WAY(sc);
sc->ptr = ptr;
}
static void
blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w32 )
{
union {
__m256i buf[16];
sph_u32 dummy;
} u;
size_t ptr, k;
unsigned bit_len;
sph_u32 th, tl;
__m256i *out;
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
u.buf[ptr>>2] = _mm256_set1_epi32( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;
if ( ptr == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
}
else if ( sc->T0 == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00UL) + bit_len;
sc->T1 = SPH_T32(sc->T1 - 1);
}
else
sc->T0 -= 512 - bit_len;
if ( ptr <= 52 )
{
memset_zero_256( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm256_or_si256( u.buf[52>>2],
_mm256_set1_epi32( 0x01000000UL ) );
*(u.buf+(56>>2)) = mm256_byteswap_32( _mm256_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm256_byteswap_32( _mm256_set1_epi32( tl ) );
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
}
else
{
memset_zero_256( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_8way( sc, u.buf + (ptr>>2), 64 - ptr );
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
memset_zero_256( u.buf, 56>>2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm256_set1_epi32( 0x01000000UL );
*(u.buf+(56>>2)) = mm256_byteswap_32( _mm256_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm256_byteswap_32( _mm256_set1_epi32( tl ) );
blake32_8way( sc, u.buf, 64 );
}
out = (__m256i*)dst;
for ( k = 0; k < out_size_w32; k++ )
out[k] = mm256_byteswap_32( sc->H[k] );
}
// Blake-512 4 way
static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 };
static void
@@ -1065,11 +1342,13 @@ blake64_4way_close( blake_4way_big_context *sc,
#endif
// Blake-256 4 way & 8 way
// default 14 rounds, backward copatibility
void
blake256_4way_init(void *cc)
{
blake32_4way_init( cc, IV256, salt_zero_small, 14 );
blake32_4way_init( cc, IV256, salt_zero_4way_small, 14 );
}
void
@@ -1084,10 +1363,31 @@ blake256_4way_close(void *cc, void *dst)
blake32_4way_close(cc, 0, 0, dst, 8);
}
// 14 rounds blake, decred
#if defined(__AVX2__)
void
blake256_8way_init(void *cc)
{
blake32_8way_init( cc, IV256, salt_zero_8way_small, 14 );
}
void
blake256_8way(void *cc, const void *data, size_t len)
{
blake32_8way(cc, data, len);
}
void
blake256_8way_close(void *cc, void *dst)
{
blake32_8way_close(cc, 0, 0, dst, 8);
}
#endif
// 14 rounds Blake, Decred
void blake256r14_4way_init(void *cc)
{
blake32_4way_init( cc, IV256, salt_zero_small, 14 );
blake32_4way_init( cc, IV256, salt_zero_4way_small, 14 );
}
void
@@ -1102,10 +1402,31 @@ blake256r14_4way_close(void *cc, void *dst)
blake32_4way_close(cc, 0, 0, dst, 8);
}
// 8 rounds blakecoin, vanilla
#if defined(__AVX2__)
void blake256r14_8way_init(void *cc)
{
blake32_8way_init( cc, IV256, salt_zero_8way_small, 14 );
}
void
blake256r14_8way(void *cc, const void *data, size_t len)
{
blake32_8way(cc, data, len);
}
void
blake256r14_8way_close(void *cc, void *dst)
{
blake32_8way_close(cc, 0, 0, dst, 8);
}
#endif
// 8 rounds Blakecoin, Vanilla
void blake256r8_4way_init(void *cc)
{
blake32_4way_init( cc, IV256, salt_zero_small, 8 );
blake32_4way_init( cc, IV256, salt_zero_4way_small, 8 );
}
void
@@ -1122,6 +1443,29 @@ blake256r8_4way_close(void *cc, void *dst)
#if defined (__AVX2__)
void blake256r8_8way_init(void *cc)
{
blake32_8way_init( cc, IV256, salt_zero_8way_small, 8 );
}
void
blake256r8_8way(void *cc, const void *data, size_t len)
{
blake32_8way(cc, data, len);
}
void
blake256r8_8way_close(void *cc, void *dst)
{
blake32_8way_close(cc, 0, 0, dst, 8);
}
#endif
// Blake-512 4 way
#if defined (__AVX2__)
void
blake512_4way_init(void *cc)
{

View File

@@ -51,6 +51,11 @@ extern "C"{
#define SPH_SIZE_blake512 512
// With AVX only Blake-256 4 way is available.
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
// Blake-256 4 way
typedef struct {
__m128i buf[16] __attribute__ ((aligned (64)));
__m128i H[8];
@@ -80,6 +85,37 @@ void blake256r8_4way_close(void *cc, void *dst);
#ifdef __AVX2__
// Blake-256 8 way
typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
__m256i H[8];
__m256i S[4];
size_t ptr;
sph_u32 T0, T1;
int rounds; // 14 for blake, 8 for blakecoin & vanilla
} blake_8way_small_context;
// Default 14 rounds
typedef blake_8way_small_context blake256_8way_context;
void blake256_8way_init(void *cc);
void blake256_8way(void *cc, const void *data, size_t len);
void blake256_8way_close(void *cc, void *dst);
// 14 rounds, blake, decred
typedef blake_8way_small_context blake256r14_8way_context;
void blake256r14_8way_init(void *cc);
void blake256r14_8way(void *cc, const void *data, size_t len);
void blake256r14_8way_close(void *cc, void *dst);
// 8 rounds, blakecoin, vanilla
typedef blake_8way_small_context blake256r8_8way_context;
void blake256r8_8way_init(void *cc);
void blake256r8_8way(void *cc, const void *data, size_t len);
void blake256r8_8way_close(void *cc, void *dst);
// Blake-512 4 way
typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
__m256i H[8];

View File

@@ -3,7 +3,7 @@
#include <string.h>
#include <stdint.h>
#include "crypto/blake2s.h"
#include "sph-blake2s.h"
static __thread blake2s_state s_midstate;
static __thread blake2s_state s_ctx;

View File

@@ -1,6 +1,6 @@
#include "blakecoin-gate.h"
#if defined (__AVX__)
#if defined (BLAKECOIN_4WAY)
#include "blake-hash-4way.h"
#include <string.h>

View File

@@ -15,13 +15,13 @@ void bc4w_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *end_nonce_ptr, bool clean_job )
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
//
// if ( have_stratum && ( *nonceptr >= *end_nonce_ptr ) )
// algo_gate.stratum_gen_work( &stratum, g_work );
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|| ( *nonceptr >= *end_nonce_ptr )
|| ( work->job_id != g_work->job_id ) && clean_job )
|| ( ( work->job_id != g_work->job_id ) && clean_job ) )
/*
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
@@ -47,7 +47,6 @@ bool register_vanilla_algo( algo_gate_t* gate )
{
#if defined(BLAKECOIN_4WAY)
// four_way_not_tested();
gate->optimizations = FOUR_WAY_OPT;
gate->scanhash = (void*)&scanhash_blakecoin_4way;
gate->hash = (void*)&blakecoin_4way_hash;
// gate->get_new_work = (void*)&bc4w_get_new_work;
@@ -57,7 +56,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
gate->hash = (void*)&blakecoinhash;
// blakecoin_init( &blake_init_ctx );
#endif
gate->optimizations = AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&blakecoin_get_max64;
return true;
}

View File

@@ -4,7 +4,7 @@
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(FOUR_WAY) && defined(__AVX__)
#if defined(__AVX2__)
#define BLAKECOIN_4WAY
#endif

View File

@@ -145,15 +145,13 @@ bool register_decred_algo( algo_gate_t* gate )
{
#if defined(DECRED_4WAY)
four_way_not_tested();
gate->optimizations = FOUR_WAY_OPT;
gate->scanhash = (void*)&scanhash_decred_4way;
gate->hash = (void*)&decred_hash_4way;
#else
gate->optimizations = SSE2_OPT;
gate->scanhash = (void*)&scanhash_decred;
gate->hash = (void*)&decred_hash;
#endif
gate->optimizations = AVX2_OPT;
gate->get_nonceptr = (void*)&decred_get_nonceptr;
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
gate->display_extra_data = (void*)&decred_decode_extradata;

View File

@@ -18,7 +18,7 @@
// uint64_t *hashes_done );
#endif
#if defined(FOUR_WAY) && defined(__AVX__)
#if defined(__AVX2__)
#define DECRED_4WAY
#endif

View File

@@ -1,6 +1,6 @@
#include "pentablake-gate.h"
#ifdef __AVX2__
#if defined (__AVX2__)
#include <stdlib.h>
#include <stdint.h>

View File

@@ -9,7 +9,7 @@ bool register_pentablake_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_pentablake;
gate->hash = (void*)&pentablakehash;
#endif
gate->optimizations = FOUR_WAY_OPT;
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -4,7 +4,7 @@
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(FOUR_WAY) && defined(__AVX2__)
#if defined(__AVX2__)
#define PENTABLAKE_4WAY
#endif

378
algo/blake/sph-blake2s.c Normal file
View File

@@ -0,0 +1,378 @@
/**
* BLAKE2 reference source code package - reference C implementations
*
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide. This software is distributed without any warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication along with
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/sha/sph_types.h"
#include "sph-blake2s.h"
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const uint8_t blake2s_sigma[10][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
static inline int blake2s_set_lastnode( blake2s_state *S )
{
S->f[1] = ~0U;
return 0;
}
static inline int blake2s_clear_lastnode( blake2s_state *S )
{
S->f[1] = 0U;
return 0;
}
/* Some helper functions, not necessarily useful */
static inline int blake2s_set_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_set_lastnode( S );
S->f[0] = ~0U;
return 0;
}
static inline int blake2s_clear_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_clear_lastnode( S );
S->f[0] = 0U;
return 0;
}
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
return 0;
}
// Parameter-related functions
static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
{
P->digest_length = digest_length;
return 0;
}
static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
{
P->fanout = fanout;
return 0;
}
static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
{
P->depth = depth;
return 0;
}
static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
{
store32( &P->leaf_length, leaf_length );
return 0;
}
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
{
store48( P->node_offset, node_offset );
return 0;
}
static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
{
P->node_depth = node_depth;
return 0;
}
static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
{
P->inner_length = inner_length;
return 0;
}
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
{
memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
return 0;
}
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
{
memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
return 0;
}
static inline int blake2s_init0( blake2s_state *S )
{
memset( S, 0, sizeof( blake2s_state ) );
for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
return 0;
}
/* init2 xors IV with input parameter block */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
blake2s_init0( S );
uint32_t *p = ( uint32_t * )( P );
/* IV XOR ParamBlock */
for( size_t i = 0; i < 8; ++i )
S->h[i] ^= load32( &p[i] );
return 0;
}
// Sequential blake2s initialization
int blake2s_init( blake2s_state *S, const uint8_t outlen )
{
blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
P->digest_length = outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store48( &P->node_offset, 0 );
P->node_depth = 0;
P->inner_length = 0;
// memset(P->reserved, 0, sizeof(P->reserved) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2s_init_param( S, P );
}
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
blake2s_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
P->digest_length = outlen;
P->key_length = keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store48( &P->node_offset, 0 );
P->node_depth = 0;
P->inner_length = 0;
// memset(P->reserved, 0, sizeof(P->reserved) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( blake2s_init_param( S, P ) < 0 ) return -1;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
uint32_t m[16];
uint32_t v[16];
for( size_t i = 0; i < 16; ++i )
m[i] = load32( block + i * sizeof( m[i] ) );
for( size_t i = 0; i < 8; ++i )
v[i] = S->h[i];
v[ 8] = blake2s_IV[0];
v[ 9] = blake2s_IV[1];
v[10] = blake2s_IV[2];
v[11] = blake2s_IV[3];
v[12] = S->t[0] ^ blake2s_IV[4];
v[13] = S->t[1] ^ blake2s_IV[5];
v[14] = S->f[0] ^ blake2s_IV[6];
v[15] = S->f[1] ^ blake2s_IV[7];
#define G(r,i,a,b,c,d) \
do { \
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
d = SPH_ROTR32(d ^ a, 16); \
c = c + d; \
b = SPH_ROTR32(b ^ c, 12); \
a = a + b + m[blake2s_sigma[r][2*i+1]]; \
d = SPH_ROTR32(d ^ a, 8); \
c = c + d; \
b = SPH_ROTR32(b ^ c, 7); \
} while(0)
#define ROUND(r) \
do { \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
for( size_t i = 0; i < 8; ++i )
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
#undef G
#undef ROUND
return 0;
}
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen )
{
while( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
if( inlen > fill )
{
memcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf ); // Compress
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2S_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else // inlen <= fill
{
memcpy(S->buf + left, in, (size_t) inlen);
S->buflen += (size_t) inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
}
}
return 0;
}
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen )
{
uint8_t buffer[BLAKE2S_OUTBYTES];
if( S->buflen > BLAKE2S_BLOCKBYTES )
{
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf );
S->buflen -= BLAKE2S_BLOCKBYTES;
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
}
blake2s_increment_counter( S, ( uint32_t )S->buflen );
blake2s_set_lastblock( S );
memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
blake2s_compress( S, S->buf );
for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, outlen );
return 0;
}
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
blake2s_state S[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */
if( keylen > 0 )
{
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2s_init( S, outlen ) < 0 ) return -1;
}
blake2s_update( S, ( uint8_t * )in, inlen );
blake2s_final( S, out, outlen );
return 0;
}
#if defined(BLAKE2S_SELFTEST)
#include <string.h>
#include "blake2-kat.h" /* test data not included */
int main( int argc, char **argv )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[KAT_LENGTH];
for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2S_OUTBYTES];
blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES );
if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
{
puts( "error" );
return -1;
}
}
puts( "ok" );
return 0;
}
#endif

150
algo/blake/sph-blake2s.h Normal file
View File

@@ -0,0 +1,150 @@
/**
* BLAKE2 reference source code package - reference C implementations
*
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide. This software is distributed without any warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication along with
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2_H__
#define __BLAKE2_H__
#include <stddef.h>
#include <stdint.h>
#if defined(_MSC_VER)
#include <inttypes.h>
#define inline __inline
#define ALIGN(x) __declspec(align(x))
#else
#define ALIGN(x) __attribute__((aligned(x)))
#endif
/* blake2-impl.h */
static inline uint32_t load32(const void *src)
{
#if defined(NATIVE_LITTLE_ENDIAN)
return *(uint32_t *)(src);
#else
const uint8_t *p = (uint8_t *)src;
uint32_t w = *p++;
w |= (uint32_t)(*p++) << 8;
w |= (uint32_t)(*p++) << 16;
w |= (uint32_t)(*p++) << 24;
return w;
#endif
}
static inline void store32(void *dst, uint32_t w)
{
#if defined(NATIVE_LITTLE_ENDIAN)
*(uint32_t *)(dst) = w;
#else
uint8_t *p = (uint8_t *)dst;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w;
#endif
}
static inline uint64_t load48(const void *src)
{
const uint8_t *p = (const uint8_t *)src;
uint64_t w = *p++;
w |= (uint64_t)(*p++) << 8;
w |= (uint64_t)(*p++) << 16;
w |= (uint64_t)(*p++) << 24;
w |= (uint64_t)(*p++) << 32;
w |= (uint64_t)(*p++) << 40;
return w;
}
static inline void store48(void *dst, uint64_t w)
{
uint8_t *p = (uint8_t *)dst;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w; w >>= 8;
*p++ = (uint8_t)w;
}
/* prevents compiler optimizing out memset() */
static inline void secure_zero_memory(void *v, size_t n)
{
volatile uint8_t *p = ( volatile uint8_t * )v;
while( n-- ) *p++ = 0;
}
/* blake2.h */
#if defined(__cplusplus)
extern "C" {
#endif
enum blake2s_constant
{
BLAKE2S_BLOCKBYTES = 64,
BLAKE2S_OUTBYTES = 32,
BLAKE2S_KEYBYTES = 32,
BLAKE2S_SALTBYTES = 8,
BLAKE2S_PERSONALBYTES = 8
};
#pragma pack(push, 1)
typedef struct __blake2s_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint8_t node_offset[6];// 14
uint8_t node_depth; // 15
uint8_t inner_length; // 16
// uint8_t reserved[0];
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
} blake2s_param;
ALIGN( 64 ) typedef struct __blake2s_state
{
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[2 * BLAKE2S_BLOCKBYTES];
size_t buflen;
uint8_t last_node;
} blake2s_state ;
#pragma pack(pop)
int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] );
// Streaming API
int blake2s_init( blake2s_state *S, const uint8_t outlen );
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen );
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen );
// Simple API
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
// Direct Hash Mining Helpers
#define blake2s_salt32(out, in, inlen, key32) blake2s(out, in, key32, 32, inlen, 32) /* neoscrypt */
#define blake2s_simple(out, in, inlen) blake2s(out, in, NULL, 32, inlen, 0)
#if defined(__cplusplus)
}
#endif
#endif