This commit is contained in:
Jay D Dee
2023-09-13 11:48:52 -04:00
parent 4378d2f841
commit d6b5750362
28 changed files with 1626 additions and 1327 deletions

View File

@@ -1,60 +1,15 @@
/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
/**
* BLAKE interface. BLAKE is a family of functions which differ by their
* output size; this implementation defines BLAKE for output sizes 224,
* 256, 384 and 512 bits. This implementation conforms to the "third
* round" specification.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_blake.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef __BLAKE_HASH_4WAY__
#define __BLAKE_HASH_4WAY__ 1
#ifdef __cplusplus
extern "C"{
#endif
#ifndef BLAKE_HASH_4WAY__
#define BLAKE_HASH_4WAY__ 1
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "simd-utils.h"
#define SPH_SIZE_blake256 256
#define SPH_SIZE_blake512 512
/////////////////////////
//
// Blake-256 1 way SSE2
void blake256_transform_le( uint32_t *H, const uint32_t *buf,
const uint32_t T0, const uint32_t T1 );
const uint32_t T0, const uint32_t T1, int rounds );
/////////////////////////
//
@@ -75,13 +30,13 @@ typedef struct {
int rounds; // 14 for blake, 8 for blakecoin & vanilla
} blake_4way_small_context __attribute__ ((aligned (64)));
// Default, 14 rounds, blake, decred
// Default, 14 rounds
typedef blake_4way_small_context blake256_4way_context;
void blake256_4way_init(void *ctx);
void blake256_4way_update(void *ctx, const void *data, size_t len);
void blake256_4way_close(void *ctx, void *dst);
// 14 rounds, blake, decred
// 14 rounds
typedef blake_4way_small_context blake256r14_4way_context;
void blake256r14_4way_init(void *cc);
void blake256r14_4way_update(void *cc, const void *data, size_t len);
@@ -103,7 +58,7 @@ typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
__m256i H[8];
size_t ptr;
sph_u32 T0, T1;
uint32_t T0, T1;
int rounds; // 14 for blake, 8 for blakecoin & vanilla
} blake_8way_small_context;
@@ -117,7 +72,7 @@ void blake256_8way_close_le(void *cc, void *dst);
void blake256_8way_round0_prehash_le( void *midstate, const void *midhash,
void *data );
void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
const void *midhash, const void *data );
const void *midhash, const void *data, const int rounds );
// 14 rounds, blake, decred
typedef blake_8way_small_context blake256r14_8way_context;
@@ -138,7 +93,7 @@ typedef struct {
__m256i H[8];
__m256i S[4];
size_t ptr;
sph_u64 T0, T1;
uint64_t T0, T1;
} blake_4way_big_context __attribute__ ((aligned (128)));
typedef blake_4way_big_context blake512_4way_context;
@@ -180,7 +135,7 @@ void blake256_16way_close_le(void *cc, void *dst);
void blake256_16way_round0_prehash_le( void *midstate, const void *midhash,
void *data );
void blake256_16way_final_rounds_le( void *final_hash, const void *midstate,
const void *midhash, const void *data );
const void *midhash, const void *data, const int rounds );
// 14 rounds, blake, decred
@@ -204,7 +159,7 @@ typedef struct {
__m512i H[8];
__m512i S[4];
size_t ptr;
sph_u64 T0, T1;
uint64_t T0, T1;
} blake_8way_big_context __attribute__ ((aligned (128)));
typedef blake_8way_big_context blake512_8way_context;
@@ -224,8 +179,4 @@ void blake512_8way_final_le( blake_8way_big_context *sc, void *hash,
#endif // AVX512
#endif // AVX2
#ifdef __cplusplus
}
#endif
#endif // BLAKE_HASH_4WAY_H__

View File

@@ -40,26 +40,6 @@
#include "blake-hash-4way.h"
#ifdef __cplusplus
extern "C"{
#endif
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BLAKE
#define SPH_SMALL_FOOTPRINT_BLAKE 1
#endif
#if SPH_SMALL_FOOTPRINT_BLAKE
#define SPH_COMPACT_BLAKE_32 1
#endif
#if SPH_64 && (SPH_SMALL_FOOTPRINT_BLAKE || !SPH_64_TRUE)
#define SPH_COMPACT_BLAKE_64 1
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
// Blake-256
static const uint32_t IV256[8] =
@@ -68,7 +48,7 @@ static const uint32_t IV256[8] =
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
#if 0
// Blake-256 4 & 8 way, Blake-512 4 way
@@ -273,44 +253,28 @@ static const unsigned sigma[16][16] = {
#define CSx_(n) CSx__(n)
#define CSx__(n) CS ## n
#define CS0 SPH_C32(0x243F6A88)
#define CS1 SPH_C32(0x85A308D3)
#define CS2 SPH_C32(0x13198A2E)
#define CS3 SPH_C32(0x03707344)
#define CS4 SPH_C32(0xA4093822)
#define CS5 SPH_C32(0x299F31D0)
#define CS6 SPH_C32(0x082EFA98)
#define CS7 SPH_C32(0xEC4E6C89)
#define CS8 SPH_C32(0x452821E6)
#define CS9 SPH_C32(0x38D01377)
#define CSA SPH_C32(0xBE5466CF)
#define CSB SPH_C32(0x34E90C6C)
#define CSC SPH_C32(0xC0AC29B7)
#define CSD SPH_C32(0xC97C50DD)
#define CSE SPH_C32(0x3F84D5B5)
#define CSF SPH_C32(0xB5470917)
#if SPH_COMPACT_BLAKE_32
static const sph_u32 CS[16] = {
SPH_C32(0x243F6A88), SPH_C32(0x85A308D3),
SPH_C32(0x13198A2E), SPH_C32(0x03707344),
SPH_C32(0xA4093822), SPH_C32(0x299F31D0),
SPH_C32(0x082EFA98), SPH_C32(0xEC4E6C89),
SPH_C32(0x452821E6), SPH_C32(0x38D01377),
SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C),
SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD),
SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917)
};
#endif
#define CS0 0x243F6A88
#define CS1 0x85A308D3
#define CS2 0x13198A2E
#define CS3 0x03707344
#define CS4 0xA4093822
#define CS5 0x299F31D0
#define CS6 0x082EFA98
#define CS7 0xEC4E6C89
#define CS8 0x452821E6
#define CS9 0x38D01377
#define CSA 0xBE5466CF
#define CSB 0x34E90C6C
#define CSC 0xC0AC29B7
#define CSD 0xC97C50DD
#define CSE 0x3F84D5B5
#define CSF 0xB5470917
/////////////////////////////////////////
//
// Blake-256 1 way SIMD
// Only used for prehash, otherwise 4way is used with SSE2.
// optimize shuffles to reduce latency caused by dependencies on V1.
#define BLAKE256_ROUND( r ) \
{ \
V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \
@@ -353,52 +317,9 @@ static const sph_u32 CS[16] = {
V2 = mm128_shufll_32( V2 ); \
}
/*
#define BLAKE256_ROUND( r ) \
{ \
V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \
_mm_set_epi32( CSx( r, 7 ) ^ Mx( r, 6 ), \
CSx( r, 5 ) ^ Mx( r, 4 ), \
CSx( r, 3 ) ^ Mx( r, 2 ), \
CSx( r, 1 ) ^ Mx( r, 0 ) ) ) ); \
V3 = mm128_swap32_16( _mm_xor_si128( V3, V0 ) ); \
V2 = _mm_add_epi32( V2, V3 ); \
V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 12 ); \
V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \
_mm_set_epi32( CSx( r, 6 ) ^ Mx( r, 7 ), \
CSx( r, 4 ) ^ Mx( r, 5 ), \
CSx( r, 2 ) ^ Mx( r, 3 ), \
CSx( r, 0 ) ^ Mx( r, 1 ) ) ) ); \
V3 = mm128_shuflr32_8( _mm_xor_si128( V3, V0 ) ); \
V2 = _mm_add_epi32( V2, V3 ); \
V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 7 ); \
V3 = mm128_shufll_32( V3 ); \
V2 = mm128_swap_64( V2 ); \
V1 = mm128_shuflr_32( V1 ); \
V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \
_mm_set_epi32( CSx( r, F ) ^ Mx( r, E ), \
CSx( r, D ) ^ Mx( r, C ), \
CSx( r, B ) ^ Mx( r, A ), \
CSx( r, 9 ) ^ Mx( r, 8 ) ) ) ); \
V3 = mm128_swap32_16( _mm_xor_si128( V3, V0 ) ); \
V2 = _mm_add_epi32( V2, V3 ); \
V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 12 ); \
V0 = _mm_add_epi32( V0, _mm_add_epi32( V1, \
_mm_set_epi32( CSx( r, E ) ^ Mx( r, F ), \
CSx( r, C ) ^ Mx( r, D ), \
CSx( r, A ) ^ Mx( r, B ), \
CSx( r, 8 ) ^ Mx( r, 9 ) ) ) ); \
V3 = mm128_shuflr32_8( _mm_xor_si128( V3, V0 ) ); \
V2 = _mm_add_epi32( V2, V3 ); \
V1 = mm128_ror_32( _mm_xor_si128( V1, V2 ), 7 ); \
V3 = mm128_shuflr_32( V3 ); \
V2 = mm128_swap_64( V2 ); \
V1 = mm128_shufll_32( V1 ); \
}
*/
// Default is 14 rounds, blakecoin & vanilla are 8.
void blake256_transform_le( uint32_t *H, const uint32_t *buf,
const uint32_t T0, const uint32_t T1 )
const uint32_t T0, const uint32_t T1, int rounds )
{
__m128i V0, V1, V2, V3;
uint32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF;
@@ -431,12 +352,15 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
BLAKE256_ROUND( 5 );
BLAKE256_ROUND( 6 );
BLAKE256_ROUND( 7 );
BLAKE256_ROUND( 8 );
BLAKE256_ROUND( 9 );
BLAKE256_ROUND( 0 );
BLAKE256_ROUND( 1 );
BLAKE256_ROUND( 2 );
BLAKE256_ROUND( 3 );
if ( rounds > 8 ) // 14
{
BLAKE256_ROUND( 8 );
BLAKE256_ROUND( 9 );
BLAKE256_ROUND( 0 );
BLAKE256_ROUND( 1 );
BLAKE256_ROUND( 2 );
BLAKE256_ROUND( 3 );
}
casti_m128i( H, 0 ) = mm128_xor3( casti_m128i( H, 0 ), V0, V2 );
casti_m128i( H, 1 ) = mm128_xor3( casti_m128i( H, 1 ), V1, V3 );
}
@@ -459,34 +383,6 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
b = mm128_ror_32( _mm_xor_si128( b, c ), 7 ); \
}
#if SPH_COMPACT_BLAKE_32
// Not used
#if 0
#define ROUND_S_4WAY(r) do { \
GS_4WAY(M[sigma[r][0x0]], M[sigma[r][0x1]], \
CS[sigma[r][0x0]], CS[sigma[r][0x1]], V0, V4, V8, VC); \
GS_4WAY(M[sigma[r][0x2]], M[sigma[r][0x3]], \
CS[sigma[r][0x2]], CS[sigma[r][0x3]], V1, V5, V9, VD); \
GS_4WAY(M[sigma[r][0x4]], M[sigma[r][0x5]], \
CS[sigma[r][0x4]], CS[sigma[r][0x5]], V2, V6, VA, VE); \
GS_4WAY(M[sigma[r][0x6]], M[sigma[r][0x7]], \
CS[sigma[r][0x6]], CS[sigma[r][0x7]], V3, V7, VB, VF); \
GS_4WAY(M[sigma[r][0x8]], M[sigma[r][0x9]], \
CS[sigma[r][0x8]], CS[sigma[r][0x9]], V0, V5, VA, VF); \
GS_4WAY(M[sigma[r][0xA]], M[sigma[r][0xB]], \
CS[sigma[r][0xA]], CS[sigma[r][0xB]], V1, V6, VB, VC); \
GS_4WAY(M[sigma[r][0xC]], M[sigma[r][0xD]], \
CS[sigma[r][0xC]], CS[sigma[r][0xD]], V2, V7, V8, VD); \
GS_4WAY(M[sigma[r][0xE]], M[sigma[r][0xF]], \
CS[sigma[r][0xE]], CS[sigma[r][0xF]], V3, V4, V9, VE); \
} while (0)
#endif
#else
#define ROUND_S_4WAY(r) \
{ \
GS_4WAY(Mx(r, 0), Mx(r, 1), CSx(r, 0), CSx(r, 1), V0, V4, V8, VC); \
@@ -499,8 +395,6 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
GS_4WAY(Mx(r, E), Mx(r, F), CSx(r, E), CSx(r, F), V3, V4, V9, VE); \
}
#endif
#define DECL_STATE32_4WAY \
__m128i H0, H1, H2, H3, H4, H5, H6, H7; \
uint32_t T0, T1;
@@ -531,56 +425,6 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
(state)->T1 = T1; \
} while (0)
#if SPH_COMPACT_BLAKE_32
// not used
#if 0
#define COMPRESS32_4WAY( rounds ) do { \
__m128i M[16]; \
__m128i V0, V1, V2, V3, V4, V5, V6, V7; \
__m128i V8, V9, VA, VB, VC, VD, VE, VF; \
unsigned r; \
V0 = H0; \
V1 = H1; \
V2 = H2; \
V3 = H3; \
V4 = H4; \
V5 = H5; \
V6 = H6; \
V7 = H7; \
V8 = _mm_xor_si128( S0, _mm_set1_epi32( CS0 ) ); \
V9 = _mm_xor_si128( S1, _mm_set1_epi32( CS1 ) ); \
VA = _mm_xor_si128( S2, _mm_set1_epi32( CS2 ) ); \
VB = _mm_xor_si128( S3, _mm_set1_epi32( CS3 ) ); \
VC = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS4 ) ); \
VD = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS5 ) ); \
VE = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS6 ) ); \
VF = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS7 ) ); \
mm128_block_bswap_32( M, buf ); \
mm128_block_bswap_32( M+8, buf+8 ); \
for (r = 0; r < rounds; r ++) \
ROUND_S_4WAY(r); \
H0 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S0, V0 ), V8 ), H0 ); \
H1 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S1, V1 ), V9 ), H1 ); \
H2 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S2, V2 ), VA ), H2 ); \
H3 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S3, V3 ), VB ), H3 ); \
H4 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S0, V4 ), VC ), H4 ); \
H5 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S1, V5 ), VD ), H5 ); \
H6 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S2, V6 ), VE ), H6 ); \
H7 = _mm_xor_si128( _mm_xor_si128( \
_mm_xor_si128( S3, V7 ), VF ), H7 ); \
} while (0)
#endif
#else
// current impl
#if defined(__SSSE3__)
@@ -680,8 +524,6 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
H7 = _mm_xor_si128( _mm_xor_si128( VF, V7 ), H7 ); \
}
#endif
#if defined (__AVX2__)
/////////////////////////////////
@@ -968,7 +810,7 @@ void blake256_transform_le( uint32_t *H, const uint32_t *buf,
#define DECL_STATE32_8WAY \
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
sph_u32 T0, T1;
uint32_t T0, T1;
#define READ_STATE32_8WAY(state) \
do { \
@@ -1046,7 +888,7 @@ do { \
ROUND_S_8WAY(5); \
ROUND_S_8WAY(6); \
ROUND_S_8WAY(7); \
if (rounds == 14) \
if (rounds > 8) \
{ \
ROUND_S_8WAY(8); \
ROUND_S_8WAY(9); \
@@ -1111,7 +953,7 @@ do { \
ROUND_S_8WAY(5); \
ROUND_S_8WAY(6); \
ROUND_S_8WAY(7); \
if (rounds == 14) \
if (rounds > 8) \
{ \
ROUND_S_8WAY(8); \
ROUND_S_8WAY(9); \
@@ -1156,7 +998,7 @@ void blake256_8way_round0_prehash_le( void *midstate, const void *midhash,
// M[ 0:3 ] contain new message data including unique nonces in M[ 3].
// M[ 5:12, 14 ] are always zero and not needed or used.
// M[ 4], M[ 13], M[15] are constant and are initialized here.
// M[ 4], M[13], M[15] are constant and are initialized here.
// M[ 5] is a special case, used as a cache for (M[13] ^ CSC).
M[ 4] = _mm256_set1_epi32( 0x80000000 );
@@ -1221,7 +1063,7 @@ void blake256_8way_round0_prehash_le( void *midstate, const void *midhash,
}
void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
const void *midhash, const void *data )
const void *midhash, const void *data, const int rounds )
{
__m256i *H = (__m256i*)final_hash;
const __m256i *h = (const __m256i*)midhash;
@@ -1315,12 +1157,15 @@ void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
ROUND256_8WAY_5;
ROUND256_8WAY_6;
ROUND256_8WAY_7;
ROUND256_8WAY_8;
ROUND256_8WAY_9;
ROUND256_8WAY_0;
ROUND256_8WAY_1;
ROUND256_8WAY_2;
ROUND256_8WAY_3;
if ( rounds > 8 )
{
ROUND256_8WAY_8;
ROUND256_8WAY_9;
ROUND256_8WAY_0;
ROUND256_8WAY_1;
ROUND256_8WAY_2;
ROUND256_8WAY_3;
}
const __m256i shuf_bswap32 =
mm256_set2_64( 0x0c0d0e0f08090a0b, 0x0405060700010203 );
@@ -1623,7 +1468,7 @@ void blake256_8way_final_rounds_le( void *final_hash, const void *midstate,
#define DECL_STATE32_16WAY \
__m512i H0, H1, H2, H3, H4, H5, H6, H7; \
sph_u32 T0, T1;
uint32_t T0, T1;
#define READ_STATE32_16WAY(state) \
do { \
@@ -1882,8 +1727,9 @@ void blake256_16way_round0_prehash_le( void *midstate, const void *midhash,
_mm512_xor_si512( _mm512_set1_epi32( CSE ), M[15] ) );
}
// Dfault is 14 rounds, blakecoin & vanilla are 8.
void blake256_16way_final_rounds_le( void *final_hash, const void *midstate,
const void *midhash, const void *data )
const void *midhash, const void *data, const int rounds )
{
__m512i *H = (__m512i*)final_hash;
const __m512i *h = (const __m512i*)midhash;
@@ -1988,12 +1834,15 @@ void blake256_16way_final_rounds_le( void *final_hash, const void *midstate,
ROUND256_16WAY_5;
ROUND256_16WAY_6;
ROUND256_16WAY_7;
ROUND256_16WAY_8;
ROUND256_16WAY_9;
ROUND256_16WAY_0;
ROUND256_16WAY_1;
ROUND256_16WAY_2;
ROUND256_16WAY_3;
if ( rounds > 8 )
{
ROUND256_16WAY_8;
ROUND256_16WAY_9;
ROUND256_16WAY_0;
ROUND256_16WAY_1;
ROUND256_16WAY_2;
ROUND256_16WAY_3;
}
// Byte swap final hash
const __m512i shuf_bswap32 =
@@ -2057,7 +1906,7 @@ blake32_4way( blake_4way_small_context *ctx, const void *data,
size_t clen = ( sizeof ctx->buf ) - bptr;
if ( clen > blen )
clen = blen;
clen = blen;
memcpy( buf + vptr, data, clen );
bptr += clen;
data = (const unsigned char *)data + clen;
@@ -2130,11 +1979,11 @@ blake32_4way_close( blake_4way_small_context *ctx, unsigned ub, unsigned n,
// Blake-256 8 way
static const sph_u32 salt_zero_8way_small[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
static const uint32_t salt_zero_8way_small[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
static void
blake32_8way_init( blake_8way_small_context *sc, const sph_u32 *iv,
const sph_u32 *salt, int rounds )
blake32_8way_init( blake_8way_small_context *sc, const uint32_t *iv,
const uint32_t *salt, int rounds )
{
casti_m256i( sc->H, 0 ) = _mm256_set1_epi64x( 0x6A09E6676A09E667 );
casti_m256i( sc->H, 1 ) = _mm256_set1_epi64x( 0xBB67AE85BB67AE85 );
@@ -2181,8 +2030,8 @@ blake32_8way( blake_8way_small_context *sc, const void *data, size_t len )
len -= clen;
if ( ptr == buf_size )
{
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
T1 = SPH_T32(T1 + 1);
if ( ( T0 = T0 + 512 ) < 512 )
T1 = T1 + 1;
COMPRESS32_8WAY( sc->rounds );
ptr = 0;
}
@@ -2198,7 +2047,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
__m256i buf[16];
size_t ptr;
unsigned bit_len;
sph_u32 th, tl;
uint32_t th, tl;
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
@@ -2208,13 +2057,13 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
if ( ptr == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
sc->T0 = 0xFFFFFE00UL;
sc->T1 = 0xFFFFFFFFUL;
}
else if ( sc->T0 == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00UL) + bit_len;
sc->T1 = SPH_T32(sc->T1 - 1);
sc->T0 = 0xFFFFFE00UL + bit_len;
sc->T1 = sc->T1 - 1;
}
else
sc->T0 -= 512 - bit_len;
@@ -2233,8 +2082,8 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
{
memset_zero_256( buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_8way( sc, buf + (ptr>>2), 64 - ptr );
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
sc->T0 = 0xFFFFFE00UL;
sc->T1 = 0xFFFFFFFFUL;
memset_zero_256( buf, 56>>2 );
if ( out_size_w32 == 8 )
buf[52>>2] = _mm256_set1_epi64x( 0x0100000001000000ULL );
@@ -2277,8 +2126,8 @@ blake32_8way_le( blake_8way_small_context *sc, const void *data, size_t len )
len -= clen;
if ( ptr == buf_size )
{
if ( ( T0 = SPH_T32(T0 + 512) ) < 512 )
T1 = SPH_T32(T1 + 1);
if ( ( T0 = T0 + 512 ) < 512 )
T1 = T1 + 1;
COMPRESS32_8WAY_LE( sc->rounds );
ptr = 0;
}
@@ -2294,7 +2143,7 @@ blake32_8way_close_le( blake_8way_small_context *sc, unsigned ub, unsigned n,
__m256i buf[16];
size_t ptr;
unsigned bit_len;
sph_u32 th, tl;
uint32_t th, tl;
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
@@ -2304,13 +2153,13 @@ blake32_8way_close_le( blake_8way_small_context *sc, unsigned ub, unsigned n,
if ( ptr == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
sc->T0 = 0xFFFFFE00UL;
sc->T1 = 0xFFFFFFFFUL;
}
else if ( sc->T0 == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00UL) + bit_len;
sc->T1 = SPH_T32(sc->T1 - 1);
sc->T0 = 0xFFFFFE00UL + bit_len;
sc->T1 = sc->T1 - 1;
}
else
sc->T0 -= 512 - bit_len;
@@ -2328,8 +2177,8 @@ blake32_8way_close_le( blake_8way_small_context *sc, unsigned ub, unsigned n,
{
memset_zero_256( buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_8way_le( sc, buf + (ptr>>2), 64 - ptr );
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
sc->T0 = 0xFFFFFE00UL;
sc->T1 = 0xFFFFFFFFUL;
memset_zero_256( buf, 56>>2 );
if ( out_size_w32 == 8 )
buf[52>>2] = m256_one_32;
@@ -2348,8 +2197,8 @@ blake32_8way_close_le( blake_8way_small_context *sc, unsigned ub, unsigned n,
//Blake-256 16 way AVX512
static void
blake32_16way_init( blake_16way_small_context *sc, const sph_u32 *iv,
const sph_u32 *salt, int rounds )
blake32_16way_init( blake_16way_small_context *sc, const uint32_t *iv,
const uint32_t *salt, int rounds )
{
casti_m512i( sc->H, 0 ) = _mm512_set1_epi64( 0x6A09E6676A09E667 );
casti_m512i( sc->H, 1 ) = _mm512_set1_epi64( 0xBB67AE85BB67AE85 );
@@ -2411,7 +2260,7 @@ blake32_16way_close( blake_16way_small_context *sc, unsigned ub, unsigned n,
__m512i buf[16];
size_t ptr;
unsigned bit_len;
sph_u32 th, tl;
uint32_t th, tl;
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
@@ -2508,7 +2357,7 @@ blake32_16way_close_le( blake_16way_small_context *sc, unsigned ub, unsigned n,
__m512i buf[16];
size_t ptr;
unsigned bit_len;
sph_u32 th, tl;
uint32_t th, tl;
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
@@ -2618,8 +2467,6 @@ blake256r8_16way_close(void *cc, void *dst)
#endif // AVX512
// Blake-256 4 way
// default 14 rounds, backward copatibility
@@ -2754,9 +2601,3 @@ blake256r8_8way_close(void *cc, void *dst)
}
#endif
#ifdef __cplusplus
}
#endif
//#endif

View File

@@ -1,62 +1,22 @@
/* $Id: blake.c 252 2011-06-07 17:55:14Z tp $ */
/*
* BLAKE implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#if defined (__AVX2__)
#include <stddef.h>
#include <string.h>
#include <limits.h>
#include "blake-hash-4way.h"
#ifdef __cplusplus
extern "C"{
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
// Blake-512 common
/*
static const sph_u64 IV512[8] = {
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
static const uint64_t IV512[8] =
{
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};
static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 };
static const uint64_t salt_zero_big[4] = { 0, 0, 0, 0 };
static const unsigned sigma[16][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
@@ -77,15 +37,15 @@ static const unsigned sigma[16][16] = {
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
};
static const sph_u64 CB[16] = {
SPH_C64(0x243F6A8885A308D3), SPH_C64(0x13198A2E03707344),
SPH_C64(0xA4093822299F31D0), SPH_C64(0x082EFA98EC4E6C89),
SPH_C64(0x452821E638D01377), SPH_C64(0xBE5466CF34E90C6C),
SPH_C64(0xC0AC29B7C97C50DD), SPH_C64(0x3F84D5B5B5470917),
SPH_C64(0x9216D5D98979FB1B), SPH_C64(0xD1310BA698DFB5AC),
SPH_C64(0x2FFD72DBD01ADFB7), SPH_C64(0xB8E1AFED6A267E96),
SPH_C64(0xBA7C9045F12C7F99), SPH_C64(0x24A19947B3916CF7),
SPH_C64(0x0801F2E2858EFC16), SPH_C64(0x636920D871574E69)
static const uint64_t CB[16] = {
0x243F6A8885A308D3, 0x13198A2E03707344,
0xA4093822299F31D0, 0x082EFA98EC4E6C89,
0x452821E638D01377, 0xBE5466CF34E90C6C,
0xC0AC29B7C97C50DD, 0x3F84D5B5B5470917,
0x9216D5D98979FB1B, 0xD1310BA698DFB5AC,
0x2FFD72DBD01ADFB7, 0xB8E1AFED6A267E96,
0xBA7C9045F12C7F99, 0x24A19947B3916CF7,
0x0801F2E2858EFC16, 0x636920D871574E69
*/
@@ -1486,7 +1446,7 @@ blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
if ( ptr == buf_size )
{
if ( (T0 = T0 + 1024 ) < 1024 )
T1 = SPH_T64(T1 + 1);
T1 = T1 + 1;
COMPRESS64_4WAY;
ptr = 0;
}
@@ -1538,8 +1498,8 @@ blake64_4way_close( blake_4way_big_context *sc, void *dst )
memset_zero_256( buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
sc->T0 = 0xFFFFFFFFFFFFFC00ULL;
sc->T1 = 0xFFFFFFFFFFFFFFFFULL;
memset_zero_256( buf, 112>>3 );
buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
buf[112>>3] = _mm256_set1_epi64x( bswap_64( th ) );
@@ -1629,8 +1589,4 @@ blake512_4way_close(void *cc, void *dst)
blake64_4way_close( cc, dst );
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -4,7 +4,149 @@
#include <stdint.h>
#include <memory.h>
#if defined (BLAKECOIN_4WAY)
#define rounds 8
#if defined (BLAKECOIN_16WAY)
int scanhash_blakecoin_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*16] __attribute__ ((aligned (64)));
uint32_t midstate_vars[16*16] __attribute__ ((aligned (64)));
__m512i block0_hash[8] __attribute__ ((aligned (64)));
__m512i block_buf[16] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( ((__m512i*)hash32)[7] );
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t targ32_d7 = ptarget[7];
uint32_t phash[8] __attribute__ ((aligned (64))) =
{
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
uint32_t n = pdata[19];
const uint32_t first_nonce = (const uint32_t) n;
const uint32_t last_nonce = max_nonce - 16;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m512i sixteen = _mm512_set1_epi32( 16 );
// Prehash first block
blake256_transform_le( phash, pdata, 512, 0, rounds );
block0_hash[0] = _mm512_set1_epi32( phash[0] );
block0_hash[1] = _mm512_set1_epi32( phash[1] );
block0_hash[2] = _mm512_set1_epi32( phash[2] );
block0_hash[3] = _mm512_set1_epi32( phash[3] );
block0_hash[4] = _mm512_set1_epi32( phash[4] );
block0_hash[5] = _mm512_set1_epi32( phash[5] );
block0_hash[6] = _mm512_set1_epi32( phash[6] );
block0_hash[7] = _mm512_set1_epi32( phash[7] );
// Build vectored second block, interleave last 16 bytes of data using
// unique nonces.
block_buf[0] = _mm512_set1_epi32( pdata[16] );
block_buf[1] = _mm512_set1_epi32( pdata[17] );
block_buf[2] = _mm512_set1_epi32( pdata[18] );
block_buf[3] =
_mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
// Partialy prehash second block without touching nonces in block_buf[3].
blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
blake256_16way_final_rounds_le( hash32, midstate_vars, block0_hash,
block_buf, rounds );
for ( int lane = 0; lane < 16; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{
extr_lane_16x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
}
block_buf[3] = _mm512_add_epi32( block_buf[3], sixteen );
n += 16;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (BLAKECOIN_8WAY)
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*8] __attribute__ ((aligned (64)));
uint32_t midstate_vars[16*8] __attribute__ ((aligned (32)));
__m256i block0_hash[8] __attribute__ ((aligned (32)));
__m256i block_buf[16] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( ((__m256i*)hash32)[7] );
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t targ32_d7 = ptarget[7];
uint32_t phash[8] __attribute__ ((aligned (32))) =
{
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
uint32_t n = pdata[19];
const uint32_t first_nonce = (const uint32_t) n;
const uint32_t last_nonce = max_nonce - 8;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m256i eight = _mm256_set1_epi32( 8 );
// Prehash first block
blake256_transform_le( phash, pdata, 512, 0, rounds );
block0_hash[0] = _mm256_set1_epi32( phash[0] );
block0_hash[1] = _mm256_set1_epi32( phash[1] );
block0_hash[2] = _mm256_set1_epi32( phash[2] );
block0_hash[3] = _mm256_set1_epi32( phash[3] );
block0_hash[4] = _mm256_set1_epi32( phash[4] );
block0_hash[5] = _mm256_set1_epi32( phash[5] );
block0_hash[6] = _mm256_set1_epi32( phash[6] );
block0_hash[7] = _mm256_set1_epi32( phash[7] );
// Build vectored second block, interleave last 16 bytes of data using
// unique nonces.
block_buf[0] = _mm256_set1_epi32( pdata[16] );
block_buf[1] = _mm256_set1_epi32( pdata[17] );
block_buf[2] = _mm256_set1_epi32( pdata[18] );
block_buf[3] = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
// Partialy prehash second block without touching nonces in block_buf[3].
blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
blake256_8way_final_rounds_le( hash32, midstate_vars, block0_hash,
block_buf, rounds );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{
extr_lane_8x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
}
block_buf[3] = _mm256_add_epi32( block_buf[3], eight );
n += 8;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (BLAKECOIN_4WAY)
blake256r8_4way_context blakecoin_4w_ctx;
@@ -61,7 +203,8 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
#endif
#if defined(BLAKECOIN_8WAY)
#if 0
//#if defined(BLAKECOIN_8WAY)
blake256r8_8way_context blakecoin_8w_ctx;
@@ -78,11 +221,84 @@ void blakecoin_8way_hash( void *state, const void *input )
state+160, state+192, state+224, vhash, 256 );
}
/*
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash32[8*8] __attribute__ ((aligned (64)));
uint32_t midstate_vars[16*8] __attribute__ ((aligned (64)));
__m256i block0_hash[8] __attribute__ ((aligned (64)));
__m256i block_buf[16] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t phash[8] __attribute__ ((aligned (32))) =
{
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
uint32_t *pdata = work->data;
uint32_t *ptarget = (uint32_t*)work->target;
const uint32_t targ32_d7 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m256i eight = _mm256_set1_epi32( 8 );
// Prehash first block
blake256_transform_le( phash, pdata, 512, 0, 8 );
block0_hash[0] = _mm256_set1_epi32( phash[0] );
block0_hash[1] = _mm256_set1_epi32( phash[1] );
block0_hash[2] = _mm256_set1_epi32( phash[2] );
block0_hash[3] = _mm256_set1_epi32( phash[3] );
block0_hash[4] = _mm256_set1_epi32( phash[4] );
block0_hash[5] = _mm256_set1_epi32( phash[5] );
block0_hash[6] = _mm256_set1_epi32( phash[6] );
block0_hash[7] = _mm256_set1_epi32( phash[7] );
// Build vectored second block, interleave last 16 bytes of data using
// unique nonces.
block_buf[0] = _mm256_set1_epi32( pdata[16] );
block_buf[1] = _mm256_set1_epi32( pdata[17] );
block_buf[2] = _mm256_set1_epi32( pdata[18] );
block_buf[3] = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
// Partialy prehash second block without touching nonces
blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
do {
blake256_8way_final_rounds_le( hash32, midstate_vars, block0_hash,
block_buf );
for ( int lane = 0; lane < 8; lane++ )
if ( hash32_d7[ lane ] <= targ32_d7 )
{
extr_lane_8x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
}
block_buf[3] = _mm256_add_epi32( block_buf[3], eight );
n += 8;
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
*/
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (32)));
uint32_t hash32[8*8] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
blake256r8_8way_context ctx __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( ((__m256i*)hash32)[7] );
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@@ -101,15 +317,22 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
n+3, n+2, n+1, n ) );
pdata[19] = n;
blakecoin_8way_hash( hash, vdata );
for ( int i = 0; i < 8; i++ )
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
memcpy( &ctx, &blakecoin_8w_ctx, sizeof ctx );
blake256r8_8way_update( &ctx, (const void*)vdata + (64<<3), 16 );
blake256r8_8way_close( &ctx, hash32 );
for ( int lane = 0; lane < 8; lane++ )
if ( hash32_d7[ lane ] <= HTarget )
{
pdata[19] = n+i;
submit_solution( work, hash+(i<<3), mythr );
extr_lane_8x32( lane_hash, hash32, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
}
n += 8;
} while ( (n < max_nonce) && !work_restart[thr_id].restart );

View File

@@ -4,10 +4,10 @@
// vanilla uses default gen merkle root, otherwise identical to blakecoin
bool register_vanilla_algo( algo_gate_t* gate )
{
#if defined(BLAKECOIN_8WAY)
#if defined(BLAKECOIN_16WAY)
gate->scanhash = (void*)&scanhash_blakecoin_16way;
#elif defined(BLAKECOIN_8WAY)
gate->scanhash = (void*)&scanhash_blakecoin_8way;
gate->hash = (void*)&blakecoin_8way_hash;
#elif defined(BLAKECOIN_4WAY)
gate->scanhash = (void*)&scanhash_blakecoin_4way;
gate->hash = (void*)&blakecoin_4way_hash;
@@ -15,14 +15,14 @@ bool register_vanilla_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_blakecoin;
gate->hash = (void*)&blakecoinhash;
#endif
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
return true;
}
bool register_blakecoin_algo( algo_gate_t* gate )
{
register_vanilla_algo( gate );
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->gen_merkle_root = (void*)&sha256_gen_merkle_root;
return true;
}

View File

@@ -1,30 +1,36 @@
#ifndef __BLAKECOIN_GATE_H__
#define __BLAKECOIN_GATE_H__ 1
#ifndef BLAKECOIN_GATE_H__
#define BLAKECOIN_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__SSE4_2__)
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define BLAKECOIN_16WAY
#elif defined(__AVX2__)
#define BLAKECOIN_8WAY
#elif defined(__SSE2__) // always true
#define BLAKECOIN_4WAY
#endif
#if defined(__AVX2__)
#define BLAKECOIN_8WAY
#endif
#if defined (BLAKECOIN_8WAY)
void blakecoin_8way_hash(void *state, const void *input);
#if defined (BLAKECOIN_16WAY)
int scanhash_blakecoin_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined (BLAKECOIN_8WAY)
//void blakecoin_8way_hash(void *state, const void *input);
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif
#if defined (BLAKECOIN_4WAY)
#elif defined (BLAKECOIN_4WAY)
void blakecoin_4way_hash(void *state, const void *input);
int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif
#else // never used
void blakecoinhash( void *state, const void *input );
int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif
#endif

View File

@@ -1,6 +1,6 @@
#include "blakecoin-gate.h"
#if !defined(BLAKECOIN_8WAY) && !defined(BLAKECOIN_4WAY)
#if !defined(BLAKECOIN_16WAY) && !defined(BLAKECOIN_8WAY) && !defined(BLAKECOIN_4WAY)
#define BLAKE32_ROUNDS 8
#include "sph_blake.h"
@@ -12,7 +12,6 @@ void blakecoin_close(void *cc, void *dst);
#include <string.h>
#include <stdint.h>
#include <memory.h>
#include <openssl/sha.h>
// context management is staged for efficiency.
// 1. global initial ctx cached on startup
@@ -35,8 +34,8 @@ void blakecoinhash( void *state, const void *input )
uint8_t hash[64] __attribute__ ((aligned (32)));
uint8_t *ending = (uint8_t*) input + 64;
// copy cached midstate
memcpy( &ctx, &blake_mid_ctx, sizeof ctx );
// copy cached midstate
memcpy( &ctx, &blake_mid_ctx, sizeof ctx );
blakecoin( &ctx, ending, 16 );
blakecoin_close( &ctx, hash );
memcpy( state, hash, 32 );
@@ -45,8 +44,8 @@ void blakecoinhash( void *state, const void *input )
int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t HTarget = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
@@ -60,10 +59,10 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
HTarget = 0x7f;
// we need big endian data...
for (int kk=0; kk < 19; kk++)
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
for (int kk=0; kk < 19; kk++)
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
blake_midstate_init( endiandata );
blake_midstate_init( endiandata );
#ifdef DEBUG_ALGO
applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);