mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v23.5
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
#include "keccak-hash-4way.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
//#if defined(__AVX2__)
|
||||
|
||||
static const uint64_t RC[] = {
|
||||
0x0000000000000001, 0x0000000000008082,
|
||||
@@ -48,10 +48,6 @@ static const uint64_t RC[] = {
|
||||
#define a34 (kc->w[23])
|
||||
#define a44 (kc->w[24])
|
||||
|
||||
#define DECL_STATE
|
||||
#define READ_STATE(sc)
|
||||
#define WRITE_STATE(sc)
|
||||
|
||||
#define MOV64(d, s) (d = s)
|
||||
#define XOR64_IOTA XOR
|
||||
|
||||
@@ -131,7 +127,6 @@ keccak64_8way_core( keccak64_ctx_m512i *kc, const void *data, size_t len,
|
||||
__m512i *buf;
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = kc->buf;
|
||||
ptr = kc->ptr;
|
||||
@@ -142,7 +137,6 @@ keccak64_8way_core( keccak64_ctx_m512i *kc, const void *data, size_t len,
|
||||
kc->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
READ_STATE( kc );
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
@@ -161,7 +155,6 @@ keccak64_8way_core( keccak64_ctx_m512i *kc, const void *data, size_t len,
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE( kc );
|
||||
kc->ptr = ptr;
|
||||
}
|
||||
|
||||
@@ -218,6 +211,13 @@ keccak256_8x64_close(void *cc, void *dst)
|
||||
keccak64_8way_close(cc, dst, 32, 136);
|
||||
}
|
||||
|
||||
/*
 * One-shot 8x64 Keccak-256 helper: runs init, update and close on the same
 * context in that order.
 *
 * cc   - context, passed to keccak256_8x64_init/update/close; it is
 *        re-initialised here, so any previous state is discarded.
 * dst  - output buffer handed to keccak256_8x64_close.
 * data - input handed unchanged to keccak256_8x64_update.
 * len  - length handed unchanged to keccak256_8x64_update.
 */
void keccak256_8x64_ctx( void *cc, void *dst, const void *data, size_t len )
{
   keccak256_8x64_init( cc );
   keccak256_8x64_update( cc, data, len );
   keccak256_8x64_close( cc, dst );
}
|
||||
|
||||
void keccak512_8x64_init( void *kc )
|
||||
{
|
||||
keccak64_8way_init( kc, 512 );
|
||||
@@ -235,6 +235,13 @@ keccak512_8x64_close(void *cc, void *dst)
|
||||
keccak64_8way_close(cc, dst, 64, 72);
|
||||
}
|
||||
|
||||
/*
 * One-shot 8x64 Keccak-512 helper: runs init, update and close on the same
 * context in that order.
 *
 * cc   - context, passed to keccak512_8x64_init/update/close; it is
 *        re-initialised here, so any previous state is discarded.
 * dst  - output buffer handed to keccak512_8x64_close.
 * data - input handed unchanged to keccak512_8x64_update.
 * len  - length handed unchanged to keccak512_8x64_update.
 */
void keccak512_8x64_ctx( void *cc, void *dst, const void *data, size_t len )
{
   keccak512_8x64_init( cc );
   keccak512_8x64_update( cc, data, len );
   keccak512_8x64_close( cc, dst );
}
|
||||
|
||||
#undef INPUT_BUF
|
||||
#undef DECL64
|
||||
#undef XOR64
|
||||
@@ -247,9 +254,10 @@ keccak512_8x64_close(void *cc, void *dst)
|
||||
#undef XOROR
|
||||
#undef XORAND
|
||||
#undef XOR3
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
// AVX2
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#define INPUT_BUF(size) do { \
|
||||
size_t j; \
|
||||
@@ -318,7 +326,6 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
__m256i *buf;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = kc->buf;
|
||||
ptr = kc->ptr;
|
||||
@@ -330,7 +337,6 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE( kc );
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
@@ -349,7 +355,6 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE( kc );
|
||||
kc->ptr = ptr;
|
||||
}
|
||||
|
||||
@@ -389,7 +394,7 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
memcpy_256( dst, kc->w, m256_len );
|
||||
}
|
||||
|
||||
/*
 * Initialise a 4x64 Keccak-256 context.
 * kc is forwarded to keccak64_init with an output size of 256.
 * The name matches the keccak256_4x64_init prototype in the header.
 */
void keccak256_4x64_init( void *kc )
{
   keccak64_init( kc, 256 );
}
|
||||
@@ -406,6 +411,13 @@ keccak256_4x64_close(void *cc, void *dst)
|
||||
keccak64_close(cc, dst, 32, 136);
|
||||
}
|
||||
|
||||
/*
 * One-shot 4x64 Keccak-256 helper: runs init, update and close on the same
 * context in that order.
 *
 * cc   - context, passed to keccak256_4x64_init/update/close; it is
 *        re-initialised here, so any previous state is discarded.
 * dst  - output buffer handed to keccak256_4x64_close.
 * data - input handed unchanged to keccak256_4x64_update.
 * len  - length handed unchanged to keccak256_4x64_update.
 */
void keccak256_4x64_ctx( void *cc, void *dst, const void *data, size_t len )
{
   keccak256_4x64_init( cc );
   keccak256_4x64_update( cc, data, len );
   keccak256_4x64_close( cc, dst );
}
|
||||
|
||||
void keccak512_4x64_init( void *kc )
|
||||
{
|
||||
keccak64_init( kc, 512 );
|
||||
@@ -418,11 +430,219 @@ keccak512_4x64_update(void *cc, const void *data, size_t len)
|
||||
}
|
||||
|
||||
/*
 * Finalise a 4x64 Keccak-512 context and write the digest to dst.
 * Forwards to keccak64_close with byte_len = 64 and lim = 72.
 * The name matches the keccak512_4x64_close prototype in the header.
 */
void
keccak512_4x64_close(void *cc, void *dst)
{
   keccak64_close(cc, dst, 64, 72);
}
|
||||
|
||||
/*
 * One-shot 4x64 Keccak-512 helper: runs init, update and close on the same
 * context in that order.
 *
 * cc   - context, passed to keccak512_4x64_init/update/close; it is
 *        re-initialised here, so any previous state is discarded.
 * dst  - output buffer handed to keccak512_4x64_close.
 * data - input handed unchanged to keccak512_4x64_update.
 * len  - length handed unchanged to keccak512_4x64_update.
 */
void keccak512_4x64_ctx( void *cc, void *dst, const void *data, size_t len )
{
   keccak512_4x64_init( cc );
   keccak512_4x64_update( cc, data, len );
   keccak512_4x64_close( cc, dst );
}
|
||||
|
||||
#undef INPUT_BUF
|
||||
#undef DECL64
|
||||
#undef XOR64
|
||||
#undef XOR
|
||||
#undef AND64
|
||||
#undef OR64
|
||||
#undef NOT64
|
||||
#undef ROL64
|
||||
#undef KECCAK_F_1600
|
||||
#undef KECCAK_F_1600_256
|
||||
#undef XOROR
|
||||
#undef XORAND
|
||||
#undef XOR3
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
// SSE2 & NEON
|
||||
|
||||
/*
 * XOR the first (size / 8) vectors of the staging buffer into the state.
 * Expands in a scope that must provide `kc` (context with a `w` array) and
 * `buf` (the staging buffer); `size` is a byte count and is evaluated once.
 */
#define INPUT_BUF(size) do { \
   size_t j; \
   for ( j = 0; j < ((size) >> 3); j++ ) \
      kc->w[j] = v128_xor( kc->w[j], buf[j] ); \
} while (0)
|
||||
|
||||
/*
 * 128-bit vector bindings for the lane primitives. They are defined just
 * before keccak-macros.c is included and #undef'd after this section, so
 * they are presumably the operations that file's round macros expand to
 * (TODO confirm against keccak-macros.c).
 * Each form assigns the result of the named v128_* helper to `d`.
 */
#define DECL64(x)            v128_t x
#define XOR(d, a, b)         ((d) = v128_xor((a), (b)))
#define XOR64                XOR
#define AND64(d, a, b)       ((d) = v128_and((a), (b)))
#define OR64(d, a, b)        ((d) = v128_or((a), (b)))
#define NOT64(d, s)          ((d) = v128_not((s)))
#define ROL64(d, v, n)       ((d) = v128_rol64((v), (n)))
#define XOROR(d, a, b, c)    ((d) = v128_xoror((a), (b), (c)))
#define XORAND(d, a, b, c)   ((d) = v128_xorand((a), (b), (c)))
#define XOR3(d, a, b, c)     ((d) = v128_xor3((a), (b), (c)))
|
||||
|
||||
#include "keccak-macros.c"
|
||||
|
||||
/*
 * KECCAK_F_1600 wraps the round loop below in DO(); DO, KF_ELT and P8_TO_P0
 * are not defined in this section (presumably they come from
 * keccak-macros.c, included above).
 *
 * KECCAK_F_1600_256 walks the round-constant table RC in three passes of
 * eight KF_ELT steps (indices j+0 .. j+7, j = 0, 8, 16), i.e. 24 steps in
 * total, and applies P8_TO_P0 at the end of each pass. Each constant is
 * wrapped in v128_64() before being handed to KF_ELT.
 */
#define KECCAK_F_1600   DO(KECCAK_F_1600_256)

#define KECCAK_F_1600_256 do { \
   int j; \
   for ( j = 0; j < 24; j += 8 ) \
   { \
      KF_ELT( 0, 1, v128_64( RC[j + 0] ) ); \
      KF_ELT( 1, 2, v128_64( RC[j + 1] ) ); \
      KF_ELT( 2, 3, v128_64( RC[j + 2] ) ); \
      KF_ELT( 3, 4, v128_64( RC[j + 3] ) ); \
      KF_ELT( 4, 5, v128_64( RC[j + 4] ) ); \
      KF_ELT( 5, 6, v128_64( RC[j + 5] ) ); \
      KF_ELT( 6, 7, v128_64( RC[j + 6] ) ); \
      KF_ELT( 7, 8, v128_64( RC[j + 7] ) ); \
      P8_TO_P0; \
   } \
} while (0)
|
||||
|
||||
/*
 * Reset a v128 Keccak context.
 *
 * All 25 state vectors are cleared, except that vectors 1, 2, 8, 12, 17 and
 * 20 start as all-ones: this is the "lane complement" initialisation, and
 * the same six vectors are inverted back in keccak64x2_close.
 *
 * kc       - context to initialise; ptr is reset to 0.
 * out_size - digest size in bits; lim is set to 200 - out_size/4.
 */
static void keccak64x2_init( keccak64_ctx_v128 *kc, unsigned out_size )
{
   /* Indices of the state vectors stored complemented. */
   static const unsigned char complemented[] = { 1, 2, 8, 12, 17, 20 };
   const v128_t zero = v128_zero;
   const v128_t neg1 = v128_neg1;
   size_t i;

   for ( i = 0; i < 25; i++ )
      kc->w[i] = zero;
   for ( i = 0; i < sizeof complemented / sizeof complemented[0]; i++ )
      kc->w[ complemented[i] ] = neg1;

   kc->ptr = 0;
   kc->lim = 200 - (out_size >> 2);
}
|
||||
|
||||
static void
|
||||
keccak64x2_core( keccak64_ctx_v128 *kc, const void *data, size_t len,
|
||||
size_t lim )
|
||||
{
|
||||
v128_t *buf;
|
||||
v128_t *vdata = (v128_t*)data;
|
||||
size_t ptr;
|
||||
|
||||
buf = kc->buf;
|
||||
ptr = kc->ptr;
|
||||
|
||||
if ( len < (lim - ptr) )
|
||||
{
|
||||
v128_memcpy( buf + (ptr>>3), vdata, len>>3 );
|
||||
kc->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = (lim - ptr);
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
v128_memcpy( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == lim )
|
||||
{
|
||||
INPUT_BUF( lim );
|
||||
KECCAK_F_1600;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
kc->ptr = ptr;
|
||||
}
|
||||
|
||||
/*
 * Pad the final block, absorb it, undo the lane complement and copy the
 * digest out.
 *
 * kc       - context holding the partially filled block.
 * dst      - receives byte_len/8 vectors copied from the start of kc->w.
 * byte_len - digest length in bytes (32 or 64 for the callers in this file).
 * lim      - block size in bytes (136 or 72 for the callers in this file).
 *
 * The padding scratch buffer has a fixed size instead of the former
 * `tmp[lim + 1]`: a variable-length array inside a union is a GNU extension
 * that clang does not accept. At most lim/8 vectors are ever written, and
 * the bound below keeps the old "lim + 1" sizing for a 144-byte block, so
 * every block size of 144 bytes or less still fits.
 */
static void keccak64x2_close( keccak64_ctx_v128 *kc, void *dst,
                              size_t byte_len, size_t lim )
{
   enum { k2x64_max_rate = 144 };
   union {
      v128_t tmp[ k2x64_max_rate + 1 ];
      uint64_t dummy;   /* for alignment */
   } u;
   const unsigned eb = hard_coded_eb;
   const size_t v128_len = byte_len >> 3;
   size_t j;

   if ( kc->ptr == (lim - 8) )
   {
      /* Exactly one word left: first and last padding bits share it. */
      const uint64_t t = eb | 0x8000000000000000;
      u.tmp[0] = v128_64( t );
      j = 8;
   }
   else
   {
      /* First padding word, zero fill, then the final bit in the last word. */
      j = lim - kc->ptr;
      u.tmp[0] = v128_64( eb );
      v128_memset_zero( u.tmp + 1, (j>>3) - 2 );
      u.tmp[ (j>>3) - 1 ] = v128_64( 0x8000000000000000 );
   }
   keccak64x2_core( kc, u.tmp, j, lim );

   /* Finalize the "lane complement" */
   NOT64( kc->w[ 1], kc->w[ 1] );
   NOT64( kc->w[ 2], kc->w[ 2] );
   NOT64( kc->w[ 8], kc->w[ 8] );
   NOT64( kc->w[12], kc->w[12] );
   NOT64( kc->w[17], kc->w[17] );
   NOT64( kc->w[20], kc->w[20] );

   v128_memcpy( dst, kc->w, v128_len );
}
|
||||
|
||||
/*
 * Initialise a 2x64 Keccak-256 context.
 * kc is forwarded to keccak64x2_init with an output size of 256.
 */
void keccak256_2x64_init( void *kc )
{
   keccak64x2_init( kc, 256 );
}
|
||||
|
||||
/*
 * Absorb input into a 2x64 Keccak-256 context.
 * Forwards cc, data and len to keccak64x2_core with a block size of 136.
 */
void
keccak256_2x64_update(void *cc, const void *data, size_t len)
{
   keccak64x2_core(cc, data, len, 136);
}
|
||||
|
||||
/*
 * Finalise a 2x64 Keccak-256 context and write the digest to dst.
 * Forwards to keccak64x2_close with byte_len = 32 and lim = 136.
 */
void
keccak256_2x64_close(void *cc, void *dst)
{
   keccak64x2_close(cc, dst, 32, 136);
}
|
||||
|
||||
/*
 * One-shot 2x64 Keccak-256 helper: runs init, update and close on the same
 * context in that order.
 *
 * cc   - context; it is re-initialised here, so any previous state is
 *        discarded.
 * dst  - output buffer handed to keccak256_2x64_close.
 * data - input handed unchanged to keccak256_2x64_update.
 * len  - length handed unchanged to keccak256_2x64_update.
 */
void keccak256_2x64_ctx( void *cc, void *dst, const void *data, size_t len )
{
   keccak256_2x64_init( cc );
   keccak256_2x64_update( cc, data, len );
   keccak256_2x64_close( cc, dst );
}
|
||||
|
||||
/*
 * Initialise a 2x64 Keccak-512 context.
 * kc is forwarded to keccak64x2_init with an output size of 512.
 */
void keccak512_2x64_init( void *kc )
{
   keccak64x2_init( kc, 512 );
}
|
||||
|
||||
/*
 * Absorb input into a 2x64 Keccak-512 context.
 * Forwards cc, data and len to keccak64x2_core with a block size of 72.
 */
void
keccak512_2x64_update(void *cc, const void *data, size_t len)
{
   keccak64x2_core(cc, data, len, 72);
}
|
||||
|
||||
/*
 * Finalise a 2x64 Keccak-512 context and write the digest to dst.
 * Forwards to keccak64x2_close with byte_len = 64 and lim = 72.
 */
void
keccak512_2x64_close(void *cc, void *dst)
{
   keccak64x2_close(cc, dst, 64, 72);
}
|
||||
|
||||
/*
 * One-shot 2x64 Keccak-512 helper: runs init, update and close on the same
 * context in that order.
 *
 * cc   - context; it is re-initialised here, so any previous state is
 *        discarded.
 * dst  - output buffer handed to keccak512_2x64_close.
 * data - input handed unchanged to keccak512_2x64_update.
 * len  - length handed unchanged to keccak512_2x64_update.
 */
void keccak512_2x64_ctx( void *cc, void *dst, const void *data, size_t len )
{
   keccak512_2x64_init( cc );
   keccak512_2x64_update( cc, data, len );
   keccak512_2x64_close( cc, dst );
}
|
||||
|
||||
|
||||
#undef INPUT_BUF
|
||||
#undef DECL64
|
||||
#undef XOR64
|
||||
@@ -435,5 +655,5 @@ keccak512_4way_close(void *cc, void *dst)
|
||||
#undef XOROR
|
||||
#undef XORAND
|
||||
#undef XOR3
|
||||
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
@@ -19,10 +19,12 @@ typedef keccak64_ctx_m512i keccak512_8x64_context;
|
||||
/*
 * 8x64 Keccak API. Each function takes the context as an untyped pointer.
 * The *_ctx variants run init, update and close in one call.
 */
void keccak256_8x64_init(void *cc);
void keccak256_8x64_update(void *cc, const void *data, size_t len);
void keccak256_8x64_close(void *cc, void *dst);
void keccak256_8x64_ctx( void *cc, void *dst, const void *data, size_t len );

void keccak512_8x64_init(void *cc);
void keccak512_8x64_update(void *cc, const void *data, size_t len);
void keccak512_8x64_close(void *cc, void *dst);
void keccak512_8x64_ctx( void *cc, void *dst, const void *data, size_t len );
|
||||
|
||||
// legacy naming
|
||||
#define keccak512_8way_context keccak512_8x64_context
|
||||
@@ -51,10 +53,12 @@ typedef keccak64_ctx_m256i keccak512_4x64_context;
|
||||
/*
 * 4x64 Keccak API. Each function takes the context as an untyped pointer.
 * The *_ctx variants run init, update and close in one call.
 */
void keccak256_4x64_init(void *cc);
void keccak256_4x64_update(void *cc, const void *data, size_t len);
void keccak256_4x64_close(void *cc, void *dst);
void keccak256_4x64_ctx( void *cc, void *dst, const void *data, size_t len );

void keccak512_4x64_init(void *cc);
void keccak512_4x64_update(void *cc, const void *data, size_t len);
void keccak512_4x64_close(void *cc, void *dst);
void keccak512_4x64_ctx( void *cc, void *dst, const void *data, size_t len );
|
||||
|
||||
// legacy naming
|
||||
#define keccak512_4way_context keccak512_4x64_context
|
||||
@@ -68,27 +72,27 @@ void keccak512_4x64_close(void *cc, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
v128_t buf[144*4];
|
||||
v128_t w[50];
|
||||
size_t ptr, lim;
|
||||
} keccak32_ctx_v128 __attribute__((aligned(64)));
|
||||
v128_t buf[144*8];
|
||||
v128_t w[25];
|
||||
size_t ptr, lim;
|
||||
} keccak64_ctx_v128 __attribute__((aligned(128)));
|
||||
|
||||
typedef keccak32_ctx_v128 keccak256_4x32_context;
|
||||
typedef keccak32_ctx_v128 keccak512_4x32_context;
|
||||
typedef keccak64_ctx_v128 keccak256_2x64_context;
|
||||
typedef keccak64_ctx_v128 keccak512_2x64_context;
|
||||
|
||||
/*
 * 2x64 (128-bit vector) Keccak API. Each function takes the context as an
 * untyped pointer. The *_ctx variants run init, update and close in one
 * call.
 */
void keccak256_2x64_init( void *cc );
void keccak256_2x64_update( void *cc, const void *data, size_t len );
void keccak256_2x64_close( void *cc, void *dst );
void keccak256_2x64_ctx( void *cc, void *dst, const void *data, size_t len );

void keccak512_2x64_init( void *cc );
void keccak512_2x64_update( void *cc, const void *data, size_t len );
void keccak512_2x64_close( void *cc, void *dst );
void keccak512_2x64_ctx( void *cc, void *dst, const void *data, size_t len );
|
||||
|
||||
void keccak256_4x32_init(void *cc);
|
||||
void keccak256_4x32_update(void *cc, const void *data, size_t len);
|
||||
void keccak256_4x32_close(void *cc, void *dst);
|
||||
|
||||
void keccak512_4x32_init(void *cc);
|
||||
void keccak512_4x32_update(void *cc, const void *data, size_t len);
|
||||
void keccak512_4x32_close(void *cc, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user