mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.15.3
This commit is contained in:
@@ -39,10 +39,17 @@
|
||||
void
|
||||
SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_context ctx;
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, in, len );
|
||||
sph_sha256_close( &ctx, digest );
|
||||
#else
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, in, len );
|
||||
SHA256_Final( digest, &ctx );
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -64,35 +71,59 @@ HMAC_SHA256_Buf( const void *K, size_t Klen, const void *in, size_t len,
|
||||
void
|
||||
HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
|
||||
{
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
const unsigned char * K = _K;
|
||||
size_t i;
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
const unsigned char * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if ( Klen > 64 )
|
||||
{
|
||||
SHA256_Init( &ctx->ictx );
|
||||
SHA256_Update( &ctx->ictx, K, Klen );
|
||||
SHA256_Final( khash, &ctx->ictx );
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_init( &ctx->ictx );
|
||||
sph_sha256( &ctx->ictx, K, Klen );
|
||||
sph_sha256_close( &ctx->ictx, khash );
|
||||
#else
|
||||
SHA256_Init( &ctx->ictx );
|
||||
SHA256_Update( &ctx->ictx, K, Klen );
|
||||
SHA256_Final( khash, &ctx->ictx );
|
||||
#endif
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_init( &ctx->ictx );
|
||||
#else
|
||||
SHA256_Init( &ctx->ictx );
|
||||
|
||||
#endif
|
||||
|
||||
for ( i = 0; i < Klen; i++ ) pad[i] = K[i] ^ 0x36;
|
||||
memset( pad + Klen, 0x36, 64 - Klen );
|
||||
SHA256_Update( &ctx->ictx, pad, 64 );
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
SHA256_Init( &ctx->octx );
|
||||
memset( pad + Klen, 0x36, 64 - Klen );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256( &ctx->ictx, pad, 64 );
|
||||
#else
|
||||
SHA256_Update( &ctx->ictx, pad, 64 );
|
||||
#endif
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_init( &ctx->octx );
|
||||
#else
|
||||
SHA256_Init( &ctx->octx );
|
||||
#endif
|
||||
|
||||
for ( i = 0; i < Klen; i++ ) pad[i] = K[i] ^ 0x5c;
|
||||
|
||||
memset( pad + Klen, 0x5c, 64 - Klen );
|
||||
SHA256_Update( &ctx->octx, pad, 64 );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256( &ctx->octx, pad, 64 );
|
||||
#else
|
||||
SHA256_Update( &ctx->octx, pad, 64 );
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
@@ -100,23 +131,33 @@ void
|
||||
HMAC_SHA256_Update( HMAC_SHA256_CTX *ctx, const void *in, size_t len )
|
||||
{
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
SHA256_Update( &ctx->ictx, in, len );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256( &ctx->ictx, in, len );
|
||||
#else
|
||||
SHA256_Update( &ctx->ictx, in, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Final( unsigned char digest[32], HMAC_SHA256_CTX *ctx )
|
||||
{
|
||||
unsigned char ihash[32];
|
||||
unsigned char ihash[32];
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final( ihash, &ctx->ictx );
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_close( &ctx->ictx, ihash );
|
||||
sph_sha256( &ctx->octx, ihash, 32 );
|
||||
sph_sha256_close( &ctx->octx, digest );
|
||||
#else
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final( ihash, &ctx->ictx );
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update( &ctx->octx, ihash, 32 );
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update( &ctx->octx, ihash, 32 );
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final( digest, &ctx->octx );
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final( digest, &ctx->octx );
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -29,14 +29,24 @@
|
||||
#ifndef HMAC_SHA256_H__
|
||||
#define HMAC_SHA256_H__
|
||||
|
||||
//#define HMAC_SSL_SHA 1
|
||||
#define HMAC_SPH_SHA 1
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_sha2.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
|
||||
typedef struct HMAC_SHA256Context
|
||||
{
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
#if defined(HMAC_SPH_SHA)
|
||||
sph_sha256_context ictx;
|
||||
sph_sha256_context octx;
|
||||
#else
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
#endif
|
||||
} HMAC_SHA256_CTX;
|
||||
|
||||
void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
|
||||
#define EXTERN_SHA256
|
||||
@@ -198,16 +197,6 @@ static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
||||
|
||||
extern void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
||||
{
|
||||
#if defined(__SHA__)
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, data, len );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
#else
|
||||
|
||||
uint32_t S[16], T[16];
|
||||
int i, r;
|
||||
|
||||
@@ -229,7 +218,6 @@ extern void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
||||
sha256_transform(T, S, 0);
|
||||
for (i = 0; i < 8; i++)
|
||||
be32enc((uint32_t *)hash + i, T[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void sha256d_preextend(uint32_t *W)
|
||||
@@ -676,14 +664,9 @@ int scanhash_SHA256d( struct work *work, const uint32_t max_nonce,
|
||||
|
||||
bool register_sha256d_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(__SHA__)
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_SHA256d;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256d;
|
||||
#endif
|
||||
gate->hash = (void*)&sha256d;
|
||||
return true;
|
||||
gate->hash = (void*)&sha256d;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
||||
200
algo/sha/sha256-hash-opt.c
Normal file
200
algo/sha/sha256-hash-opt.c
Normal file
@@ -0,0 +1,200 @@
|
||||
/* Intel SHA extensions using C intrinsics */
|
||||
/* Written and place in public domain by Jeffrey Walton */
|
||||
/* Based on code from Intel, and by Sean Gulley for */
|
||||
/* the miTLS project. */
|
||||
|
||||
// A drop in replacement for the function of the same name in sph_sha2.c.
|
||||
|
||||
#if defined(__SHA__)
|
||||
|
||||
#include "simd-utils.h"
|
||||
|
||||
static void sha2_round( const uint8_t input[], uint32_t state[8] )
|
||||
{
|
||||
__m128i STATE0, STATE1;
|
||||
__m128i MSG, TMP, MASK;
|
||||
__m128i TMSG0, TMSG1, TMSG2, TMSG3;
|
||||
__m128i ABEF_SAVE, CDGH_SAVE;
|
||||
|
||||
// Load initial values
|
||||
TMP = _mm_load_si128((__m128i*) &state[0]);
|
||||
STATE1 = _mm_load_si128((__m128i*) &state[4]);
|
||||
MASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL);
|
||||
|
||||
TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB
|
||||
STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH
|
||||
STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF
|
||||
STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH
|
||||
|
||||
// Save current hash
|
||||
ABEF_SAVE = STATE0;
|
||||
CDGH_SAVE = STATE1;
|
||||
|
||||
// Rounds 0-3
|
||||
MSG = _mm_load_si128((const __m128i*) (input+0));
|
||||
TMSG0 = _mm_shuffle_epi8(MSG, MASK);
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0xE9B5DBA5B5C0FBCFULL, 0x71374491428A2F98ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Rounds 4-7
|
||||
TMSG1 = _mm_load_si128((const __m128i*) (input+16));
|
||||
TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0xAB1C5ED5923F82A4ULL, 0x59F111F13956C25BULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
||||
|
||||
// Rounds 8-11
|
||||
TMSG2 = _mm_load_si128((const __m128i*) (input+32));
|
||||
TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0x550C7DC3243185BEULL, 0x12835B01D807AA98ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
||||
|
||||
// Rounds 12-15
|
||||
TMSG3 = _mm_load_si128((const __m128i*) (input+48));
|
||||
TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0xC19BF1749BDC06A7ULL, 0x80DEB1FE72BE5D74ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
|
||||
TMSG0 = _mm_add_epi32(TMSG0, TMP);
|
||||
TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
|
||||
|
||||
// Rounds 16-19
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0x240CA1CC0FC19DC6ULL, 0xEFBE4786E49B69C1ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
|
||||
TMSG1 = _mm_add_epi32(TMSG1, TMP);
|
||||
TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
|
||||
|
||||
// Rounds 20-23
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0x76F988DA5CB0A9DCULL, 0x4A7484AA2DE92C6FULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
|
||||
TMSG2 = _mm_add_epi32(TMSG2, TMP);
|
||||
TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
||||
|
||||
// Rounds 24-27
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0xBF597FC7B00327C8ULL, 0xA831C66D983E5152ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
|
||||
TMSG3 = _mm_add_epi32(TMSG3, TMP);
|
||||
TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
||||
|
||||
// Rounds 28-31
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0x1429296706CA6351ULL, 0xD5A79147C6E00BF3ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
|
||||
TMSG0 = _mm_add_epi32(TMSG0, TMP);
|
||||
TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
|
||||
|
||||
// Rounds 32-35
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0x53380D134D2C6DFCULL, 0x2E1B213827B70A85ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
|
||||
TMSG1 = _mm_add_epi32(TMSG1, TMP);
|
||||
TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
|
||||
|
||||
// Rounds 36-39
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0x92722C8581C2C92EULL, 0x766A0ABB650A7354ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
|
||||
TMSG2 = _mm_add_epi32(TMSG2, TMP);
|
||||
TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
|
||||
|
||||
// Rounds 40-43
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0xC76C51A3C24B8B70ULL, 0xA81A664BA2BFE8A1ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
|
||||
TMSG3 = _mm_add_epi32(TMSG3, TMP);
|
||||
TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
|
||||
|
||||
// Rounds 44-47
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0x106AA070F40E3585ULL, 0xD6990624D192E819ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
|
||||
TMSG0 = _mm_add_epi32(TMSG0, TMP);
|
||||
TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
|
||||
|
||||
// Rounds 48-51
|
||||
MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(0x34B0BCB52748774CULL, 0x1E376C0819A4C116ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
|
||||
TMSG1 = _mm_add_epi32(TMSG1, TMP);
|
||||
TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
|
||||
|
||||
// Rounds 52-55
|
||||
MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(0x682E6FF35B9CCA4FULL, 0x4ED8AA4A391C0CB3ULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
|
||||
TMSG2 = _mm_add_epi32(TMSG2, TMP);
|
||||
TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Rounds 56-59
|
||||
MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(0x8CC7020884C87814ULL, 0x78A5636F748F82EEULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
|
||||
TMSG3 = _mm_add_epi32(TMSG3, TMP);
|
||||
TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Rounds 60-63
|
||||
MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(0xC67178F2BEF9A3F7ULL, 0xA4506CEB90BEFFFAULL));
|
||||
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
|
||||
MSG = _mm_shuffle_epi32(MSG, 0x0E);
|
||||
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
|
||||
|
||||
// Add values back to state
|
||||
STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
|
||||
STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
|
||||
|
||||
TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
|
||||
STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG
|
||||
STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); // DCBA
|
||||
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
|
||||
|
||||
// Save state
|
||||
_mm_store_si128((__m128i*) &state[0], STATE0);
|
||||
_mm_store_si128((__m128i*) &state[4], STATE1);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
@@ -5,6 +5,79 @@
|
||||
#include <stdio.h>
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
#if defined(SHA256T_16WAY)
|
||||
|
||||
static __thread sha256_16way_context sha256_ctx16 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_16way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (64)));
|
||||
sha256_16way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx16, sizeof ctx );
|
||||
|
||||
sha256_16way_update( &ctx, input + (64<<4), 16 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_16way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*16] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = &(hash32[7<<4]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
sha256_16way_init( &sha256_ctx16 );
|
||||
sha256_16way_update( &sha256_ctx16, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = n;
|
||||
sha256q_16way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
@@ -31,68 +104,47 @@ void sha256q_8way_hash( void* output, const void* input )
|
||||
sha256_8way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_sha256q_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = &(hash32[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way_update( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
do
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
|
||||
pdata[19] = n;
|
||||
sha256q_8way_hash( hash, vdata );
|
||||
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
sha256q_8way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,108 +1,74 @@
|
||||
#include "sha256t-gate.h"
|
||||
|
||||
#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
static __thread SHA256_CTX sha256q_ctx __attribute__ ((aligned (64)));
|
||||
static __thread sph_sha256_context sha256q_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_midstate( const void* input )
|
||||
{
|
||||
SHA256_Init( &sha256q_ctx );
|
||||
SHA256_Update( &sha256q_ctx, input, 64 );
|
||||
sph_sha256_init( &sha256q_ctx );
|
||||
sph_sha256( &sha256q_ctx, input, 64 );
|
||||
}
|
||||
|
||||
void sha256q_hash( void* output, const void* input )
|
||||
int sha256q_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
SHA256_CTX ctx __attribute__ ((aligned (64)));
|
||||
sph_sha256_context ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &sha256q_ctx, sizeof sha256q_ctx );
|
||||
|
||||
SHA256_Update( &ctx, input + midlen, tail );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256( &ctx, input + midlen, tail );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, output );
|
||||
|
||||
memcpy( output, hash, 32 );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_sha256q( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
#ifdef _MSC_VER
|
||||
uint32_t __declspec(align(32)) hash64[8];
|
||||
#else
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
#endif
|
||||
uint32_t endiandata[32];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t last_nonce = max_nonce - 1;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sha256q_midstate( edata );
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
sha256q_midstate( endiandata );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
do
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
edata[19] = n;
|
||||
if ( likely( sha256q_hash( hash, edata ) ) )
|
||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sha256q_hash( hash64, endiandata );
|
||||
if ( !( hash64[7] & mask ) )
|
||||
if ( fulltest( hash64, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash64, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,6 +5,75 @@
|
||||
#include <stdio.h>
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
#if defined(SHA256T_16WAY)
|
||||
|
||||
static __thread sha256_16way_context sha256_ctx16 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256t_16way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (64)));
|
||||
sha256_16way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx16, sizeof ctx );
|
||||
|
||||
sha256_16way_update( &ctx, input + (64<<4), 16 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, vhash );
|
||||
|
||||
sha256_16way_init( &ctx );
|
||||
sha256_16way_update( &ctx, vhash, 32 );
|
||||
sha256_16way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash32[8*16] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = &(hash32[7<<4]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
sha256_16way_init( &sha256_ctx16 );
|
||||
sha256_16way_update( &sha256_ctx16, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = n;
|
||||
sha256t_16way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
@@ -31,61 +100,43 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash32[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hash32_d7 = &(hash32[7<<3]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
|
||||
// Need big endian data
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way_update( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
do
|
||||
{
|
||||
const uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32(
|
||||
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
|
||||
pdata[19] = n;
|
||||
sha256t_8way_hash( hash, vdata );
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
sha256t_8way_hash( hash32, vdata );
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,40 +2,41 @@
|
||||
|
||||
bool register_sha256t_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(SHA256T_8WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
#if defined(SHA256T_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256t_16way;
|
||||
gate->hash = (void*)&sha256t_16way_hash;
|
||||
#elif defined(__SHA__)
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
#elif defined(SHA256T_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||
gate->hash = (void*)&sha256t_8way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||
gate->hash = (void*)&sha256t_4way_hash;
|
||||
/*
|
||||
#else
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
*/
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool register_sha256q_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(SHA256T_8WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
#if defined(SHA256T_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256q_16way;
|
||||
gate->hash = (void*)&sha256q_16way_hash;
|
||||
#elif defined(__SHA__)
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256q;
|
||||
gate->hash = (void*)&sha256q_hash;
|
||||
#elif defined(SHA256T_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256q_8way;
|
||||
gate->hash = (void*)&sha256q_8way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha256q_4way;
|
||||
gate->hash = (void*)&sha256q_4way_hash;
|
||||
/*
|
||||
#else
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256q;
|
||||
gate->hash = (void*)&sha256q_hash;
|
||||
*/
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -4,15 +4,27 @@
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define SHA256T_8WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define SHA256T_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define SHA256T_8WAY 1
|
||||
#else
|
||||
#define SHA256T_4WAY
|
||||
#define SHA256T_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_sha256t_algo( algo_gate_t* gate );
|
||||
bool register_sha256q_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(SHA256T_16WAY)
|
||||
|
||||
void sha256t_16way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void sha256q_16way_hash( void *output, const void *input );
|
||||
int scanhash_sha256q_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
void sha256t_8way_hash( void *output, const void *input );
|
||||
@@ -33,13 +45,13 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
/*
|
||||
void sha256t_hash( void *output, const void *input );
|
||||
|
||||
int sha256t_hash( void *output, const void *input );
|
||||
int scanhash_sha256t( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void sha256q_hash( void *output, const void *input );
|
||||
int sha256q_hash( void *output, const void *input );
|
||||
int scanhash_sha256q( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,107 +1,73 @@
|
||||
#include "sha256t-gate.h"
|
||||
|
||||
// Obsolete
|
||||
|
||||
#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
||||
// Only used on CPUs with SHA
|
||||
|
||||
static __thread sph_sha256_context sha256t_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256t_midstate( const void* input )
|
||||
{
|
||||
SHA256_Init( &sha256t_ctx );
|
||||
SHA256_Update( &sha256t_ctx, input, 64 );
|
||||
sph_sha256_init( &sha256t_ctx );
|
||||
sph_sha256( &sha256t_ctx, input, 64 );
|
||||
}
|
||||
|
||||
void sha256t_hash( void* output, const void* input )
|
||||
int sha256t_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
SHA256_CTX ctx __attribute__ ((aligned (64)));
|
||||
sph_sha256_context ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &sha256t_ctx, sizeof sha256t_ctx );
|
||||
|
||||
SHA256_Update( &ctx, input + midlen, tail );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256( &ctx, input + midlen, tail );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, hash );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
sph_sha256_init( &ctx );
|
||||
sph_sha256( &ctx, hash, 32 );
|
||||
sph_sha256_close( &ctx, output );
|
||||
|
||||
memcpy( output, hash, 32 );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_sha256t( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
#ifdef _MSC_VER
|
||||
uint32_t __declspec(align(32)) hash64[8];
|
||||
#else
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
#endif
|
||||
uint32_t endiandata[32];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t last_nonce = max_nonce - 1;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sha256t_midstate( edata );
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
sha256t_midstate( endiandata );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
do
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
edata[19] = n;
|
||||
if ( likely( sha256t_hash( hash, edata ) ) )
|
||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sha256t_hash( hash64, endiandata );
|
||||
if ( !(hash64[7] & mask) )
|
||||
if ( fulltest( hash64, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash64, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -71,7 +71,11 @@ static const sph_u32 H256[8] = {
|
||||
* of the compression function.
|
||||
*/
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_SHA2
|
||||
#if defined(__SHA__)
|
||||
|
||||
#include "sha256-hash-opt.c"
|
||||
|
||||
#else // no SHA
|
||||
|
||||
static const sph_u32 K[64] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
@@ -108,6 +112,8 @@ static const sph_u32 K[64] = {
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
|
||||
};
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_SHA2
|
||||
|
||||
#define SHA2_MEXP1(in, pc) do { \
|
||||
W[pc] = in(pc); \
|
||||
} while (0)
|
||||
@@ -191,7 +197,7 @@ static const sph_u32 K[64] = {
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
#else // large footprint (default)
|
||||
|
||||
#define SHA2_ROUND_BODY(in, r) do { \
|
||||
sph_u32 A, B, C, D, E, F, G, H, T1, T2; \
|
||||
@@ -600,7 +606,7 @@ static const sph_u32 K[64] = {
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
#endif // small footprint else
|
||||
|
||||
/*
|
||||
* One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
|
||||
@@ -613,6 +619,9 @@ sha2_round(const unsigned char *data, sph_u32 r[8])
|
||||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
#endif // SHA else
|
||||
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_init(void *cc)
|
||||
@@ -653,7 +662,7 @@ void
|
||||
sph_sha224_close(void *cc, void *dst)
|
||||
{
|
||||
sha224_close(cc, dst, 7);
|
||||
sph_sha224_init(cc);
|
||||
// sph_sha224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
@@ -661,7 +670,7 @@ void
|
||||
sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha224_addbits_and_close(cc, ub, n, dst, 7);
|
||||
sph_sha224_init(cc);
|
||||
// sph_sha224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
@@ -677,14 +686,14 @@ void
|
||||
sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha224_addbits_and_close(cc, ub, n, dst, 8);
|
||||
sph_sha256_init(cc);
|
||||
// sph_sha256_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
|
||||
{
|
||||
#define SHA2_IN(x) msg[x]
|
||||
SHA2_ROUND_BODY(SHA2_IN, val);
|
||||
#undef SHA2_IN
|
||||
}
|
||||
//void
|
||||
//sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
|
||||
//{
|
||||
//#define SHA2_IN(x) msg[x]
|
||||
// SHA2_ROUND_BODY(SHA2_IN, val);
|
||||
//#undef SHA2_IN
|
||||
//}
|
||||
|
||||
@@ -73,7 +73,7 @@ typedef struct {
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_sha224_context;
|
||||
} sph_sha224_context __attribute__((aligned(64)));
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-256 computations. It is identical
|
||||
|
||||
Reference in New Issue
Block a user