mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.8.1.1
This commit is contained in:
@@ -159,6 +159,10 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.1.1
|
||||
|
||||
Fixed Windows AVX2 crash.
|
||||
|
||||
v3.8.1
|
||||
|
||||
Fixes x16r on CPUs with only SSE2.
|
||||
|
@@ -51,7 +51,9 @@ extern "C"{
|
||||
|
||||
// BMW small has a bug not present in big. Lanes 0 & 2 produce valid hash
|
||||
// while lanes 1 & 3 produce invalid hash. The cause is not known.
|
||||
|
||||
// Some things that could cause it are: using epi64 instead of epi32,
|
||||
// a memory write that is the wrong size, an attempt to index a vector
|
||||
// like an array (only works for 64 bit elements).
|
||||
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
|
@@ -20,11 +20,11 @@ typedef struct {
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_CTX sha;
|
||||
#else
|
||||
//#ifndef USE_SPH_SHA
|
||||
// SHA256_CTX sha;
|
||||
//#else
|
||||
sph_sha256_context sha;
|
||||
#endif
|
||||
//#endif
|
||||
} myrgr_ctx_holder;
|
||||
|
||||
myrgr_ctx_holder myrgr_ctx;
|
||||
@@ -36,11 +36,11 @@ void init_myrgr_ctx()
|
||||
#else
|
||||
init_groestl (&myrgr_ctx.groestl, 64 );
|
||||
#endif
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_Init( &myrgr_ctx.sha );
|
||||
#else
|
||||
//#ifndef USE_SPH_SHA
|
||||
// SHA256_Init( &myrgr_ctx.sha );
|
||||
//#else
|
||||
sph_sha256_init( &myrgr_ctx.sha );
|
||||
#endif
|
||||
//#endif
|
||||
}
|
||||
|
||||
void myriadhash( void *output, const void *input )
|
||||
@@ -57,13 +57,13 @@ void myriadhash( void *output, const void *input )
|
||||
(const char*)input, 640 );
|
||||
#endif
|
||||
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_Update( &ctx.sha, hash, 64 );
|
||||
SHA256_Final( (unsigned char*) hash, &ctx.sha );
|
||||
#else
|
||||
//#ifndef USE_SPH_SHA
|
||||
// SHA256_Update( &ctx.sha, hash, 64 );
|
||||
// SHA256_Final( (unsigned char*) hash, &ctx.sha );
|
||||
//#else
|
||||
sph_sha256(&ctx.sha, hash, 64);
|
||||
sph_sha256_close(&ctx.sha, hash);
|
||||
#endif
|
||||
//#endif
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
|
@@ -251,10 +251,11 @@ __m256i CNS[32];
|
||||
/* Round function */
|
||||
/* state: hash context */
|
||||
|
||||
static void rnd512_2way( luffa_2way_context *state, __m256i msg1, __m256i msg0 )
|
||||
void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
{
|
||||
__m256i t[2];
|
||||
__m256i *chainv = state->chainv;
|
||||
__m256i msg0, msg1;
|
||||
__m256i tmp[2];
|
||||
__m256i x[8];
|
||||
|
||||
@@ -272,8 +273,8 @@ static void rnd512_2way( luffa_2way_context *state, __m256i msg1, __m256i msg0 )
|
||||
|
||||
MULT2( t[0], t[1] );
|
||||
|
||||
msg0 = _mm256_shuffle_epi32( msg0, 27 );
|
||||
msg1 = _mm256_shuffle_epi32( msg1, 27 );
|
||||
msg0 = _mm256_shuffle_epi32( msg[0], 27 );
|
||||
msg1 = _mm256_shuffle_epi32( msg[1], 27 );
|
||||
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], t[0] );
|
||||
chainv[1] = _mm256_xor_si256( chainv[1], t[1] );
|
||||
@@ -359,7 +360,6 @@ static void rnd512_2way( luffa_2way_context *state, __m256i msg1, __m256i msg0 )
|
||||
chainv[9] = _mm256_or_si256( _mm256_slli_epi32( chainv[9], 4 ),
|
||||
_mm256_srli_epi32( chainv[9], 28 ) );
|
||||
|
||||
|
||||
NMLTOM1024( chainv[0], chainv[2], chainv[4], chainv[6],
|
||||
x[0], x[1], x[2], x[3],
|
||||
chainv[1],chainv[3],chainv[5],chainv[7],
|
||||
@@ -382,6 +382,7 @@ static void rnd512_2way( luffa_2way_context *state, __m256i msg1, __m256i msg0 )
|
||||
/* Process last 256-bit block */
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[16], CNS[17],
|
||||
tmp[0], tmp[1] );
|
||||
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[18], CNS[19],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[20], CNS[21],
|
||||
@@ -404,14 +405,16 @@ static void rnd512_2way( luffa_2way_context *state, __m256i msg1, __m256i msg0 )
|
||||
/* state: hash context */
|
||||
/* b[8]: hash values */
|
||||
|
||||
static void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
{
|
||||
uint32 hash[8] __attribute((aligned(64)));
|
||||
__m256i* chainv = state->chainv;
|
||||
__m256i t[2];
|
||||
__m256i zero[2];
|
||||
zero[0] = zero[1] = _mm256_setzero_si256();
|
||||
|
||||
/*---- blank round with m=0 ----*/
|
||||
rnd512_2way( state, m256_zero, m256_zero );
|
||||
rnd512_2way( state, zero );
|
||||
|
||||
t[0] = chainv[0];
|
||||
t[1] = chainv[1];
|
||||
@@ -434,7 +437,7 @@ static void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
|
||||
|
||||
rnd512_2way( state, m256_zero, m256_zero );
|
||||
rnd512_2way( state, zero );
|
||||
|
||||
t[0] = chainv[0];
|
||||
t[1] = chainv[1];
|
||||
@@ -487,6 +490,7 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buffer = (__m256i*)state->buffer;
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
int blocks = (int)len / 32;
|
||||
state-> rembytes = (int)len % 32;
|
||||
@@ -494,8 +498,9 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
// full blocks
|
||||
// Absorb every full block. One block is two consecutive __m256i words of
// the input; each word is passed through mm256_bswap_32 before it is handed
// to the round function.
for ( i = 0; i < blocks; i++, vdata+=2 )
{
   // The loop increment already advances vdata by one block per iteration,
   // so the current block is always vdata[0] and vdata[1]. Indexing with i
   // as well would skip ahead twice as fast and read beyond the input from
   // the second block onward.
   msg[0] = mm256_bswap_32( vdata[ 0 ] );
   msg[1] = mm256_bswap_32( vdata[ 1 ] );
   rnd512_2way( state, msg );
}
|
||||
|
||||
// 16 byte partial block exists for 80 byte len
|
||||
@@ -513,17 +518,19 @@ int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
int luffa_2way_close( luffa_2way_context *state, void *hashval )
|
||||
{
|
||||
__m256i *buffer = (__m256i*)state->buffer;
|
||||
__m256i msg[2];
|
||||
|
||||
// transform pad block
|
||||
if ( state->rembytes )
|
||||
// not empty, data is in buffer
|
||||
rnd512_2way( state, buffer[1], buffer[0] );
|
||||
rnd512_2way( state, buffer );
|
||||
else
|
||||
// empty pad block, constant data
|
||||
rnd512_2way( state, m256_zero,
|
||||
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
|
||||
|
||||
{ // empty pad block, constant data
|
||||
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
msg[1] = m256_zero;
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
finalization512_2way( state, (uint32*)hashval );
|
||||
|
||||
if ( state->hashbitlen > 512 )
|
||||
@@ -535,28 +542,37 @@ int luffa_2way_update_close( luffa_2way_context *state,
|
||||
void *output, const void *data, size_t inlen )
|
||||
{
|
||||
// Optimized for integrals of 16 bytes, good for 64 and 80 byte len
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
const __m256i *vdata = (__m256i*)data;
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
int blocks = (int)( inlen / 32 );
|
||||
state->rembytes = inlen % 32;
|
||||
const int blocks = (int)( inlen >> 5 );
|
||||
state->rembytes = inlen & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
rnd512_2way( state, mm256_bswap_32( vdata[1] ),
|
||||
mm256_bswap_32( vdata[0] ) );
|
||||
{
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
// 16 byte partial block exists for 80 byte len
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// padding of partial block
|
||||
rnd512_2way( state,
|
||||
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ),
|
||||
mm256_bswap_32( vdata[0] ) );
|
||||
msg[0] = mm256_bswap_32( vdata[0] );
|
||||
msg[1] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// empty pad block
|
||||
rnd512_2way( state, m256_zero,
|
||||
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
|
||||
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
msg[1] = m256_zero;
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
finalization512_2way( state, (uint32*)output );
|
||||
if ( state->hashbitlen > 512 )
|
||||
|
@@ -116,7 +116,7 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+64 );
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
n += 2;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
|
@@ -30,13 +30,235 @@
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sha2-hash-4way.h"
|
||||
|
||||
// SHA256 4 way 32 bit
|
||||
|
||||
static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
|
||||
SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
|
||||
SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
};
|
||||
|
||||
static const sph_u32 K256[80] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
|
||||
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
|
||||
SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
|
||||
SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
|
||||
SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
|
||||
SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
|
||||
SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
|
||||
SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
|
||||
SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
|
||||
SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
|
||||
SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
|
||||
SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
|
||||
SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
|
||||
SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
|
||||
SPH_C32(0x06CA6351), SPH_C32(0x14292967),
|
||||
SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
|
||||
SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
|
||||
SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
|
||||
SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
|
||||
SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
|
||||
SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
|
||||
SPH_C32(0xD192E819), SPH_C32(0xD6990624),
|
||||
SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
|
||||
SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
|
||||
SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
|
||||
SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
|
||||
SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
|
||||
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
|
||||
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
|
||||
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2),
|
||||
SPH_C32(0xCA273ECE), SPH_C32(0xD186B8C7),
|
||||
SPH_C32(0xEADA7DD6), SPH_C32(0xF57D4F7F),
|
||||
SPH_C32(0x06F067AA), SPH_C32(0x0A637DC5),
|
||||
SPH_C32(0x113F9804), SPH_C32(0x1B710B35),
|
||||
SPH_C32(0x28DB77F5), SPH_C32(0x32CAAB7B),
|
||||
SPH_C32(0x3C9EBE0A), SPH_C32(0x431D67C4),
|
||||
SPH_C32(0x4CC5D4BE), SPH_C32(0x597F299C),
|
||||
SPH_C32(0x5FCB6FAB), SPH_C32(0x6C44198C)
|
||||
|
||||
};
|
||||
|
||||
|
||||
#define CHs(X, Y, Z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( Y, Z ), X ), Z )
|
||||
|
||||
#define MAJs(X, Y, Z) \
|
||||
_mm_or_si128( _mm_and_si128( X, Y ), \
|
||||
_mm_and_si128( _mm_or_si128( X, Y ), Z ) )
|
||||
|
||||
#define BSG2_0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 2), mm_rotr_32(x, 13) ), mm_rotr_32( x, 22) )
|
||||
|
||||
#define BSG2_1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 6), mm_rotr_32(x, 11) ), mm_rotr_32( x, 25) )
|
||||
|
||||
#define SSG2_0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 7), mm_rotr_32(x, 18) ), _mm_srli_epi32(x, 3) )
|
||||
|
||||
#define SSG2_1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 17), mm_rotr_32(x, 19) ), _mm_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA256_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
__m128i T1, T2; \
|
||||
T1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( \
|
||||
_mm_add_epi32( H, BSG2_1(E) ), CHs(E, F, G) ), \
|
||||
_mm_set1_epi32( K256[i] ) ), W[i] ); \
|
||||
T2 = _mm_add_epi32( BSG2_0(A), MAJs(A, B, C) ); \
|
||||
D = _mm_add_epi32( D, T1 ); \
|
||||
H = _mm_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
sha256_4way_round( __m128i *in, __m128i r[8] )
|
||||
{
|
||||
int i;
|
||||
__m128i A, B, C, D, E, F, G, H;
|
||||
__m128i W[80];
|
||||
|
||||
for ( i = 0; i < 16; i++ )
|
||||
W[i] = mm_bswap_32( in[i] );
|
||||
for ( i = 16; i < 80; i++ )
|
||||
W[i] = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32(
|
||||
SSG2_1( W[ i-2 ] ), W[ i-7 ] ), SSG2_0( W[ i-15 ] ) ), W[ i-16 ] );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
C = r[2];
|
||||
D = r[3];
|
||||
E = r[4];
|
||||
F = r[5];
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
|
||||
for ( i = 0; i < 80; i += 8 )
|
||||
{
|
||||
SHA256_4WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
|
||||
SHA256_4WAY_STEP( H, A, B, C, D, E, F, G, i + 1 );
|
||||
SHA256_4WAY_STEP( G, H, A, B, C, D, E, F, i + 2 );
|
||||
SHA256_4WAY_STEP( F, G, H, A, B, C, D, E, i + 3 );
|
||||
SHA256_4WAY_STEP( E, F, G, H, A, B, C, D, i + 4 );
|
||||
SHA256_4WAY_STEP( D, E, F, G, H, A, B, C, i + 5 );
|
||||
SHA256_4WAY_STEP( C, D, E, F, G, H, A, B, i + 6 );
|
||||
SHA256_4WAY_STEP( B, C, D, E, F, G, H, A, i + 7 );
|
||||
}
|
||||
|
||||
r[0] = _mm_add_epi32( r[0], A );
|
||||
r[1] = _mm_add_epi32( r[1], B );
|
||||
r[2] = _mm_add_epi32( r[2], C );
|
||||
r[3] = _mm_add_epi32( r[3], D );
|
||||
r[4] = _mm_add_epi32( r[4], E );
|
||||
r[5] = _mm_add_epi32( r[5], F );
|
||||
r[6] = _mm_add_epi32( r[6], G );
|
||||
r[7] = _mm_add_epi32( r[7], H );
|
||||
}
|
||||
|
||||
// Reset a 4-way SHA-256 context: clear both halves of the byte counter and
// load every lane of the chaining state with the H256 initial values.
void sha256_4way_init( sha256_4way_context *sc )
{
   int k;

   sc->count_low  = 0;
   sc->count_high = 0;

   for ( k = 0; k < 8; k++ )
      sc->val[k] = _mm_set1_epi32( H256[k] );
}
|
||||
|
||||
// Absorb len bytes (per lane) of input into the context.
//
// data is read as an array of __m128i, one vector per 4 bytes of per-lane
// input. Input is staged in sc->buf and the compression function runs each
// time 64 bytes per lane have accumulated. The byte counter is kept as a
// 32 bit low/high pair.
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len )
{
   const int block_bytes = 64;
   __m128i *src = (__m128i*)data;
   size_t fill;

   // Bytes already waiting in the staging buffer from earlier calls.
   fill = (unsigned)sc->count_low & (block_bytes - 1U);

   while ( len > 0 )
   {
      uint32_t prev_low, next_low;
      size_t chunk = block_bytes - fill;

      if ( chunk > len )
         chunk = len;

      // Byte counts are divided by 4 to get vector counts.
      memcpy_128( sc->buf + (fill>>2), src, chunk>>2 );
      src  += chunk>>2;
      fill += chunk;
      len  -= chunk;

      if ( fill == block_bytes )
      {
         sha256_4way_round( sc->buf, sc->val );
         fill = 0;
      }

      // Advance the counter, carrying into the high word on wrap-around.
      prev_low = sc->count_low;
      next_low = SPH_T32( prev_low + chunk );
      sc->count_low = next_low;
      if ( next_low < prev_low )
         sc->count_high++;
   }
}
|
||||
|
||||
// Finish the hash: append the 0x80 marker, zero fill, append the message
// length in bits, run the final compression(s) and write the eight state
// vectors, passed through mm_bswap_32, to dst.
void sha256_4way_close( sha256_4way_context *sc, void *dst )
{
   const int block_bytes = 64;
   const int len_offset  = block_bytes - 8;   // length field starts here
   __m128i *out = (__m128i*)dst;
   unsigned pos, k;
   uint32_t bits_lo, bits_hi;

   pos = (unsigned)sc->count_low & (block_bytes - 1U);

   // Padding marker; the round function byte swaps every input word.
   sc->buf[ pos>>2 ] = _mm_set1_epi32( 0x80 );
   pos += 4;

   if ( pos <= len_offset )
      memset_zero_128( sc->buf + (pos>>2), (len_offset - pos) >> 2 );
   else
   {
      // No room left for the length: flush this block and start a new one.
      memset_zero_128( sc->buf + (pos>>2), (block_bytes - pos) >> 2 );
      sha256_4way_round( sc->buf, sc->val );
      memset_zero_128( sc->buf, len_offset >> 2 );
   }

   // Convert the byte count to a bit count split over two 32 bit words.
   bits_lo = sc->count_low;
   bits_hi = (sc->count_high << 3) | (bits_lo >> 29);
   bits_lo = bits_lo << 3;

   sc->buf[ len_offset >> 2 ]       = mm_bswap_32( _mm_set1_epi32( bits_hi ) );
   sc->buf[ ( len_offset+4 ) >> 2 ] = mm_bswap_32( _mm_set1_epi32( bits_lo ) );
   sha256_4way_round( sc->buf, sc->val );

   for ( k = 0; k < 8; k++ )
      out[k] = mm_bswap_32( sc->val[k] );
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// SHA512 4 way 64 bit
|
||||
|
||||
static const sph_u64 H512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
|
||||
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
|
||||
};
|
||||
|
||||
static const sph_u64 K512[80] = {
|
||||
SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
|
||||
SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
|
||||
@@ -80,13 +302,6 @@ static const sph_u64 K512[80] = {
|
||||
SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
|
||||
};
|
||||
|
||||
static const sph_u64 H512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
|
||||
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
|
||||
};
|
||||
|
||||
#define CH(X, Y, Z) \
|
||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||
|
||||
@@ -182,7 +397,7 @@ void sha512_4way( sha512_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
int buf_size = 128;
|
||||
const int buf_size = 128;
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
while ( len > 0 )
|
||||
@@ -207,8 +422,8 @@ void sha512_4way( sha512_4way_context *sc, const void *data, size_t len )
|
||||
void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
int buf_size = 128;
|
||||
int pad = buf_size - 16;
|
||||
const int buf_size = 128;
|
||||
const int pad = buf_size - 16;
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = _mm256_set1_epi64x( 0x80 );
|
||||
@@ -233,4 +448,5 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
((__m256i*)dst)[u] = mm256_bswap_64( sc->val[u] );
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // __AVX2__
|
||||
#endif // __AVX__
|
||||
|
@@ -44,47 +44,19 @@
|
||||
#include "sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#if 0
|
||||
|
||||
#define SPH_SIZE_sha224 224
|
||||
#if defined(__AVX__)
|
||||
|
||||
#define SPH_SIZE_sha256 256
|
||||
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u32 val[8];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_sha224_context;
|
||||
__m128i buf[64>>2];
|
||||
__m128i val[8];
|
||||
uint32_t count_high, count_low;
|
||||
} sha256_4way_context;
|
||||
|
||||
typedef sph_sha224_context sph_sha256_context;
|
||||
|
||||
void sph_sha224_init(void *cc);
|
||||
|
||||
void sph_sha224(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_sha224_close(void *cc, void *dst);
|
||||
|
||||
void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]);
|
||||
|
||||
void sph_sha256_init(void *cc);
|
||||
|
||||
void sph_sha256(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_sha256_close(void *cc, void *dst);
|
||||
|
||||
void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]);
|
||||
|
||||
#endif
|
||||
void sha256_4way_init( sha256_4way_context *sc );
|
||||
void sha256_4way( sha256_4way_context *sc, const void *data, size_t len );
|
||||
void sha256_4way_close( sha256_4way_context *sc, void *dst );
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
@@ -102,3 +74,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
17
avxdefs.h
17
avxdefs.h
@@ -29,7 +29,7 @@
|
||||
//
|
||||
// operation;
|
||||
// data: variable/constant name
|
||||
// function: dexcription of operation
|
||||
// function: description of operation
|
||||
//
|
||||
// size: size of element if applicable
|
||||
//
|
||||
@@ -99,17 +99,17 @@ typedef union m128_v8 m128_v8;
|
||||
#define mm_setc1_64( x ) {{ x, x }}
|
||||
|
||||
#define mm_setc_32( x3, x2, x1, x0 ) {{ x3, x2, x1, x0 }}
|
||||
#define mm_setc1_32( x ) {{ [0 ... 3] = x }}
|
||||
#define mm_setc1_32( x ) {{ x,x,x,x }}
|
||||
|
||||
#define mm_setc_16( x7, x6, x5, x4, x3, x2, x1, x0 ) \
|
||||
{{ x7, x6, x5, x4, x3, x2, x1, x0 }}
|
||||
#define mm_setc1_16( x ) {{ [0 ... 7] = x }}
|
||||
#define mm_setc1_16( x ) {{ x,x,x,x, x,x,x,x }}
|
||||
|
||||
#define mm_setc_8( x15, x14, x13, x12, x11, x10, x09, x08, \
|
||||
x07, x06, x05, x04, x03, x02, x01, x00 ) \
|
||||
{{ x15, x14, x13, x12, x11, x10, x09, x08, \
|
||||
x07, x06, x05, x04, x03, x02, x01, x00 }}
|
||||
#define mm_setc1_8( x ) {{ [0 ... 15] = x }}
|
||||
#define mm_setc1_8( x ) {{ x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x }}
|
||||
|
||||
// Compile time constants, use only for initializing.
|
||||
#define c128_zero mm_setc1_64( 0ULL )
|
||||
@@ -582,17 +582,17 @@ typedef union m256_v8 m256_v8;
|
||||
// simple constant vectors.
|
||||
|
||||
#define mm256_setc_64( x3, x2, x1, x0 ) {{ x3, x2, x1, x0 }}
|
||||
#define mm256_setc1_64( x ) {{ [0 ... 3] = x }}
|
||||
#define mm256_setc1_64( x ) {{ x,x,x,x }}
|
||||
|
||||
#define mm256_setc_32( x7, x6, x5, x4, x3, x2, x1, x0 ) \
|
||||
{{ x7, x6, x5, x4, x3, x2, x1, x0 }}
|
||||
#define mm256_setc1_32( x ) {{ [0 ... 7] = x }}
|
||||
#define mm256_setc1_32( x ) {{ x,x,x,x, x,x,x,x }}
|
||||
|
||||
#define mm256_setc_16( x15, x14, x13, x12, x11, x10, x09, x08, \
|
||||
x07, x06, x05, x04, x03, x02, x01, x00 ) \
|
||||
{{ x15, x14, x13, x12, x11, x10, x09, x08, \
|
||||
x07, x06, x05, x04, x03, x02, x01, x00 }}
|
||||
#define mm256_setc1_16( x ) {{ [0 ... 15] = x }}
|
||||
#define mm256_setc1_16( x ) {{ x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x }}
|
||||
|
||||
#define mm256_setc_8( x31, x30, x29, x28, x27, x26, x25, x24, \
|
||||
x23, x22, x21, x20, x19, x18, x17, x16, \
|
||||
@@ -602,7 +602,8 @@ typedef union m256_v8 m256_v8;
|
||||
x23, x22, x21, x20, x19, x18, x17, x16, \
|
||||
x15, x14, x13, x12, x11, x10, x09, x08, \
|
||||
x07, x06, x05, x04, x03, x02, x01, x00 }}
|
||||
#define mm256_setc1_8( x ) {{ [0 ... 31] = x }}
|
||||
#define mm256_setc1_8( x ) {{ x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, \
|
||||
x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x }}
|
||||
|
||||
// Predefined compile time constant vectors.
|
||||
// Use Pseudo constants at run time for all simple constant vectors.
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.1.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.1.1.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.8.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.1'
|
||||
PACKAGE_VERSION='3.8.1.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.1.1'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.8.1 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.8.1.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1392,7 +1392,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.1:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.1.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1497,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.8.1
|
||||
cpuminer-opt configure 3.8.1.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.8.1, which was
|
||||
It was created by cpuminer-opt $as_me 3.8.1.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2981,7 +2981,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.8.1'
|
||||
VERSION='3.8.1.1'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.8.1, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.8.1.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6743,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.8.1
|
||||
cpuminer-opt config.status 3.8.1.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.8.1])
|
||||
AC_INIT([cpuminer-opt], [3.8.1.1])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
Reference in New Issue
Block a user