mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.7.5
This commit is contained in:
@@ -20,7 +20,7 @@ void skeinhash_4way( void *state, const void *input )
|
||||
skein512_4way( &ctx_skein, input, 80 );
|
||||
skein512_4way_close( &ctx_skein, vhash );
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash0, 64 );
|
||||
@@ -38,21 +38,20 @@ void skeinhash_4way( void *state, const void *input )
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
|
||||
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
|
||||
|
||||
memcpy( (char*)state, (char*)hash0, 32 );
|
||||
memcpy( ((char*)state) + 32, (char*)hash1, 32 );
|
||||
memcpy( ((char*)state) + 64, (char*)hash2, 32 );
|
||||
memcpy( ((char*)state) + 96, (char*)hash3, 32 );
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state + 32, hash1, 32 );
|
||||
memcpy( state + 64, hash2, 32 );
|
||||
memcpy( state + 96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
@@ -63,9 +62,9 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
// data is 80 bytes, 20 u32 or 4 u64.
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
|
||||
@@ -6,8 +6,8 @@ int64_t skein_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_skein_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT| AVX2_OPT | SHA_OPT;
|
||||
#if defined (SKEIN_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_4way;
|
||||
gate->hash = (void*)&skeinhash_4way;
|
||||
#else
|
||||
|
||||
@@ -463,7 +463,7 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
|
||||
if ( len <= buf_size - ptr )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
sc->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
@@ -483,7 +483,7 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
@@ -520,11 +520,11 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
READ_STATE_BIG(sc);
|
||||
|
||||
memset_zero_m256i( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
et = 352 + ((bcount == 0) << 7);
|
||||
UBI_BIG_4WAY( et, ptr );
|
||||
|
||||
memset_zero_m256i( buf, buf_size >> 3 );
|
||||
memset_zero_256( buf, buf_size >> 3 );
|
||||
bcount = 0;
|
||||
UBI_BIG_4WAY( 510, 8 );
|
||||
|
||||
@@ -537,7 +537,7 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
buf[6] = h6;
|
||||
buf[7] = h7;
|
||||
|
||||
memcpy_m256i( dst, buf, out_len >> 3 );
|
||||
memcpy_256( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
static const sph_u64 IV256[] = {
|
||||
|
||||
@@ -19,13 +19,13 @@ void skein2hash_4way( void *output, const void *input )
|
||||
skein512_4way( &ctx, hash, 64 );
|
||||
skein512_4way_close( &ctx, hash );
|
||||
|
||||
m256_deinterleave_4x64( out64, out64+4, out64+8, out64+12, hash, 256 );
|
||||
mm256_deinterleave_4x64( out64, out64+4, out64+8, out64+12, hash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
@@ -41,7 +41,7 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
|
||||
@@ -9,12 +9,12 @@ int64_t skein2_get_max64 ()
|
||||
|
||||
bool register_skein2_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = FOUR_WAY_OPT;
|
||||
#if defined (FOUR_WAY) && defined (__AVX2__)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein2_4way;
|
||||
gate->hash = (void*)&skein2hash_4way;
|
||||
four_way_not_tested();
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein2;
|
||||
gate->hash = (void*)&skein2hash;
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user