mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.16.2
This commit is contained in:
@@ -196,6 +196,7 @@ cpuminer_SOURCES = \
|
||||
algo/verthash/Verthash.c \
|
||||
algo/verthash/fopen_utf8.c \
|
||||
algo/verthash/tiny_sha3/sha3.c \
|
||||
algo/verthash/tiny_sha3/sha3-4way.c \
|
||||
algo/whirlpool/sph_whirlpool.c \
|
||||
algo/whirlpool/whirlpool-hash-4way.c \
|
||||
algo/whirlpool/whirlpool-gate.c \
|
||||
|
@@ -135,7 +135,7 @@ Supported Algorithms
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r
|
||||
x16rv2 Ravencoin (RVN)
|
||||
x16rv2
|
||||
x16rt Gincoin (GIN)
|
||||
x16rt-veil Veil (VEIL)
|
||||
x16s Pigeoncoin (PGN)
|
||||
|
@@ -65,6 +65,14 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.16.2
|
||||
|
||||
Verthash: midstate prehash optimization for all architectures.
|
||||
Verthash: AVX2 optimization.
|
||||
GBT: added support for Bech32 addresses, untested.
|
||||
Linux: added CPU frequency to benchmark log.
|
||||
Fixed integer overflow in time calculations.
|
||||
|
||||
v3.16.1
|
||||
|
||||
New options for verthash:
|
||||
@@ -72,16 +80,12 @@ New options for verthash:
|
||||
data file, default is "verthash.dat" in the current directory.
|
||||
--verify to perform the data file integrity check at startup, default is
|
||||
not to verify data file integrity.
|
||||
|
||||
Support for creation of default verthash data file if:
|
||||
1) --data-file option is not used,
|
||||
2) no default data file is found in the current directory, and,
|
||||
3) --verify option is used.
|
||||
|
||||
More detailed logs related to verthash data file.
|
||||
|
||||
Small verthash performance improvement.
|
||||
|
||||
Fixed detection of corrupt stats caused by networking issues.
|
||||
|
||||
v3.16.0
|
||||
|
@@ -134,87 +134,117 @@ static inline uint32_t fnv1a(const uint32_t a, const uint32_t b)
|
||||
return (a ^ b) * 0x1000193;
|
||||
}
|
||||
|
||||
void verthash_hash(const unsigned char* blob_bytes,
|
||||
const size_t blob_size,
|
||||
const unsigned char(*input)[VH_HEADER_SIZE],
|
||||
unsigned char(*output)[VH_HASH_OUT_SIZE])
|
||||
void verthash_hash( const unsigned char* blob_bytes,
|
||||
const size_t blob_size,
|
||||
const unsigned char(*input)[VH_HEADER_SIZE],
|
||||
unsigned char(*output)[VH_HASH_OUT_SIZE] )
|
||||
{
|
||||
unsigned char p1[VH_HASH_OUT_SIZE] __attribute__ ((aligned (64)));
|
||||
sha3(&input[0], VH_HEADER_SIZE, &p1[0], VH_HASH_OUT_SIZE);
|
||||
|
||||
unsigned char p0[VH_N_SUBSET];
|
||||
|
||||
unsigned char input_header[VH_HEADER_SIZE] __attribute__ ((aligned (64)));
|
||||
memcpy(input_header, input, VH_HEADER_SIZE);
|
||||
|
||||
for (size_t i = 0; i < VH_N_ITER; ++i)
|
||||
{
|
||||
input_header[0] += 1;
|
||||
sha3(&input_header[0], VH_HEADER_SIZE, p0 + i * VH_P0_SIZE, VH_P0_SIZE);
|
||||
}
|
||||
|
||||
uint32_t* p0_index = (uint32_t*)p0;
|
||||
unsigned char p1[ VH_HASH_OUT_SIZE ] __attribute__ ((aligned (64)));
|
||||
unsigned char p0[ VH_N_SUBSET ] __attribute__ ((aligned (64)));
|
||||
uint32_t seek_indexes[VH_N_INDEXES] __attribute__ ((aligned (64)));
|
||||
uint32_t* p0_index = (uint32_t*)p0;
|
||||
|
||||
verthash_sha3_512_final_8( p0, ( (uint64_t*)input )[ 9 ] );
|
||||
|
||||
for ( size_t x = 0; x < VH_N_ROT; ++x )
|
||||
{
|
||||
memcpy( seek_indexes + x * (VH_N_SUBSET / sizeof(uint32_t)),
|
||||
p0, VH_N_SUBSET);
|
||||
|
||||
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
// 512 bit vector processing is actually slower because it reduces the CPU
|
||||
// clock significantly, which also slows mem access. The AVX512 rol instruction
|
||||
// is still available for smaller vectors.
|
||||
|
||||
// for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 16 )
|
||||
// {
|
||||
// __m512i *p0_v = (__m512i*)( p0_index + y );
|
||||
// *p0_v = mm512_rol_32( *p0_v, 1 );
|
||||
// }
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 8 )
|
||||
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(__m256i); y += 8)
|
||||
{
|
||||
__m256i *p0_v = (__m256i*)( p0_index + y );
|
||||
*p0_v = mm256_rol_32( *p0_v, 1 );
|
||||
casti_m256i( p0_index, y ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y ), 1 );
|
||||
casti_m256i( p0_index, y+1 ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y+1 ), 1 );
|
||||
casti_m256i( p0_index, y+2 ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y+2 ), 1 );
|
||||
casti_m256i( p0_index, y+3 ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y+3 ), 1 );
|
||||
casti_m256i( p0_index, y+4 ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y+4 ), 1 );
|
||||
casti_m256i( p0_index, y+5 ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y+5 ), 1 );
|
||||
casti_m256i( p0_index, y+6 ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y+6 ), 1 );
|
||||
casti_m256i( p0_index, y+7 ) = mm256_rol_32(
|
||||
casti_m256i( p0_index, y+7 ), 1 );
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 4 )
|
||||
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(__m128i); y += 8)
|
||||
{
|
||||
__m128i *p0_v = (__m128i*)( p0_index + y );
|
||||
*p0_v = mm128_rol_32( *p0_v, 1 );
|
||||
casti_m128i( p0_index, y ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y ), 1 );
|
||||
casti_m128i( p0_index, y+1 ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y+1 ), 1 );
|
||||
casti_m128i( p0_index, y+2 ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y+2 ), 1 );
|
||||
casti_m128i( p0_index, y+3 ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y+3 ), 1 );
|
||||
casti_m128i( p0_index, y+4 ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y+4 ), 1 );
|
||||
casti_m128i( p0_index, y+5 ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y+5 ), 1 );
|
||||
casti_m128i( p0_index, y+6 ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y+6 ), 1 );
|
||||
casti_m128i( p0_index, y+7 ) = mm128_rol_32(
|
||||
casti_m128i( p0_index, y+7 ), 1 );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// for (size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); ++y)
|
||||
// {
|
||||
// *(p0_index + y) = ( *(p0_index + y) << 1 )
|
||||
// | ( 1 & (*(p0_index + y) >> 31) );
|
||||
// }
|
||||
}
|
||||
|
||||
sha3( &input[0], VH_HEADER_SIZE, &p1[0], VH_HASH_OUT_SIZE );
|
||||
|
||||
uint32_t* p1_32 = (uint32_t*)p1;
|
||||
uint32_t* blob_bytes_32 = (uint32_t*)blob_bytes;
|
||||
uint32_t value_accumulator = 0x811c9dc5;
|
||||
const uint32_t mdiv = ((blob_size - VH_HASH_OUT_SIZE) / VH_BYTE_ALIGNMENT) + 1;
|
||||
for (size_t i = 0; i < VH_N_INDEXES; i++)
|
||||
const uint32_t mdiv = ( ( blob_size - VH_HASH_OUT_SIZE )
|
||||
/ VH_BYTE_ALIGNMENT ) + 1;
|
||||
#if defined (__AVX2__)
|
||||
const __m256i k = _mm256_set1_epi32( 0x1000193 );
|
||||
#elif defined(__SSE41__)
|
||||
const __m128i k = _mm_set1_epi32( 0x1000193 );
|
||||
#endif
|
||||
|
||||
for ( size_t i = 0; i < VH_N_INDEXES; i++ )
|
||||
{
|
||||
const uint32_t offset = (fnv1a(seek_indexes[i], value_accumulator) % mdiv) * VH_BYTE_ALIGNMENT / sizeof(uint32_t);
|
||||
const uint32_t offset =
|
||||
( fnv1a( seek_indexes[i], value_accumulator) % mdiv )
|
||||
* ( VH_BYTE_ALIGNMENT / sizeof(uint32_t) );
|
||||
const uint32_t *blob_off = blob_bytes_32 + offset;
|
||||
for (size_t i2 = 0; i2 < VH_HASH_OUT_SIZE / sizeof(uint32_t); i2++)
|
||||
{
|
||||
const uint32_t value = *( blob_off + i2 );
|
||||
uint32_t* p1_ptr = p1_32 + i2;
|
||||
*p1_ptr = fnv1a( *p1_ptr, value );
|
||||
value_accumulator = fnv1a( value_accumulator, value );
|
||||
}
|
||||
|
||||
// update value accumulator for next seek index
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[0] );
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[1] );
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[2] );
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[3] );
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[4] );
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[5] );
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[6] );
|
||||
value_accumulator = fnv1a( value_accumulator, blob_off[7] );
|
||||
|
||||
#if defined (__AVX2__)
|
||||
*(__m256i*)p1_32 = _mm256_mullo_epi32( _mm256_xor_si256(
|
||||
*(__m256i*)p1_32, *(__m256i*)blob_off ), k );
|
||||
#elif defined(__SSE41__)
|
||||
casti_m128i( p1_32, 0 ) = _mm_mullo_epi32( _mm_xor_si128(
|
||||
casti_m128i( p1_32, 0 ), casti_m128i( blob_off, 0 ) ), k );
|
||||
casti_m128i( p1_32, 1 ) = _mm_mullo_epi32( _mm_xor_si128(
|
||||
casti_m128i( p1_32, 1 ), casti_m128i( blob_off, 1 ) ), k );
|
||||
#else
|
||||
for ( size_t i2 = 0; i2 < VH_HASH_OUT_SIZE / sizeof(uint32_t); i2++ )
|
||||
p1_32[i2] = fnv1a( p1_32[i2], blob_off[i2] );
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
memcpy(output, p1, VH_HASH_OUT_SIZE);
|
||||
memcpy( output, p1, VH_HASH_OUT_SIZE );
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
@@ -52,6 +52,8 @@ void verthash_hash(const unsigned char* blob_bytes,
|
||||
const unsigned char(*input)[VH_HEADER_SIZE],
|
||||
unsigned char(*output)[VH_HASH_OUT_SIZE]);
|
||||
|
||||
void verthash_sha3_512_prehash_72( const void *input );
|
||||
void verthash_sha3_512_final_8( void *hash, const uint64_t nonce );
|
||||
|
||||
#endif // !Verthash_INCLUDE_ONCE
|
||||
|
||||
|
301
algo/verthash/tiny_sha3/sha3-4way.c
Normal file
301
algo/verthash/tiny_sha3/sha3-4way.c
Normal file
@@ -0,0 +1,301 @@
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// sha3-4way.c
|
||||
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
// vectorization by JayDDee 2021-03-27
|
||||
//
|
||||
// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
|
||||
// Revised 03-Sep-15 for portability + OpenSSL - style API
|
||||
|
||||
#include "sha3-4way.h"
|
||||
|
||||
// constants
|
||||
static const uint64_t keccakf_rndc[24] = {
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
|
||||
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
|
||||
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
|
||||
void sha3_4way_keccakf( __m256i st[25] )
|
||||
{
|
||||
int i, j, r;
|
||||
__m256i t, bc[5];
|
||||
|
||||
for ( r = 0; r < KECCAKF_ROUNDS; r++ )
|
||||
{
|
||||
// Theta
|
||||
bc[0] = _mm256_xor_si256( st[0],
|
||||
mm256_xor4( st[5], st[10], st[15], st[20] ) );
|
||||
bc[1] = _mm256_xor_si256( st[1],
|
||||
mm256_xor4( st[6], st[11], st[16], st[21] ) );
|
||||
bc[2] = _mm256_xor_si256( st[2],
|
||||
mm256_xor4( st[7], st[12], st[17], st[22] ) );
|
||||
bc[3] = _mm256_xor_si256( st[3],
|
||||
mm256_xor4( st[8], st[13], st[18], st[23] ) );
|
||||
bc[4] = _mm256_xor_si256( st[4],
|
||||
mm256_xor4( st[9], st[14], st[19], st[24] ) );
|
||||
|
||||
for ( i = 0; i < 5; i++ )
|
||||
{
|
||||
t = _mm256_xor_si256( bc[ (i+4) % 5 ],
|
||||
mm256_rol_64( bc[ (i+1) % 5 ], 1 ) );
|
||||
st[ i ] = _mm256_xor_si256( st[ i ], t );
|
||||
st[ i+5 ] = _mm256_xor_si256( st[ i+ 5 ], t );
|
||||
st[ i+10 ] = _mm256_xor_si256( st[ i+10 ], t );
|
||||
st[ i+15 ] = _mm256_xor_si256( st[ i+15 ], t );
|
||||
st[ i+20 ] = _mm256_xor_si256( st[ i+20 ], t );
|
||||
}
|
||||
|
||||
// Rho Pi
|
||||
#define RHO_PI( i, c ) \
|
||||
bc[0] = st[ i ]; \
|
||||
st[ i ] = mm256_rol_64( t, c ); \
|
||||
t = bc[0]
|
||||
|
||||
t = st[1];
|
||||
|
||||
RHO_PI( 10, 1 );
|
||||
RHO_PI( 7, 3 );
|
||||
RHO_PI( 11, 6 );
|
||||
RHO_PI( 17, 10 );
|
||||
RHO_PI( 18, 15 );
|
||||
RHO_PI( 3, 21 );
|
||||
RHO_PI( 5, 28 );
|
||||
RHO_PI( 16, 36 );
|
||||
RHO_PI( 8, 45 );
|
||||
RHO_PI( 21, 55 );
|
||||
RHO_PI( 24, 2 );
|
||||
RHO_PI( 4, 14 );
|
||||
RHO_PI( 15, 27 );
|
||||
RHO_PI( 23, 41 );
|
||||
RHO_PI( 19, 56 );
|
||||
RHO_PI( 13, 8 );
|
||||
RHO_PI( 12, 25 );
|
||||
RHO_PI( 2, 43 );
|
||||
RHO_PI( 20, 62 );
|
||||
RHO_PI( 14, 18 );
|
||||
RHO_PI( 22, 39 );
|
||||
RHO_PI( 9, 61 );
|
||||
RHO_PI( 6, 20 );
|
||||
RHO_PI( 1, 44 );
|
||||
|
||||
#undef RHO_PI
|
||||
|
||||
// Chi
|
||||
for ( j = 0; j < 25; j += 5 )
|
||||
{
|
||||
memcpy( bc, &st[ j ], 5*32 );
|
||||
st[ j ] = _mm256_xor_si256( st[ j ],
|
||||
_mm256_andnot_si256( bc[1], bc[2] ) );
|
||||
st[ j+1 ] = _mm256_xor_si256( st[ j+1 ],
|
||||
_mm256_andnot_si256( bc[2], bc[3] ) );
|
||||
st[ j+2 ] = _mm256_xor_si256( st[ j+2 ],
|
||||
_mm256_andnot_si256( bc[3], bc[4] ) );
|
||||
st[ j+3 ] = _mm256_xor_si256( st[ j+3 ],
|
||||
_mm256_andnot_si256( bc[4], bc[0] ) );
|
||||
st[ j+4 ] = _mm256_xor_si256( st[ j+4 ],
|
||||
_mm256_andnot_si256( bc[0], bc[1] ) );
|
||||
}
|
||||
|
||||
// Iota
|
||||
st[0] = _mm256_xor_si256( st[0],
|
||||
_mm256_set1_epi64x( keccakf_rndc[ r ] ) );
|
||||
}
|
||||
}
|
||||
|
||||
int sha3_4way_init( sha3_4way_ctx_t *c, int mdlen )
|
||||
{
|
||||
for ( int i = 0; i < 25; i++ ) c->st[ i ] = m256_zero;
|
||||
c->mdlen = mdlen;
|
||||
c->rsiz = 200 - 2 * mdlen;
|
||||
c->pt = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int sha3_4way_update( sha3_4way_ctx_t *c, const void *data, size_t len )
|
||||
{
|
||||
size_t i;
|
||||
int j = c->pt;
|
||||
const int rsiz = c->rsiz / 8;
|
||||
const int l = len / 8;
|
||||
|
||||
for ( i = 0; i < l; i++ )
|
||||
{
|
||||
c->st[ j ] = _mm256_xor_si256( c->st[ j ],
|
||||
( (const __m256i*)data )[i] );
|
||||
j++;
|
||||
if ( j >= rsiz )
|
||||
{
|
||||
sha3_4way_keccakf( c->st );
|
||||
j = 0;
|
||||
}
|
||||
}
|
||||
c->pt = j;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int sha3_4way_final( void *md, sha3_4way_ctx_t *c )
|
||||
{
|
||||
c->st[ c->pt ] = _mm256_xor_si256( c->st[ c->pt ],
|
||||
m256_const1_64( 6 ) );
|
||||
c->st[ c->rsiz / 8 - 1 ] =
|
||||
_mm256_xor_si256( c->st[ c->rsiz / 8 - 1 ],
|
||||
m256_const1_64( 0x8000000000000000 ) );
|
||||
sha3_4way_keccakf( c->st );
|
||||
memcpy( md, c->st, c->mdlen * 4 );
|
||||
return 1;
|
||||
}
|
||||
|
||||
void *sha3_4way( const void *in, size_t inlen, void *md, int mdlen )
|
||||
{
|
||||
sha3_4way_ctx_t ctx;
|
||||
sha3_4way_init( &ctx, mdlen);
|
||||
sha3_4way_update( &ctx, in, inlen );
|
||||
sha3_4way_final( md, &ctx );
|
||||
return md;
|
||||
}
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
void sha3_8way_keccakf( __m512i st[25] )
|
||||
{
|
||||
int i, j, r;
|
||||
__m512i t, bc[5];
|
||||
|
||||
// actual iteration
|
||||
for ( r = 0; r < KECCAKF_ROUNDS; r++ )
|
||||
{
|
||||
|
||||
// Theta
|
||||
for ( i = 0; i < 5; i++ )
|
||||
bc[i] = _mm512_xor_si512( st[i],
|
||||
mm512_xor4( st[ i+5 ], st[ i+10 ], st[ i+15 ], st[i+20 ] ) );
|
||||
|
||||
for ( i = 0; i < 5; i++ )
|
||||
{
|
||||
t = _mm512_xor_si512( bc[(i + 4) % 5],
|
||||
_mm512_rol_epi64( bc[(i + 1) % 5], 1 ) );
|
||||
for ( j = 0; j < 25; j += 5 )
|
||||
st[j + i] = _mm512_xor_si512( st[j + i], t );
|
||||
}
|
||||
|
||||
// Rho Pi
|
||||
#define RHO_PI( i, c ) \
|
||||
bc[0] = st[ i ]; \
|
||||
st[ i ] = _mm512_rol_epi64( t, c ); \
|
||||
t = bc[0]
|
||||
|
||||
t = st[1];
|
||||
|
||||
RHO_PI( 10, 1 );
|
||||
RHO_PI( 7, 3 );
|
||||
RHO_PI( 11, 6 );
|
||||
RHO_PI( 17, 10 );
|
||||
RHO_PI( 18, 15 );
|
||||
RHO_PI( 3, 21 );
|
||||
RHO_PI( 5, 28 );
|
||||
RHO_PI( 16, 36 );
|
||||
RHO_PI( 8, 45 );
|
||||
RHO_PI( 21, 55 );
|
||||
RHO_PI( 24, 2 );
|
||||
RHO_PI( 4, 14 );
|
||||
RHO_PI( 15, 27 );
|
||||
RHO_PI( 23, 41 );
|
||||
RHO_PI( 19, 56 );
|
||||
RHO_PI( 13, 8 );
|
||||
RHO_PI( 12, 25 );
|
||||
RHO_PI( 2, 43 );
|
||||
RHO_PI( 20, 62 );
|
||||
RHO_PI( 14, 18 );
|
||||
RHO_PI( 22, 39 );
|
||||
RHO_PI( 9, 61 );
|
||||
RHO_PI( 6, 20 );
|
||||
RHO_PI( 1, 44 );
|
||||
|
||||
#undef RHO_PI
|
||||
|
||||
// Chi
|
||||
for ( j = 0; j < 25; j += 5 )
|
||||
{
|
||||
for ( i = 0; i < 5; i++ )
|
||||
bc[i] = st[j + i];
|
||||
for ( i = 0; i < 5; i++ )
|
||||
st[ j+i ] = _mm512_xor_si512( st[ j+i ], _mm512_andnot_si512(
|
||||
bc[ (i+1) % 5 ], bc[ (i+2) % 5 ] ) );
|
||||
}
|
||||
|
||||
// Iota
|
||||
st[0] = _mm512_xor_si512( st[0], _mm512_set1_epi64( keccakf_rndc[r] ) );
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the context for SHA3
|
||||
|
||||
int sha3_8way_init( sha3_8way_ctx_t *c, int mdlen )
|
||||
{
|
||||
for ( int i = 0; i < 25; i++ ) c->st[ i ] = m512_zero;
|
||||
c->mdlen = mdlen;
|
||||
c->rsiz = 200 - 2 * mdlen;
|
||||
c->pt = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// update state with more data
|
||||
|
||||
int sha3_8way_update( sha3_8way_ctx_t *c, const void *data, size_t len )
|
||||
{
|
||||
size_t i;
|
||||
int j = c->pt;
|
||||
const int rsiz = c->rsiz / 8;
|
||||
const int l = len / 8;
|
||||
|
||||
for ( i = 0; i < l; i++ )
|
||||
{
|
||||
c->st[ j ] = _mm512_xor_si512( c->st[ j ],
|
||||
( (const __m512i*)data )[i] );
|
||||
j++;
|
||||
if ( j >= rsiz )
|
||||
{
|
||||
sha3_8way_keccakf( c->st );
|
||||
j = 0;
|
||||
}
|
||||
}
|
||||
c->pt = j;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// finalize and output a hash
|
||||
|
||||
int sha3_8way_final( void *md, sha3_8way_ctx_t *c )
|
||||
{
|
||||
c->st[ c->pt ] =
|
||||
_mm512_xor_si512( c->st[ c->pt ],
|
||||
m512_const1_64( 6 ) );
|
||||
c->st[ c->rsiz / 8 - 1 ] =
|
||||
_mm512_xor_si512( c->st[ c->rsiz / 8 - 1 ],
|
||||
m512_const1_64( 0x8000000000000000 ) );
|
||||
sha3_8way_keccakf( c->st );
|
||||
memcpy( md, c->st, c->mdlen * 8 );
|
||||
return 1;
|
||||
}
|
||||
|
||||
// compute a SHA-3 hash (md) of given byte length from "in"
|
||||
|
||||
void *sha3_8way( const void *in, size_t inlen, void *md, int mdlen )
|
||||
{
|
||||
sha3_8way_ctx_t sha3;
|
||||
sha3_8way_init( &sha3, mdlen);
|
||||
sha3_8way_update( &sha3, in, inlen );
|
||||
sha3_8way_final( md, &sha3 );
|
||||
return md;
|
||||
}
|
||||
|
||||
#endif // AVX512
|
||||
#endif // AVX2
|
67
algo/verthash/tiny_sha3/sha3-4way.h
Normal file
67
algo/verthash/tiny_sha3/sha3-4way.h
Normal file
@@ -0,0 +1,67 @@
|
||||
// sha3.h
|
||||
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
// 2021-03-27 JayDDee
|
||||
//
|
||||
#ifndef SHA3_4WAY_H
|
||||
#define SHA3_4WAY_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef KECCAKF_ROUNDS
|
||||
#define KECCAKF_ROUNDS 24
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m256i st[25]; // 64-bit words * 4 lanes
|
||||
int pt, rsiz, mdlen; // these don't overflow
|
||||
} sha3_4way_ctx_t __attribute__ ((aligned (64)));;
|
||||
|
||||
// Compression function.
|
||||
void sha3_4way_keccakf( __m256i st[25] );
|
||||
|
||||
// OpenSSL - like interfece
|
||||
int sha3_4way_init( sha3_4way_ctx_t *c, int mdlen ); // mdlen = hash output in bytes
|
||||
int sha3_4way_update( sha3_4way_ctx_t *c, const void *data, size_t len );
|
||||
int sha3_4way_final( void *md, sha3_4way_ctx_t *c ); // digest goes to md
|
||||
|
||||
// compute a sha3 hash (md) of given byte length from "in"
|
||||
void *sha3_4way( const void *in, size_t inlen, void *md, int mdlen );
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// state context
|
||||
typedef struct
|
||||
{
|
||||
__m512i st[25]; // 64-bit words * 8 lanes
|
||||
int pt, rsiz, mdlen; // these don't overflow
|
||||
} sha3_8way_ctx_t __attribute__ ((aligned (64)));;
|
||||
|
||||
// Compression function.
|
||||
void sha3_8way_keccakf( __m512i st[25] );
|
||||
|
||||
// OpenSSL - like interfece
|
||||
int sha3_8way_init( sha3_8way_ctx_t *c, int mdlen ); // mdlen = hash output in bytes
|
||||
int sha3_8way_update( sha3_8way_ctx_t *c, const void *data, size_t len );
|
||||
int sha3_8way_final( void *md, sha3_8way_ctx_t *c ); // digest goes to md
|
||||
|
||||
// compute a sha3 hash (md) of given byte length from "in"
|
||||
void *sha3_8way( const void *in, size_t inlen, void *md, int mdlen );
|
||||
|
||||
#endif // AVX512
|
||||
#endif // AVX2
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -5,6 +5,7 @@
|
||||
// Revised 03-Sep-15 for portability + OpenSSL - style API
|
||||
|
||||
#include "sha3.h"
|
||||
#include <string.h>
|
||||
|
||||
// update the state with given number of rounds
|
||||
|
||||
@@ -21,6 +22,7 @@ void sha3_keccakf(uint64_t st[25])
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
/*
|
||||
const int keccakf_rotc[24] = {
|
||||
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
|
||||
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
|
||||
@@ -29,6 +31,7 @@ void sha3_keccakf(uint64_t st[25])
|
||||
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
|
||||
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
|
||||
};
|
||||
*/
|
||||
|
||||
// variables
|
||||
int i, j, r;
|
||||
@@ -60,14 +63,50 @@ void sha3_keccakf(uint64_t st[25])
|
||||
st[j + i] ^= t;
|
||||
}
|
||||
|
||||
|
||||
// Rho Pi
|
||||
#define RHO_PI( i, c ) \
|
||||
bc[0] = st[ i ]; \
|
||||
st[ i ] = ROTL64( t, c ); \
|
||||
t = bc[0]
|
||||
|
||||
t = st[1];
|
||||
|
||||
RHO_PI( 10, 1 );
|
||||
RHO_PI( 7, 3 );
|
||||
RHO_PI( 11, 6 );
|
||||
RHO_PI( 17, 10 );
|
||||
RHO_PI( 18, 15 );
|
||||
RHO_PI( 3, 21 );
|
||||
RHO_PI( 5, 28 );
|
||||
RHO_PI( 16, 36 );
|
||||
RHO_PI( 8, 45 );
|
||||
RHO_PI( 21, 55 );
|
||||
RHO_PI( 24, 2 );
|
||||
RHO_PI( 4, 14 );
|
||||
RHO_PI( 15, 27 );
|
||||
RHO_PI( 23, 41 );
|
||||
RHO_PI( 19, 56 );
|
||||
RHO_PI( 13, 8 );
|
||||
RHO_PI( 12, 25 );
|
||||
RHO_PI( 2, 43 );
|
||||
RHO_PI( 20, 62 );
|
||||
RHO_PI( 14, 18 );
|
||||
RHO_PI( 22, 39 );
|
||||
RHO_PI( 9, 61 );
|
||||
RHO_PI( 6, 20 );
|
||||
RHO_PI( 1, 44 );
|
||||
|
||||
#undef RHO_PI
|
||||
|
||||
/*
|
||||
for (i = 0; i < 24; i++) {
|
||||
j = keccakf_piln[i];
|
||||
bc[0] = st[j];
|
||||
st[j] = ROTL64(t, keccakf_rotc[i]);
|
||||
t = bc[0];
|
||||
}
|
||||
*/
|
||||
|
||||
// Chi
|
||||
for (j = 0; j < 25; j += 5) {
|
||||
@@ -118,17 +157,20 @@ int sha3_init(sha3_ctx_t *c, int mdlen)
|
||||
int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
|
||||
{
|
||||
size_t i;
|
||||
int j;
|
||||
int j = c->pt / 8;
|
||||
const int rsiz = c->rsiz / 8;
|
||||
const int l = len / 8;
|
||||
|
||||
j = c->pt;
|
||||
for (i = 0; i < len; i++) {
|
||||
c->st.b[j++] ^= ((const uint8_t *) data)[i];
|
||||
if (j >= c->rsiz) {
|
||||
sha3_keccakf(c->st.q);
|
||||
for ( i = 0; i < l; i++ )
|
||||
{
|
||||
c->st.q[ j++ ] ^= ( ((const uint64_t *) data) [i] );
|
||||
if ( j >= rsiz )
|
||||
{
|
||||
sha3_keccakf( c->st.q );
|
||||
j = 0;
|
||||
}
|
||||
}
|
||||
c->pt = j;
|
||||
c->pt = j*8;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -137,16 +179,10 @@ int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
|
||||
|
||||
int sha3_final(void *md, sha3_ctx_t *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
c->st.b[c->pt] ^= 0x06;
|
||||
c->st.b[c->rsiz - 1] ^= 0x80;
|
||||
c->st.q[ c->pt / 8 ] ^= 6;
|
||||
c->st.q[ c->rsiz / 8 - 1 ] ^= 0x8000000000000000;
|
||||
sha3_keccakf(c->st.q);
|
||||
|
||||
for (i = 0; i < c->mdlen; i++) {
|
||||
((uint8_t *) md)[i] = c->st.b[i];
|
||||
}
|
||||
|
||||
memcpy( md, c->st.q, c->mdlen );
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -155,7 +191,6 @@ int sha3_final(void *md, sha3_ctx_t *c)
|
||||
void *sha3(const void *in, size_t inlen, void *md, int mdlen)
|
||||
{
|
||||
sha3_ctx_t sha3;
|
||||
|
||||
sha3_init(&sha3, mdlen);
|
||||
sha3_update(&sha3, in, inlen);
|
||||
sha3_final(md, &sha3);
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include "Verthash.h"
|
||||
#include "tiny_sha3/sha3-4way.h"
|
||||
|
||||
static verthash_info_t verthashInfo;
|
||||
|
||||
@@ -12,6 +13,82 @@ static const uint8_t verthashDatFileHash_bytes[32] =
|
||||
0x29, 0xec, 0xf8, 0x8f, 0x8a, 0xd4, 0x76, 0x39,
|
||||
0xb6, 0xed, 0xed, 0xaf, 0xd7, 0x21, 0xaa, 0x48 };
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
static __thread sha3_4way_ctx_t sha3_mid_ctxA;
|
||||
static __thread sha3_4way_ctx_t sha3_mid_ctxB;
|
||||
|
||||
#else
|
||||
|
||||
static __thread sha3_ctx_t sha3_mid_ctx[8];
|
||||
|
||||
#endif
|
||||
|
||||
void verthash_sha3_512_prehash_72( const void *input )
|
||||
{
|
||||
#if defined(__AVX2__)
|
||||
|
||||
__m256i vin[10];
|
||||
mm256_intrlv80_4x64( vin, input );
|
||||
|
||||
sha3_4way_init( &sha3_mid_ctxA, 64 );
|
||||
sha3_4way_init( &sha3_mid_ctxB, 64 );
|
||||
|
||||
vin[0] = _mm256_add_epi8( vin[0], _mm256_set_epi64x( 4,3,2,1 ) );
|
||||
sha3_4way_update( &sha3_mid_ctxA, vin, 72 );
|
||||
|
||||
vin[0] = _mm256_add_epi8( vin[0], _mm256_set1_epi64x( 4 ) );
|
||||
sha3_4way_update( &sha3_mid_ctxB, vin, 72 );
|
||||
|
||||
#else
|
||||
|
||||
char in[80] __attribute__ ((aligned (64)));
|
||||
memcpy( in, input, 80 );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
in[0] += 1;
|
||||
sha3_init( &sha3_mid_ctx[i], 64 );
|
||||
sha3_update( &sha3_mid_ctx[i], in, 72 );
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void verthash_sha3_512_final_8( void *hash, const uint64_t nonce )
|
||||
{
|
||||
#if defined(__AVX2__)
|
||||
|
||||
__m256i vhashA[ 10 ] __attribute__ ((aligned (64)));
|
||||
__m256i vhashB[ 10 ] __attribute__ ((aligned (64)));
|
||||
|
||||
sha3_4way_ctx_t ctx;
|
||||
__m256i vnonce = _mm256_set1_epi64x( nonce );
|
||||
|
||||
memcpy( &ctx, &sha3_mid_ctxA, sizeof ctx );
|
||||
sha3_4way_update( &ctx, &vnonce, 8 );
|
||||
sha3_4way_final( vhashA, &ctx );
|
||||
|
||||
memcpy( &ctx, &sha3_mid_ctxB, sizeof ctx );
|
||||
sha3_4way_update( &ctx, &vnonce, 8 );
|
||||
sha3_4way_final( vhashB, &ctx );
|
||||
|
||||
dintrlv_4x64( hash, hash+64, hash+128, hash+192, vhashA, 512 );
|
||||
dintrlv_4x64( hash+256, hash+320, hash+384, hash+448, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
sha3_ctx_t ctx;
|
||||
memcpy( &ctx, &sha3_mid_ctx[i], sizeof ctx );
|
||||
sha3_update( &ctx, &nonce, 8 );
|
||||
sha3_final( hash + i*64, &ctx );
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int scanhash_verthash( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
@@ -26,6 +103,8 @@ int scanhash_verthash( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
verthash_sha3_512_prehash_72( edata );
|
||||
|
||||
do
|
||||
{
|
||||
edata[19] = n;
|
||||
@@ -51,15 +130,14 @@ bool register_verthash_algo( algo_gate_t* gate )
|
||||
|
||||
opt_target_factor = 256.0;
|
||||
gate->scanhash = (void*)&scanhash_verthash;
|
||||
gate->optimizations = AVX2_OPT;
|
||||
|
||||
// verthash data file
|
||||
char *verthash_data_file = opt_data_file ? opt_data_file
|
||||
: default_verthash_data_file;
|
||||
|
||||
int vhLoadResult = verthash_info_init( &verthashInfo, verthash_data_file );
|
||||
if (vhLoadResult == 0) // No Error
|
||||
{
|
||||
// and verify data file(if it was enabled)
|
||||
if ( opt_verify )
|
||||
{
|
||||
uint8_t vhDataFileHash[32] = { 0 };
|
||||
@@ -78,7 +156,6 @@ bool register_verthash_algo( algo_gate_t* gate )
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
{
|
||||
// Handle Verthash error codes
|
||||
if ( vhLoadResult == 1 )
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.1.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.2.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.16.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.16.1'
|
||||
PACKAGE_VERSION='3.16.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.16.2'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.16.1 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.16.2 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.16.1:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.16.2:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.16.1
|
||||
cpuminer-opt configure 3.16.2
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.16.1, which was
|
||||
It was created by cpuminer-opt $as_me 3.16.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.16.1'
|
||||
VERSION='3.16.2'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.16.1, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.16.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.16.1
|
||||
cpuminer-opt config.status 3.16.2
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.16.1])
|
||||
AC_INIT([cpuminer-opt], [3.16.2])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
49
cpu-miner.c
49
cpu-miner.c
@@ -555,7 +555,11 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
if ( !s )
|
||||
continue;
|
||||
if ( !strcmp( s, "segwit" ) || !strcmp( s, "!segwit" ) )
|
||||
{
|
||||
segwit = true;
|
||||
if ( opt_debug )
|
||||
applog( LOG_INFO, "GBT: SegWit is enabled" );
|
||||
}
|
||||
}
|
||||
}
|
||||
// Segwit END
|
||||
@@ -954,25 +958,25 @@ void scale_hash_for_display ( double* hashrate, char* prefix )
|
||||
else { *prefix = 'Y'; *hashrate /= 1e24; }
|
||||
}
|
||||
|
||||
static inline void sprintf_et( char *str, int seconds )
|
||||
static inline void sprintf_et( char *str, long unsigned int seconds )
|
||||
{
|
||||
// sprintf doesn't like uint64_t, Linux thinks it's long, Windows long long.
|
||||
unsigned int min = seconds / 60;
|
||||
unsigned int sec = seconds % 60;
|
||||
unsigned int hrs = min / 60;
|
||||
long unsigned int min = seconds / 60;
|
||||
long unsigned int sec = seconds % 60;
|
||||
long unsigned int hrs = min / 60;
|
||||
|
||||
if ( unlikely( hrs ) )
|
||||
{
|
||||
unsigned int years = hrs / (24*365);
|
||||
unsigned int days = hrs / 24;
|
||||
if ( years )
|
||||
sprintf( str, "%uy%ud", years, years % 365 );
|
||||
else if ( days ) //0d00h
|
||||
sprintf( str, "%ud%02uh", days, hrs % 24 );
|
||||
long unsigned int days = hrs / 24;
|
||||
long unsigned int years = days / 365;
|
||||
if ( years ) // 0y000d
|
||||
sprintf( str, "%luy%lud", years, years % 365 );
|
||||
else if ( days ) // 0d00h
|
||||
sprintf( str, "%lud%02luh", days, hrs % 24 );
|
||||
else // 0h00m
|
||||
sprintf( str, "%uh%02um", hrs, min % 60 );
|
||||
sprintf( str, "%luh%02lum", hrs, min % 60 );
|
||||
}
|
||||
else // 0m00s
|
||||
sprintf( str, "%um%02us", min, sec );
|
||||
sprintf( str, "%lum%02lus", min, sec );
|
||||
}
|
||||
|
||||
const long double exp32 = EXP32; // 2**32
|
||||
@@ -1071,7 +1075,8 @@ void report_summary_log( bool force )
|
||||
|
||||
double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
|
||||
double ghrate = global_hashrate;
|
||||
double shrate = safe_div( exp32 * last_targetdiff * (double)(accepts),
|
||||
double target_diff = exp32 * last_targetdiff;
|
||||
double shrate = safe_div( target_diff * (double)(accepts),
|
||||
share_time, 0. );
|
||||
double sess_hrate = safe_div( exp32 * norm_diff_sum,
|
||||
(double)uptime.tv_sec, 0. );
|
||||
@@ -1099,12 +1104,12 @@ void report_summary_log( bool force )
|
||||
|
||||
if ( accepted_share_count < submitted_share_count )
|
||||
{
|
||||
double ltd = exp32 * last_targetdiff;
|
||||
double lost_ghrate = uptime.tv_sec == 0 ? 0.
|
||||
: ltd * (double)(submitted_share_count - accepted_share_count )
|
||||
: target_diff
|
||||
* (double)(submitted_share_count - accepted_share_count )
|
||||
/ (double)uptime.tv_sec;
|
||||
double lost_shrate = share_time == 0. ? 0.
|
||||
: ltd * (double)(submits - accepts ) / share_time;
|
||||
: target_diff * (double)(submits - accepts ) / share_time;
|
||||
char lshr_units[4] = {0};
|
||||
char lghr_units[4] = {0};
|
||||
scale_hash_for_display( &lost_shrate, lshr_units );
|
||||
@@ -2437,10 +2442,14 @@ static void *miner_thread( void *userdata )
|
||||
#if ((defined(_WIN64) || defined(__WINDOWS__)) || defined(_WIN32))
|
||||
applog( LOG_NOTICE, "Total: %s %sH/s", hr, hr_units );
|
||||
#else
|
||||
applog( LOG_NOTICE, "Total: %s %sH/s, CPU temp: %dC",
|
||||
hr, hr_units, (uint32_t)cpu_temp(0) );
|
||||
float lo_freq = 0., hi_freq = 0.;
|
||||
linux_cpu_hilo_freq( &lo_freq, &hi_freq );
|
||||
applog( LOG_NOTICE,
|
||||
"Total: %s %sH/s, Temp: %dC, Freq: %.3f/%.3f GHz",
|
||||
hr, hr_units, (uint32_t)cpu_temp(0), lo_freq / 1e6,
|
||||
hi_freq / 1e6 );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} // benchmark
|
||||
|
||||
// conditional mining
|
||||
|
2
miner.h
2
miner.h
@@ -900,7 +900,7 @@ Options:\n\
|
||||
--benchmark run in offline benchmark mode\n\
|
||||
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
|
||||
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)\n\
|
||||
-b, --api-bind IP/Port for the miner API (default: 127.0.0.1:4048)\n\
|
||||
-b, --api-bind=address[:port] IP address for the miner API, default port is 4048)\n\
|
||||
--api-remote Allow remote control\n\
|
||||
--max-temp=N Only mine if cpu temp is less than specified value (linux)\n\
|
||||
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
|
||||
|
@@ -1225,37 +1225,6 @@ static inline void intrlv_4x64( void *dst, const void *src0,
|
||||
d[31] = _mm_unpackhi_epi64( s2[7], s3[7] );
|
||||
}
|
||||
|
||||
/*
|
||||
static inline void intrlv_4x64( void *dst, void *src0,
|
||||
void *src1, void *src2, void *src3, int bit_len )
|
||||
{
|
||||
uint64_t *d = (uint64_t*)dst;
|
||||
uint64_t *s0 = (uint64_t*)src0;
|
||||
uint64_t *s1 = (uint64_t*)src1;
|
||||
uint64_t *s2 = (uint64_t*)src2;
|
||||
uint64_t *s3 = (uint64_t*)src3;
|
||||
d[ 0] = s0[ 0]; d[ 1] = s1[ 0]; d[ 2] = s2[ 0]; d[ 3] = s3[ 0];
|
||||
d[ 4] = s0[ 1]; d[ 5] = s1[ 1]; d[ 6] = s2[ 1]; d[ 7] = s3[ 1];
|
||||
d[ 8] = s0[ 2]; d[ 9] = s1[ 2]; d[ 10] = s2[ 2]; d[ 11] = s3[ 2];
|
||||
d[ 12] = s0[ 3]; d[ 13] = s1[ 3]; d[ 14] = s2[ 3]; d[ 15] = s3[ 3];
|
||||
if ( bit_len <= 256 ) return;
|
||||
d[ 16] = s0[ 4]; d[ 17] = s1[ 4]; d[ 18] = s2[ 4]; d[ 19] = s3[ 4];
|
||||
d[ 20] = s0[ 5]; d[ 21] = s1[ 5]; d[ 22] = s2[ 5]; d[ 23] = s3[ 5];
|
||||
d[ 24] = s0[ 6]; d[ 25] = s1[ 6]; d[ 26] = s2[ 6]; d[ 27] = s3[ 6];
|
||||
d[ 28] = s0[ 7]; d[ 29] = s1[ 7]; d[ 30] = s2[ 7]; d[ 31] = s3[ 7];
|
||||
if ( bit_len <= 512 ) return;
|
||||
d[ 32] = s0[ 8]; d[ 33] = s1[ 8]; d[ 34] = s2[ 8]; d[ 35] = s3[ 8];
|
||||
d[ 36] = s0[ 9]; d[ 37] = s1[ 9]; d[ 38] = s2[ 9]; d[ 39] = s3[ 9];
|
||||
if ( bit_len <= 640 ) return;
|
||||
d[ 40] = s0[10]; d[ 41] = s1[10]; d[ 42] = s2[10]; d[ 43] = s3[10];
|
||||
d[ 44] = s0[11]; d[ 45] = s1[11]; d[ 46] = s2[11]; d[ 47] = s3[11];
|
||||
d[ 48] = s0[12]; d[ 49] = s1[12]; d[ 50] = s2[12]; d[ 51] = s3[12];
|
||||
d[ 52] = s0[13]; d[ 53] = s1[13]; d[ 54] = s2[13]; d[ 55] = s3[13];
|
||||
d[ 56] = s0[14]; d[ 57] = s1[14]; d[ 58] = s2[14]; d[ 59] = s3[14];
|
||||
d[ 60] = s0[15]; d[ 61] = s1[15]; d[ 62] = s2[15]; d[ 63] = s3[15];
|
||||
}
|
||||
*/
|
||||
|
||||
static inline void intrlv_4x64_512( void *dst, const void *src0,
|
||||
const void *src1, const void *src2, const void *src3 )
|
||||
{
|
||||
@@ -1282,26 +1251,6 @@ static inline void intrlv_4x64_512( void *dst, const void *src0,
|
||||
d[15] = _mm_unpackhi_epi64( s2[3], s3[3] );
|
||||
}
|
||||
|
||||
/*
|
||||
static inline void intrlv_4x64_512( void *dst, const void *src0,
|
||||
const void *src1, const void *src2, const void *src3 )
|
||||
{
|
||||
uint64_t *d = (uint64_t*)dst;
|
||||
const uint64_t *s0 = (const uint64_t*)src0;
|
||||
const uint64_t *s1 = (const uint64_t*)src1;
|
||||
const uint64_t *s2 = (const uint64_t*)src2;
|
||||
const uint64_t *s3 = (const uint64_t*)src3;
|
||||
d[ 0] = s0[ 0]; d[ 1] = s1[ 0]; d[ 2] = s2[ 0]; d[ 3] = s3[ 0];
|
||||
d[ 4] = s0[ 1]; d[ 5] = s1[ 1]; d[ 6] = s2[ 1]; d[ 7] = s3[ 1];
|
||||
d[ 8] = s0[ 2]; d[ 9] = s1[ 2]; d[ 10] = s2[ 2]; d[ 11] = s3[ 2];
|
||||
d[ 12] = s0[ 3]; d[ 13] = s1[ 3]; d[ 14] = s2[ 3]; d[ 15] = s3[ 3];
|
||||
d[ 16] = s0[ 4]; d[ 17] = s1[ 4]; d[ 18] = s2[ 4]; d[ 19] = s3[ 4];
|
||||
d[ 20] = s0[ 5]; d[ 21] = s1[ 5]; d[ 22] = s2[ 5]; d[ 23] = s3[ 5];
|
||||
d[ 24] = s0[ 6]; d[ 25] = s1[ 6]; d[ 26] = s2[ 6]; d[ 27] = s3[ 6];
|
||||
d[ 28] = s0[ 7]; d[ 29] = s1[ 7]; d[ 30] = s2[ 7]; d[ 31] = s3[ 7];
|
||||
}
|
||||
*/
|
||||
|
||||
static inline void dintrlv_4x64( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, const void *src, const int bit_len )
|
||||
{
|
||||
@@ -1347,38 +1296,6 @@ static inline void dintrlv_4x64( void *dst0, void *dst1, void *dst2,
|
||||
d3[7] = _mm_unpackhi_epi64( s[29], s[31] );
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
static inline void dintrlv_4x64( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, const void *src, int bit_len )
|
||||
{
|
||||
uint64_t *d0 = (uint64_t*)dst0;
|
||||
uint64_t *d1 = (uint64_t*)dst1;
|
||||
uint64_t *d2 = (uint64_t*)dst2;
|
||||
uint64_t *d3 = (uint64_t*)dst3;
|
||||
const uint64_t *s = (const uint64_t*)src;
|
||||
d0[ 0] = s[ 0]; d1[ 0] = s[ 1]; d2[ 0] = s[ 2]; d3[ 0] = s[ 3];
|
||||
d0[ 1] = s[ 4]; d1[ 1] = s[ 5]; d2[ 1] = s[ 6]; d3[ 1] = s[ 7];
|
||||
d0[ 2] = s[ 8]; d1[ 2] = s[ 9]; d2[ 2] = s[10]; d3[ 2] = s[11];
|
||||
d0[ 3] = s[12]; d1[ 3] = s[13]; d2[ 3] = s[14]; d3[ 3] = s[15];
|
||||
if ( bit_len <= 256 ) return;
|
||||
d0[ 4] = s[16]; d1[ 4] = s[17]; d2[ 4] = s[18]; d3[ 4] = s[19];
|
||||
d0[ 5] = s[20]; d1[ 5] = s[21]; d2[ 5] = s[22]; d3[ 5] = s[23];
|
||||
d0[ 6] = s[24]; d1[ 6] = s[25]; d2[ 6] = s[26]; d3[ 6] = s[27];
|
||||
d0[ 7] = s[28]; d1[ 7] = s[29]; d2[ 7] = s[30]; d3[ 7] = s[31];
|
||||
if ( bit_len <= 512 ) return;
|
||||
d0[ 8] = s[32]; d1[ 8] = s[33]; d2[ 8] = s[34]; d3[ 8] = s[35];
|
||||
d0[ 9] = s[36]; d1[ 9] = s[37]; d2[ 9] = s[38]; d3[ 9] = s[39];
|
||||
if ( bit_len <= 640 ) return;
|
||||
d0[10] = s[40]; d1[10] = s[41]; d2[10] = s[42]; d3[10] = s[43];
|
||||
d0[11] = s[44]; d1[11] = s[45]; d2[11] = s[46]; d3[11] = s[47];
|
||||
d0[12] = s[48]; d1[12] = s[49]; d2[12] = s[50]; d3[12] = s[51];
|
||||
d0[13] = s[52]; d1[13] = s[53]; d2[13] = s[54]; d3[13] = s[55];
|
||||
d0[14] = s[56]; d1[14] = s[57]; d2[14] = s[58]; d3[14] = s[59];
|
||||
d0[15] = s[60]; d1[15] = s[61]; d2[15] = s[62]; d3[15] = s[63];
|
||||
}
|
||||
*/
|
||||
|
||||
static inline void dintrlv_4x64_512( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, const void *src )
|
||||
{
|
||||
@@ -1405,26 +1322,6 @@ static inline void dintrlv_4x64_512( void *dst0, void *dst1, void *dst2,
|
||||
d3[3] = _mm_unpackhi_epi64( s[13], s[15] );
|
||||
}
|
||||
|
||||
/*
|
||||
static inline void dintrlv_4x64_512( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, const void *src )
|
||||
{
|
||||
uint64_t *d0 = (uint64_t*)dst0;
|
||||
uint64_t *d1 = (uint64_t*)dst1;
|
||||
uint64_t *d2 = (uint64_t*)dst2;
|
||||
uint64_t *d3 = (uint64_t*)dst3;
|
||||
const uint64_t *s = (const uint64_t*)src;
|
||||
d0[ 0] = s[ 0]; d1[ 0] = s[ 1]; d2[ 0] = s[ 2]; d3[ 0] = s[ 3];
|
||||
d0[ 1] = s[ 4]; d1[ 1] = s[ 5]; d2[ 1] = s[ 6]; d3[ 1] = s[ 7];
|
||||
d0[ 2] = s[ 8]; d1[ 2] = s[ 9]; d2[ 2] = s[10]; d3[ 2] = s[11];
|
||||
d0[ 3] = s[12]; d1[ 3] = s[13]; d2[ 3] = s[14]; d3[ 3] = s[15];
|
||||
d0[ 4] = s[16]; d1[ 4] = s[17]; d2[ 4] = s[18]; d3[ 4] = s[19];
|
||||
d0[ 5] = s[20]; d1[ 5] = s[21]; d2[ 5] = s[22]; d3[ 5] = s[23];
|
||||
d0[ 6] = s[24]; d1[ 6] = s[25]; d2[ 6] = s[26]; d3[ 6] = s[27];
|
||||
d0[ 7] = s[28]; d1[ 7] = s[29]; d2[ 7] = s[30]; d3[ 7] = s[31];
|
||||
}
|
||||
*/
|
||||
|
||||
static inline void extr_lane_4x64( void *d, const void *s,
|
||||
const int lane, const int bit_len )
|
||||
{
|
||||
@@ -1440,9 +1337,41 @@ static inline void extr_lane_4x64( void *d, const void *s,
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
// Doesn't really need AVX2, just SSSE3, but is only used with AVX2 code.
|
||||
|
||||
// There a alignment problems with the source buffer on Wwindows,
|
||||
// can't use 256 bit bswap.
|
||||
static inline void mm256_intrlv80_4x64( void *d, const void *src )
|
||||
{
|
||||
__m128i s0 = casti_m128i( src,0 );
|
||||
__m128i s1 = casti_m128i( src,1 );
|
||||
__m128i s2 = casti_m128i( src,2 );
|
||||
__m128i s3 = casti_m128i( src,3 );
|
||||
__m128i s4 = casti_m128i( src,4 );
|
||||
|
||||
casti_m128i( d, 0 ) =
|
||||
casti_m128i( d, 1 ) = _mm_shuffle_epi32( s0, 0x44 );
|
||||
casti_m128i( d, 2 ) =
|
||||
casti_m128i( d, 3 ) = _mm_shuffle_epi32( s0, 0xee );
|
||||
|
||||
casti_m128i( d, 4 ) =
|
||||
casti_m128i( d, 5 ) = _mm_shuffle_epi32( s1, 0x44 );
|
||||
casti_m128i( d, 6 ) =
|
||||
casti_m128i( d, 7 ) = _mm_shuffle_epi32( s1, 0xee );
|
||||
|
||||
casti_m128i( d, 8 ) =
|
||||
casti_m128i( d, 9 ) = _mm_shuffle_epi32( s2, 0x44 );
|
||||
casti_m128i( d, 10 ) =
|
||||
casti_m128i( d, 11 ) = _mm_shuffle_epi32( s2, 0xee );
|
||||
|
||||
casti_m128i( d, 12 ) =
|
||||
casti_m128i( d, 13 ) = _mm_shuffle_epi32( s3, 0x44 );
|
||||
casti_m128i( d, 14 ) =
|
||||
casti_m128i( d, 15 ) = _mm_shuffle_epi32( s3, 0xee );
|
||||
|
||||
casti_m128i( d, 16 ) =
|
||||
casti_m128i( d, 17 ) = _mm_shuffle_epi32( s4, 0x44 );
|
||||
casti_m128i( d, 18 ) =
|
||||
casti_m128i( d, 19 ) = _mm_shuffle_epi32( s4, 0xee );
|
||||
}
|
||||
|
||||
static inline void mm256_bswap32_intrlv80_4x64( void *d, const void *src )
|
||||
{
|
||||
@@ -1636,40 +1565,6 @@ static inline void intrlv_8x64_512( void *dst, const void *src0,
|
||||
d[31] = _mm_unpackhi_epi64( s6[3], s7[3] );
|
||||
}
|
||||
|
||||
/*
|
||||
#define ILEAVE_8x64( i ) do \
|
||||
{ \
|
||||
uint64_t *d = (uint64_t*)(dst) + ( (i) << 3 ); \
|
||||
d[0] = *( (const uint64_t*)(s0) +(i) ); \
|
||||
d[1] = *( (const uint64_t*)(s1) +(i) ); \
|
||||
d[2] = *( (const uint64_t*)(s2) +(i) ); \
|
||||
d[3] = *( (const uint64_t*)(s3) +(i) ); \
|
||||
d[4] = *( (const uint64_t*)(s4) +(i) ); \
|
||||
d[5] = *( (const uint64_t*)(s5) +(i) ); \
|
||||
d[6] = *( (const uint64_t*)(s6) +(i) ); \
|
||||
d[7] = *( (const uint64_t*)(s7) +(i) ); \
|
||||
} while(0)
|
||||
|
||||
static inline void intrlv_8x64( void *dst, const void *s0,
|
||||
const void *s1, const void *s2, const void *s3, const void *s4,
|
||||
const void *s5, const void *s6, const void *s7, int bit_len )
|
||||
{
|
||||
ILEAVE_8x64( 0 ); ILEAVE_8x64( 1 );
|
||||
ILEAVE_8x64( 2 ); ILEAVE_8x64( 3 );
|
||||
if ( bit_len <= 256 ) return;
|
||||
ILEAVE_8x64( 4 ); ILEAVE_8x64( 5 );
|
||||
ILEAVE_8x64( 6 ); ILEAVE_8x64( 7 );
|
||||
if ( bit_len <= 512 ) return;
|
||||
ILEAVE_8x64( 8 ); ILEAVE_8x64( 9 );
|
||||
if ( bit_len <= 640 ) return;
|
||||
ILEAVE_8x64( 10 ); ILEAVE_8x64( 11 );
|
||||
ILEAVE_8x64( 12 ); ILEAVE_8x64( 13 );
|
||||
ILEAVE_8x64( 14 ); ILEAVE_8x64( 15 );
|
||||
}
|
||||
|
||||
#undef ILEAVE_8x64
|
||||
*/
|
||||
|
||||
|
||||
static inline void dintrlv_8x64( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, void *dst4, void *dst5, void *dst6, void *dst7,
|
||||
@@ -1815,39 +1710,6 @@ static inline void dintrlv_8x64_512( void *dst0, void *dst1, void *dst2,
|
||||
d7[3] = _mm_unpackhi_epi64( s[27], s[31] );
|
||||
}
|
||||
|
||||
/*
|
||||
#define DLEAVE_8x64( i ) do \
|
||||
{ \
|
||||
const uint64_t *s = (const uint64_t*)(src) + ( (i) << 3 ); \
|
||||
*( (uint64_t*)(d0) +(i) ) = s[0]; \
|
||||
*( (uint64_t*)(d1) +(i) ) = s[1]; \
|
||||
*( (uint64_t*)(d2) +(i) ) = s[2]; \
|
||||
*( (uint64_t*)(d3) +(i) ) = s[3]; \
|
||||
*( (uint64_t*)(d4) +(i) ) = s[4]; \
|
||||
*( (uint64_t*)(d5) +(i) ) = s[5]; \
|
||||
*( (uint64_t*)(d6) +(i) ) = s[6]; \
|
||||
*( (uint64_t*)(d7) +(i) ) = s[7]; \
|
||||
} while(0)
|
||||
|
||||
static inline void dintrlv_8x64( void *d0, void *d1, void *d2, void *d3,
|
||||
void *d4, void *d5, void *d6, void *d7, const void *src, int bit_len )
|
||||
{
|
||||
DLEAVE_8x64( 0 ); DLEAVE_8x64( 1 );
|
||||
DLEAVE_8x64( 2 ); DLEAVE_8x64( 3 );
|
||||
if ( bit_len <= 256 ) return;
|
||||
DLEAVE_8x64( 4 ); DLEAVE_8x64( 5 );
|
||||
DLEAVE_8x64( 6 ); DLEAVE_8x64( 7 );
|
||||
if ( bit_len <= 512 ) return;
|
||||
DLEAVE_8x64( 8 ); DLEAVE_8x64( 9 );
|
||||
if ( bit_len <= 640 ) return;
|
||||
DLEAVE_8x64( 10 ); DLEAVE_8x64( 11 );
|
||||
DLEAVE_8x64( 12 ); DLEAVE_8x64( 13 );
|
||||
DLEAVE_8x64( 14 ); DLEAVE_8x64( 15 );
|
||||
}
|
||||
|
||||
#undef DLEAVE_8x64
|
||||
*/
|
||||
|
||||
static inline void extr_lane_8x64( void *d, const void *s,
|
||||
const int lane, const int bit_len )
|
||||
{
|
||||
|
@@ -178,7 +178,7 @@ static inline __m128i mm128_mask_32( const __m128i v, const int m )
|
||||
// Basic operations without equivalent SIMD intrinsic
|
||||
|
||||
// Bitwise not (~v)
|
||||
#define mm128_not( v ) _mm_xor_si128( (v), m128_neg1 )
|
||||
#define mm128_not( v ) _mm_xor_si128( v, m128_neg1 )
|
||||
|
||||
// Unary negation of elements (-v)
|
||||
#define mm128_negate_64( v ) _mm_sub_epi64( m128_zero, v )
|
||||
@@ -263,7 +263,8 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
_mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(__AVX512VL__)
|
||||
//#if defined(__AVX512F__) && defined(__AVX512VL__)
|
||||
|
||||
#define mm128_ror_64 _mm_ror_epi64
|
||||
#define mm128_rol_64 _mm_rol_epi64
|
||||
@@ -291,16 +292,13 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
#define mm128_swap_64( v ) _mm_shuffle_epi32( v, 0x4e )
|
||||
#define mm128_ror_1x32( v ) _mm_shuffle_epi32( v, 0x39 )
|
||||
#define mm128_rol_1x32( v ) _mm_shuffle_epi32( v, 0x93 )
|
||||
//#define mm128_swap_64( v ) _mm_alignr_epi8( v, v, 8 )
|
||||
//#define mm128_ror_1x32( v ) _mm_alignr_epi8( v, v, 4 )
|
||||
//#define mm128_rol_1x32( v ) _mm_alignr_epi8( v, v, 12 )
|
||||
|
||||
// Swap 32 bit elements in 64 bit lanes
|
||||
#define mm128_swap64_32( v ) _mm_shuffle_epi32( v, 0xb1 )
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
// Rotate right by c bytes
|
||||
// Rotate right by c bytes, no SSE2 equivalent.
|
||||
static inline __m128i mm128_ror_x8( const __m128i v, const int c )
|
||||
{ return _mm_alignr_epi8( v, v, c ); }
|
||||
|
||||
|
@@ -18,7 +18,7 @@
|
||||
#define mm256_mov64_256( i ) _mm256_castsi128_si256( mm128_mov64_128( i ) )
|
||||
#define mm256_mov32_256( i ) _mm256_castsi128_si256( mm128_mov32_128( i ) )
|
||||
|
||||
// Mo0ve low element of vector to integer.
|
||||
// Move low element of vector to integer.
|
||||
#define mm256_mov256_64( v ) mm128_mov128_64( _mm256_castsi256_si128( v ) )
|
||||
#define mm256_mov256_32( v ) mm128_mov128_32( _mm256_castsi256_si128( v ) )
|
||||
|
||||
@@ -42,7 +42,7 @@ static inline __m256i m256_const_64( const uint64_t i3, const uint64_t i2,
|
||||
// 128 bit vector argument
|
||||
#define m256_const1_128( v ) \
|
||||
_mm256_permute4x64_epi64( _mm256_castsi128_si256( v ), 0x44 )
|
||||
// 64 bit integer argument
|
||||
// 64 bit integer argument zero extended to 128 bits.
|
||||
#define m256_const1_i128( i ) m256_const1_128( mm128_mov64_128( i ) )
|
||||
#define m256_const1_64( i ) _mm256_broadcastq_epi64( mm128_mov64_128( i ) )
|
||||
#define m256_const1_32( i ) _mm256_broadcastd_epi32( mm128_mov32_128( i ) )
|
||||
@@ -168,7 +168,10 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
|
||||
_mm256_srli_epi32( v, 32-(c) ) )
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
// The spec says both F & VL are required, but just in case AMD
|
||||
// decides to implement ROL/R without AVX512F.
|
||||
#if defined(__AVX512VL__)
|
||||
//#if defined(__AVX512F__) && defined(__AVX512VL__)
|
||||
|
||||
// AVX512, control must be 8 bit immediate.
|
||||
|
||||
@@ -198,21 +201,14 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
|
||||
//
|
||||
// Rotate elements accross all lanes.
|
||||
//
|
||||
// AVX2 has no full vector permute for elements less than 32 bits.
|
||||
// AVX512 has finer granularity full vector permutes.
|
||||
// AVX512 has full vector alignr which might be faster, especially for 32 bit
|
||||
// Swap 128 bit elements in 256 bit vector.
|
||||
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
|
||||
|
||||
// Rotate 256 bit vector by one 64 bit element
|
||||
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
|
||||
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
static inline __m256i mm256_swap_128( const __m256i v )
|
||||
{ return _mm256_alignr_epi64( v, v, 2 ); }
|
||||
|
||||
static inline __m256i mm256_ror_1x64( const __m256i v )
|
||||
{ return _mm256_alignr_epi64( v, v, 1 ); }
|
||||
|
||||
static inline __m256i mm256_rol_1x64( const __m256i v )
|
||||
{ return _mm256_alignr_epi64( v, v, 3 ); }
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__)
|
||||
|
||||
static inline __m256i mm256_ror_1x32( const __m256i v )
|
||||
{ return _mm256_alignr_epi32( v, v, 1 ); }
|
||||
@@ -220,21 +216,8 @@ static inline __m256i mm256_ror_1x32( const __m256i v )
|
||||
static inline __m256i mm256_rol_1x32( const __m256i v )
|
||||
{ return _mm256_alignr_epi32( v, v, 7 ); }
|
||||
|
||||
static inline __m256i mm256_ror_3x32( const __m256i v )
|
||||
{ return _mm256_alignr_epi32( v, v, 3 ); }
|
||||
|
||||
static inline __m256i mm256_rol_3x32( const __m256i v )
|
||||
{ return _mm256_alignr_epi32( v, v, 5 ); }
|
||||
|
||||
#else // AVX2
|
||||
|
||||
// Swap 128 bit elements in 256 bit vector.
|
||||
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
|
||||
|
||||
// Rotate 256 bit vector by one 64 bit element
|
||||
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
|
||||
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
|
||||
|
||||
// Rotate 256 bit vector by one 32 bit element.
|
||||
#define mm256_ror_1x32( v ) \
|
||||
_mm256_permutevar8x32_epi32( v, \
|
||||
@@ -246,17 +229,6 @@ static inline __m256i mm256_rol_3x32( const __m256i v )
|
||||
m256_const_64( 0x0000000600000005, 0x0000000400000003, \
|
||||
0x0000000200000001, 0x0000000000000007 )
|
||||
|
||||
// Rotate 256 bit vector by three 32 bit elements (96 bits).
|
||||
#define mm256_ror_3x32( v ) \
|
||||
_mm256_permutevar8x32_epi32( v, \
|
||||
m256_const_64( 0x0000000200000001, 0x0000000000000007, \
|
||||
0x0000000600000005, 0x0000000400000003 )
|
||||
|
||||
#define mm256_rol_3x32( v ) \
|
||||
_mm256_permutevar8x32_epi32( v, \
|
||||
m256_const_64( 0x0000000400000003, 0x0000000200000001, \
|
||||
0x0000000000000007, 0x0000000600000005 )
|
||||
|
||||
#endif // AVX512 else AVX2
|
||||
|
||||
//
|
||||
|
148
util.c
148
util.c
@@ -943,6 +943,140 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint32_t bech32_polymod_step(uint32_t pre) {
|
||||
uint8_t b = pre >> 25;
|
||||
return ((pre & 0x1FFFFFF) << 5) ^
|
||||
(-((b >> 0) & 1) & 0x3b6a57b2UL) ^
|
||||
(-((b >> 1) & 1) & 0x26508e6dUL) ^
|
||||
(-((b >> 2) & 1) & 0x1ea119faUL) ^
|
||||
(-((b >> 3) & 1) & 0x3d4233ddUL) ^
|
||||
(-((b >> 4) & 1) & 0x2a1462b3UL);
|
||||
}
|
||||
|
||||
static const int8_t bech32_charset_rev[128] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
|
||||
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
|
||||
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
|
||||
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
|
||||
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
|
||||
};
|
||||
|
||||
static bool bech32_decode(char *hrp, uint8_t *data, size_t *data_len, const char *input) {
|
||||
uint32_t chk = 1;
|
||||
size_t i;
|
||||
size_t input_len = strlen(input);
|
||||
size_t hrp_len;
|
||||
int have_lower = 0, have_upper = 0;
|
||||
if (input_len < 8 || input_len > 90) {
|
||||
return false;
|
||||
}
|
||||
*data_len = 0;
|
||||
while (*data_len < input_len && input[(input_len - 1) - *data_len] != '1') {
|
||||
++(*data_len);
|
||||
}
|
||||
hrp_len = input_len - (1 + *data_len);
|
||||
if (1 + *data_len >= input_len || *data_len < 6) {
|
||||
return false;
|
||||
}
|
||||
*(data_len) -= 6;
|
||||
for (i = 0; i < hrp_len; ++i) {
|
||||
int ch = input[i];
|
||||
if (ch < 33 || ch > 126) {
|
||||
return false;
|
||||
}
|
||||
if (ch >= 'a' && ch <= 'z') {
|
||||
have_lower = 1;
|
||||
} else if (ch >= 'A' && ch <= 'Z') {
|
||||
have_upper = 1;
|
||||
ch = (ch - 'A') + 'a';
|
||||
}
|
||||
hrp[i] = ch;
|
||||
chk = bech32_polymod_step(chk) ^ (ch >> 5);
|
||||
}
|
||||
hrp[i] = 0;
|
||||
chk = bech32_polymod_step(chk);
|
||||
for (i = 0; i < hrp_len; ++i) {
|
||||
chk = bech32_polymod_step(chk) ^ (input[i] & 0x1f);
|
||||
}
|
||||
++i;
|
||||
while (i < input_len) {
|
||||
int v = (input[i] & 0x80) ? -1 : bech32_charset_rev[(int)input[i]];
|
||||
if (input[i] >= 'a' && input[i] <= 'z') have_lower = 1;
|
||||
if (input[i] >= 'A' && input[i] <= 'Z') have_upper = 1;
|
||||
if (v == -1) {
|
||||
return false;
|
||||
}
|
||||
chk = bech32_polymod_step(chk) ^ v;
|
||||
if (i + 6 < input_len) {
|
||||
data[i - (1 + hrp_len)] = v;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (have_lower && have_upper) {
|
||||
return false;
|
||||
}
|
||||
return chk == 1;
|
||||
}
|
||||
|
||||
static bool convert_bits(uint8_t *out, size_t *outlen, int outbits, const uint8_t *in, size_t inlen, int inbits, int pad) {
|
||||
uint32_t val = 0;
|
||||
int bits = 0;
|
||||
uint32_t maxv = (((uint32_t)1) << outbits) - 1;
|
||||
while (inlen--) {
|
||||
val = (val << inbits) | *(in++);
|
||||
bits += inbits;
|
||||
while (bits >= outbits) {
|
||||
bits -= outbits;
|
||||
out[(*outlen)++] = (val >> bits) & maxv;
|
||||
}
|
||||
}
|
||||
if (pad) {
|
||||
if (bits) {
|
||||
out[(*outlen)++] = (val << (outbits - bits)) & maxv;
|
||||
}
|
||||
} else if (((val << (outbits - bits)) & maxv) || bits >= inbits) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool segwit_addr_decode(int *witver, uint8_t *witdata, size_t *witdata_len, const char *addr) {
|
||||
uint8_t data[84];
|
||||
char hrp_actual[84];
|
||||
size_t data_len;
|
||||
if (!bech32_decode(hrp_actual, data, &data_len, addr)) return false;
|
||||
if (data_len == 0 || data_len > 65) return false;
|
||||
if (data[0] > 16) return false;
|
||||
*witdata_len = 0;
|
||||
if (!convert_bits(witdata, witdata_len, 8, data + 1, data_len - 1, 5, 0)) return false;
|
||||
if (*witdata_len < 2 || *witdata_len > 40) return false;
|
||||
if (data[0] == 0 && *witdata_len != 20 && *witdata_len != 32) return false;
|
||||
*witver = data[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
static size_t bech32_to_script(uint8_t *out, size_t outsz, const char *addr) {
|
||||
uint8_t witprog[40];
|
||||
size_t witprog_len;
|
||||
int witver;
|
||||
|
||||
if (!segwit_addr_decode(&witver, witprog, &witprog_len, addr))
|
||||
return 0;
|
||||
if (outsz < witprog_len + 2)
|
||||
return 0;
|
||||
out[0] = witver ? (0x50 + witver) : 0;
|
||||
out[1] = witprog_len;
|
||||
memcpy(out + 2, witprog, witprog_len);
|
||||
|
||||
if ( opt_debug )
|
||||
applog( LOG_INFO, "Coinbase address uses Bech32 coding");
|
||||
|
||||
return witprog_len + 2;
|
||||
}
|
||||
|
||||
size_t address_to_script( unsigned char *out, size_t outsz, const char *addr )
|
||||
{
|
||||
unsigned char addrbin[ pk_buffer_size_max ];
|
||||
@@ -950,12 +1084,15 @@ size_t address_to_script( unsigned char *out, size_t outsz, const char *addr )
|
||||
size_t rv;
|
||||
|
||||
if ( !b58dec( addrbin, outsz, addr ) )
|
||||
return 0;
|
||||
return bech32_to_script( out, outsz, addr );
|
||||
|
||||
addrver = b58check( addrbin, outsz, addr );
|
||||
if ( addrver < 0 )
|
||||
return 0;
|
||||
|
||||
if ( opt_debug )
|
||||
applog( LOG_INFO, "Coinbase address uses B58 coding");
|
||||
|
||||
switch ( addrver )
|
||||
{
|
||||
case 5: /* Bitcoin script hash */
|
||||
@@ -1486,9 +1623,6 @@ static bool stratum_parse_extranonce(struct stratum_ctx *sctx, json_t *params, i
|
||||
if ( !opt_quiet ) /* pool dynamic change */
|
||||
applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d",
|
||||
xnonce1, xn2_size);
|
||||
// if (pndx == 0 && opt_debug)
|
||||
// applog(LOG_DEBUG, "Stratum set nonce %s with extranonce2 size=%d",
|
||||
// xnonce1, xn2_size);
|
||||
|
||||
return true;
|
||||
out:
|
||||
@@ -1638,8 +1772,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
opt_extranonce = false;
|
||||
goto out;
|
||||
}
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_INFO, "Extranonce subscription enabled" );
|
||||
|
||||
sret = stratum_recv_line( sctx );
|
||||
if ( sret )
|
||||
@@ -1658,8 +1790,8 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
applog( LOG_WARNING, "Stratum answer id is not correct!" );
|
||||
}
|
||||
res_val = json_object_get( extra, "result" );
|
||||
// if (opt_debug && (!res_val || json_is_false(res_val)))
|
||||
// applog(LOG_DEBUG, "extranonce subscribe not supported");
|
||||
if (opt_debug && (!res_val || json_is_false(res_val)))
|
||||
applog(LOG_DEBUG, "Method extranonce.subscribe is not supported");
|
||||
json_decref( extra );
|
||||
}
|
||||
free(sret);
|
||||
|
Reference in New Issue
Block a user