This commit is contained in:
Jay D Dee
2021-04-08 18:09:31 -04:00
parent 902ec046dd
commit f3333b0070
17 changed files with 826 additions and 336 deletions

View File

@@ -196,6 +196,7 @@ cpuminer_SOURCES = \
algo/verthash/Verthash.c \ algo/verthash/Verthash.c \
algo/verthash/fopen_utf8.c \ algo/verthash/fopen_utf8.c \
algo/verthash/tiny_sha3/sha3.c \ algo/verthash/tiny_sha3/sha3.c \
algo/verthash/tiny_sha3/sha3-4way.c \
algo/whirlpool/sph_whirlpool.c \ algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \ algo/whirlpool/whirlpool-hash-4way.c \
algo/whirlpool/whirlpool-gate.c \ algo/whirlpool/whirlpool-gate.c \

View File

@@ -135,7 +135,7 @@ Supported Algorithms
x14 X14 x14 X14
x15 X15 x15 X15
x16r x16r
x16rv2 Ravencoin (RVN) x16rv2
x16rt Gincoin (GIN) x16rt Gincoin (GIN)
x16rt-veil Veil (VEIL) x16rt-veil Veil (VEIL)
x16s Pigeoncoin (PGN) x16s Pigeoncoin (PGN)

View File

@@ -65,6 +65,14 @@ If not what makes it happen or not happen?
Change Log Change Log
---------- ----------
v3.16.2
Verthash: midstate prehash optimization for all architectures.
Verthash: AVX2 optimization.
GBT: added support for Bech32 addresses, untested.
Linux: added CPU frequency to benchmark log.
Fixed integer overflow in time calculations.
v3.16.1 v3.16.1
New options for verthash: New options for verthash:
@@ -72,16 +80,12 @@ New options for verthash:
data file, default is "verthash.dat" in the current directory. data file, default is "verthash.dat" in the current directory.
--verify to perform the data file integrity check at startup, default is --verify to perform the data file integrity check at startup, default is
not to verify data file integrity. not to verify data file integrity.
Support for creation of default verthash data file if: Support for creation of default verthash data file if:
1) --data-file option is not used, 1) --data-file option is not used,
2) no default data file is found in the current directory, and, 2) no default data file is found in the current directory, and,
3) --verify option is used. 3) --verify option is used.
More detailed logs related to verthash data file. More detailed logs related to verthash data file.
Small verthash performance improvement. Small verthash performance improvement.
Fixed detection of corrupt stats caused by networking issues. Fixed detection of corrupt stats caused by networking issues.
v3.16.0 v3.16.0

View File

@@ -134,87 +134,117 @@ static inline uint32_t fnv1a(const uint32_t a, const uint32_t b)
return (a ^ b) * 0x1000193; return (a ^ b) * 0x1000193;
} }
void verthash_hash(const unsigned char* blob_bytes, void verthash_hash( const unsigned char* blob_bytes,
const size_t blob_size, const size_t blob_size,
const unsigned char(*input)[VH_HEADER_SIZE], const unsigned char(*input)[VH_HEADER_SIZE],
unsigned char(*output)[VH_HASH_OUT_SIZE]) unsigned char(*output)[VH_HASH_OUT_SIZE] )
{ {
unsigned char p1[VH_HASH_OUT_SIZE] __attribute__ ((aligned (64))); unsigned char p1[ VH_HASH_OUT_SIZE ] __attribute__ ((aligned (64)));
sha3(&input[0], VH_HEADER_SIZE, &p1[0], VH_HASH_OUT_SIZE); unsigned char p0[ VH_N_SUBSET ] __attribute__ ((aligned (64)));
unsigned char p0[VH_N_SUBSET];
unsigned char input_header[VH_HEADER_SIZE] __attribute__ ((aligned (64)));
memcpy(input_header, input, VH_HEADER_SIZE);
for (size_t i = 0; i < VH_N_ITER; ++i)
{
input_header[0] += 1;
sha3(&input_header[0], VH_HEADER_SIZE, p0 + i * VH_P0_SIZE, VH_P0_SIZE);
}
uint32_t* p0_index = (uint32_t*)p0;
uint32_t seek_indexes[VH_N_INDEXES] __attribute__ ((aligned (64))); uint32_t seek_indexes[VH_N_INDEXES] __attribute__ ((aligned (64)));
uint32_t* p0_index = (uint32_t*)p0;
verthash_sha3_512_final_8( p0, ( (uint64_t*)input )[ 9 ] );
for ( size_t x = 0; x < VH_N_ROT; ++x ) for ( size_t x = 0; x < VH_N_ROT; ++x )
{ {
memcpy( seek_indexes + x * (VH_N_SUBSET / sizeof(uint32_t)), memcpy( seek_indexes + x * (VH_N_SUBSET / sizeof(uint32_t)),
p0, VH_N_SUBSET); p0, VH_N_SUBSET);
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// 512 bit vector processing is actually slower because it reduces the CPU
// clock significantly, which also slows mem access. The AVX512 rol instruction
// is still available for smaller vectors.
// for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 16 )
// {
// __m512i *p0_v = (__m512i*)( p0_index + y );
// *p0_v = mm512_rol_32( *p0_v, 1 );
// }
#if defined(__AVX2__) #if defined(__AVX2__)
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 8 ) for ( size_t y = 0; y < VH_N_SUBSET / sizeof(__m256i); y += 8)
{ {
__m256i *p0_v = (__m256i*)( p0_index + y ); casti_m256i( p0_index, y ) = mm256_rol_32(
*p0_v = mm256_rol_32( *p0_v, 1 ); casti_m256i( p0_index, y ), 1 );
casti_m256i( p0_index, y+1 ) = mm256_rol_32(
casti_m256i( p0_index, y+1 ), 1 );
casti_m256i( p0_index, y+2 ) = mm256_rol_32(
casti_m256i( p0_index, y+2 ), 1 );
casti_m256i( p0_index, y+3 ) = mm256_rol_32(
casti_m256i( p0_index, y+3 ), 1 );
casti_m256i( p0_index, y+4 ) = mm256_rol_32(
casti_m256i( p0_index, y+4 ), 1 );
casti_m256i( p0_index, y+5 ) = mm256_rol_32(
casti_m256i( p0_index, y+5 ), 1 );
casti_m256i( p0_index, y+6 ) = mm256_rol_32(
casti_m256i( p0_index, y+6 ), 1 );
casti_m256i( p0_index, y+7 ) = mm256_rol_32(
casti_m256i( p0_index, y+7 ), 1 );
} }
#else #else
for ( size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); y += 4 ) for ( size_t y = 0; y < VH_N_SUBSET / sizeof(__m128i); y += 8)
{ {
__m128i *p0_v = (__m128i*)( p0_index + y ); casti_m128i( p0_index, y ) = mm128_rol_32(
*p0_v = mm128_rol_32( *p0_v, 1 ); casti_m128i( p0_index, y ), 1 );
casti_m128i( p0_index, y+1 ) = mm128_rol_32(
casti_m128i( p0_index, y+1 ), 1 );
casti_m128i( p0_index, y+2 ) = mm128_rol_32(
casti_m128i( p0_index, y+2 ), 1 );
casti_m128i( p0_index, y+3 ) = mm128_rol_32(
casti_m128i( p0_index, y+3 ), 1 );
casti_m128i( p0_index, y+4 ) = mm128_rol_32(
casti_m128i( p0_index, y+4 ), 1 );
casti_m128i( p0_index, y+5 ) = mm128_rol_32(
casti_m128i( p0_index, y+5 ), 1 );
casti_m128i( p0_index, y+6 ) = mm128_rol_32(
casti_m128i( p0_index, y+6 ), 1 );
casti_m128i( p0_index, y+7 ) = mm128_rol_32(
casti_m128i( p0_index, y+7 ), 1 );
} }
#endif #endif
// for (size_t y = 0; y < VH_N_SUBSET / sizeof(uint32_t); ++y)
// {
// *(p0_index + y) = ( *(p0_index + y) << 1 )
// | ( 1 & (*(p0_index + y) >> 31) );
// }
} }
sha3( &input[0], VH_HEADER_SIZE, &p1[0], VH_HASH_OUT_SIZE );
uint32_t* p1_32 = (uint32_t*)p1; uint32_t* p1_32 = (uint32_t*)p1;
uint32_t* blob_bytes_32 = (uint32_t*)blob_bytes; uint32_t* blob_bytes_32 = (uint32_t*)blob_bytes;
uint32_t value_accumulator = 0x811c9dc5; uint32_t value_accumulator = 0x811c9dc5;
const uint32_t mdiv = ((blob_size - VH_HASH_OUT_SIZE) / VH_BYTE_ALIGNMENT) + 1; const uint32_t mdiv = ( ( blob_size - VH_HASH_OUT_SIZE )
for (size_t i = 0; i < VH_N_INDEXES; i++) / VH_BYTE_ALIGNMENT ) + 1;
#if defined (__AVX2__)
const __m256i k = _mm256_set1_epi32( 0x1000193 );
#elif defined(__SSE41__)
const __m128i k = _mm_set1_epi32( 0x1000193 );
#endif
for ( size_t i = 0; i < VH_N_INDEXES; i++ )
{ {
const uint32_t offset = (fnv1a(seek_indexes[i], value_accumulator) % mdiv) * VH_BYTE_ALIGNMENT / sizeof(uint32_t); const uint32_t offset =
( fnv1a( seek_indexes[i], value_accumulator) % mdiv )
* ( VH_BYTE_ALIGNMENT / sizeof(uint32_t) );
const uint32_t *blob_off = blob_bytes_32 + offset; const uint32_t *blob_off = blob_bytes_32 + offset;
for (size_t i2 = 0; i2 < VH_HASH_OUT_SIZE / sizeof(uint32_t); i2++)
{ // update value accumulator for next seek index
const uint32_t value = *( blob_off + i2 ); value_accumulator = fnv1a( value_accumulator, blob_off[0] );
uint32_t* p1_ptr = p1_32 + i2; value_accumulator = fnv1a( value_accumulator, blob_off[1] );
*p1_ptr = fnv1a( *p1_ptr, value ); value_accumulator = fnv1a( value_accumulator, blob_off[2] );
value_accumulator = fnv1a( value_accumulator, value ); value_accumulator = fnv1a( value_accumulator, blob_off[3] );
} value_accumulator = fnv1a( value_accumulator, blob_off[4] );
value_accumulator = fnv1a( value_accumulator, blob_off[5] );
value_accumulator = fnv1a( value_accumulator, blob_off[6] );
value_accumulator = fnv1a( value_accumulator, blob_off[7] );
#if defined (__AVX2__)
*(__m256i*)p1_32 = _mm256_mullo_epi32( _mm256_xor_si256(
*(__m256i*)p1_32, *(__m256i*)blob_off ), k );
#elif defined(__SSE41__)
casti_m128i( p1_32, 0 ) = _mm_mullo_epi32( _mm_xor_si128(
casti_m128i( p1_32, 0 ), casti_m128i( blob_off, 0 ) ), k );
casti_m128i( p1_32, 1 ) = _mm_mullo_epi32( _mm_xor_si128(
casti_m128i( p1_32, 1 ), casti_m128i( blob_off, 1 ) ), k );
#else
for ( size_t i2 = 0; i2 < VH_HASH_OUT_SIZE / sizeof(uint32_t); i2++ )
p1_32[i2] = fnv1a( p1_32[i2], blob_off[i2] );
#endif
} }
memcpy(output, p1, VH_HASH_OUT_SIZE); memcpy( output, p1, VH_HASH_OUT_SIZE );
} }
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------

View File

@@ -52,6 +52,8 @@ void verthash_hash(const unsigned char* blob_bytes,
const unsigned char(*input)[VH_HEADER_SIZE], const unsigned char(*input)[VH_HEADER_SIZE],
unsigned char(*output)[VH_HASH_OUT_SIZE]); unsigned char(*output)[VH_HASH_OUT_SIZE]);
void verthash_sha3_512_prehash_72( const void *input );
void verthash_sha3_512_final_8( void *hash, const uint64_t nonce );
#endif // !Verthash_INCLUDE_ONCE #endif // !Verthash_INCLUDE_ONCE

View File

@@ -0,0 +1,301 @@
#if defined(__AVX2__)
// sha3-4way.c
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
// vectorization by JayDDee 2021-03-27
//
// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
// Revised 03-Sep-15 for portability + OpenSSL - style API
#include "sha3-4way.h"
// constants
// Keccak-f[1600] round constants for the Iota step, one per round
// (24 rounds). Values per FIPS PUB 202 / the Keccak reference.
static const uint64_t keccakf_rndc[24] = {
   0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
   0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
   0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
   0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
   0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
   0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
   0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
   0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
// Keccak-f[1600] permutation over 4 interleaved lanes.
// st[25] is the 5x5 Keccak state; each __m256i packs the same 64-bit
// state word from 4 independent SHA3 streams, so one pass permutes
// all 4 hashes at once.
void sha3_4way_keccakf( __m256i st[25] )
{
   int i, j, r;
   __m256i t, bc[5];

   for ( r = 0; r < KECCAKF_ROUNDS; r++ )
   {
      // Theta: bc[x] = XOR of the 5 words in column x
      bc[0] = _mm256_xor_si256( st[0],
                          mm256_xor4( st[5], st[10], st[15], st[20] ) );
      bc[1] = _mm256_xor_si256( st[1],
                          mm256_xor4( st[6], st[11], st[16], st[21] ) );
      bc[2] = _mm256_xor_si256( st[2],
                          mm256_xor4( st[7], st[12], st[17], st[22] ) );
      bc[3] = _mm256_xor_si256( st[3],
                          mm256_xor4( st[8], st[13], st[18], st[23] ) );
      bc[4] = _mm256_xor_si256( st[4],
                          mm256_xor4( st[9], st[14], st[19], st[24] ) );

      // Theta: fold t = bc[x-1] ^ rol(bc[x+1],1) into every word of column x
      for ( i = 0; i < 5; i++ )
      {
         t = _mm256_xor_si256( bc[ (i+4) % 5 ],
                               mm256_rol_64( bc[ (i+1) % 5 ], 1 ) );
         st[ i    ] = _mm256_xor_si256( st[ i    ], t );
         st[ i+5  ] = _mm256_xor_si256( st[ i+ 5 ], t );
         st[ i+10 ] = _mm256_xor_si256( st[ i+10 ], t );
         st[ i+15 ] = _mm256_xor_si256( st[ i+15 ], t );
         st[ i+20 ] = _mm256_xor_si256( st[ i+20 ], t );
      }

      // Rho Pi: the 24-step rotate-and-permute cycle, fully unrolled.
      // Each step rotates the carried word t and stores it at the next
      // pi-permutation index; indices/rotations match the keccakf_piln /
      // keccakf_rotc tables of the scalar reference implementation.
#define RHO_PI( i, c ) \
   bc[0] = st[ i ]; \
   st[ i ] = mm256_rol_64( t, c ); \
   t = bc[0]

      t = st[1];
      RHO_PI( 10,  1 );
      RHO_PI(  7,  3 );
      RHO_PI( 11,  6 );
      RHO_PI( 17, 10 );
      RHO_PI( 18, 15 );
      RHO_PI(  3, 21 );
      RHO_PI(  5, 28 );
      RHO_PI( 16, 36 );
      RHO_PI(  8, 45 );
      RHO_PI( 21, 55 );
      RHO_PI( 24,  2 );
      RHO_PI(  4, 14 );
      RHO_PI( 15, 27 );
      RHO_PI( 23, 41 );
      RHO_PI( 19, 56 );
      RHO_PI( 13,  8 );
      RHO_PI( 12, 25 );
      RHO_PI(  2, 43 );
      RHO_PI( 20, 62 );
      RHO_PI( 14, 18 );
      RHO_PI( 22, 39 );
      RHO_PI(  9, 61 );
      RHO_PI(  6, 20 );
      RHO_PI(  1, 44 );

#undef RHO_PI

      // Chi: a ^= ~b & c across each row (andnot(b,c) == ~b & c)
      for ( j = 0; j < 25; j += 5 )
      {
         memcpy( bc, &st[ j ], 5*32 );   // snapshot the row (5 x 32 bytes)
         st[ j   ] = _mm256_xor_si256( st[ j   ],
                                  _mm256_andnot_si256( bc[1], bc[2] ) );
         st[ j+1 ] = _mm256_xor_si256( st[ j+1 ],
                                  _mm256_andnot_si256( bc[2], bc[3] ) );
         st[ j+2 ] = _mm256_xor_si256( st[ j+2 ],
                                  _mm256_andnot_si256( bc[3], bc[4] ) );
         st[ j+3 ] = _mm256_xor_si256( st[ j+3 ],
                                  _mm256_andnot_si256( bc[4], bc[0] ) );
         st[ j+4 ] = _mm256_xor_si256( st[ j+4 ],
                                  _mm256_andnot_si256( bc[0], bc[1] ) );
      }

      // Iota: inject the round constant into word (0,0) of every lane
      st[0] = _mm256_xor_si256( st[0],
                                _mm256_set1_epi64x( keccakf_rndc[ r ] ) );
   }
}
// Prepare a 4-lane SHA3 context for digests of mdlen bytes (per lane).
// Clears the Keccak state, derives the sponge rate (200 - 2*mdlen bytes)
// and resets the absorb position. Always returns 1 (OpenSSL-style API).
int sha3_4way_init( sha3_4way_ctx_t *c, int mdlen )
{
   memset( c->st, 0, sizeof c->st );   // all-zero state == m256_zero x 25
   c->mdlen = mdlen;
   c->rsiz  = 200 - 2 * mdlen;
   c->pt    = 0;
   return 1;
}
// Absorb data into the 4-lane sponge. data is 4-way interleaved: one
// __m256i per 64-bit message word (same word position for all 4 lanes),
// and len counts bytes PER LANE. Only whole 64-bit words are absorbed;
// any trailing len % 8 bytes are silently ignored (callers in this
// project always pass multiples of 8).
// data is dereferenced as __m256i, so it should be 32-byte aligned —
// TODO confirm all call sites guarantee this.
// Always returns 1 (OpenSSL-style API).
int sha3_4way_update( sha3_4way_ctx_t *c, const void *data, size_t len )
{
   size_t i;
   int j = c->pt;                  // current word position within the rate
   const int rsiz = c->rsiz / 8;   // sponge rate in 64-bit words
   // Use size_t for the word count: the original 'const int l = len / 8'
   // truncated size_t to int and made 'i < l' a signed/unsigned compare.
   const size_t l = len / 8;

   for ( i = 0; i < l; i++ )
   {
      c->st[ j ] = _mm256_xor_si256( c->st[ j ],
                                     ( (const __m256i*)data )[i] );
      j++;
      if ( j >= rsiz )
      {
         sha3_4way_keccakf( c->st );   // rate block full: permute
         j = 0;
      }
   }
   c->pt = j;
   return 1;
}
// Finalize all 4 lanes: apply SHA3 domain padding, run the last
// permutation and copy the digests to md (4 interleaved lanes,
// mdlen * 4 bytes total). c->pt indexes 64-bit words, matching the
// bookkeeping in sha3_4way_update. Always returns 1.
int sha3_4way_final( void *md, sha3_4way_ctx_t *c )
{
   // SHA3 suffix 0b01 plus first pad bit -> byte 0x06 at the absorb position
   c->st[ c->pt ] = _mm256_xor_si256( c->st[ c->pt ],
                                      m256_const1_64( 6 ) );
   // final pad bit 0x80 in the last byte of the rate block
   c->st[ c->rsiz / 8 - 1 ] =
                    _mm256_xor_si256( c->st[ c->rsiz / 8 - 1 ],
                                      m256_const1_64( 0x8000000000000000 ) );
   sha3_4way_keccakf( c->st );
   memcpy( md, c->st, c->mdlen * 4 );   // mdlen bytes for each of 4 lanes
   return 1;
}
// One-shot convenience wrapper: init/absorb/finalize a 4-way SHA3 over
// inlen bytes per lane of interleaved input; digest (mdlen bytes per
// lane) is written to md. Returns md.
void *sha3_4way( const void *in, size_t inlen, void *md, int mdlen )
{
   sha3_4way_ctx_t c;

   sha3_4way_init( &c, mdlen );
   sha3_4way_update( &c, in, inlen );
   sha3_4way_final( md, &c );

   return md;
}
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// Keccak-f[1600] permutation over 8 interleaved lanes (AVX-512).
// Each __m512i packs the same 64-bit state word from 8 independent
// SHA3 streams. Uses the native _mm512_rol_epi64 rotate.
void sha3_8way_keccakf( __m512i st[25] )
{
   int i, j, r;
   __m512i t, bc[5];

   // 24 rounds of theta / rho-pi / chi / iota
   for ( r = 0; r < KECCAKF_ROUNDS; r++ )
   {
      // Theta: column parities, then fold into every word of each column
      for ( i = 0; i < 5; i++ )
         bc[i] = _mm512_xor_si512( st[i],
             mm512_xor4( st[ i+5 ], st[ i+10 ], st[ i+15 ], st[i+20 ] ) );

      for ( i = 0; i < 5; i++ )
      {
         t = _mm512_xor_si512( bc[(i + 4) % 5],
                               _mm512_rol_epi64( bc[(i + 1) % 5], 1 ) );
         for ( j = 0; j < 25; j += 5 )
            st[j + i] = _mm512_xor_si512( st[j + i], t );
      }

      // Rho Pi: unrolled rotate-and-permute cycle; indices/rotations match
      // the keccakf_piln / keccakf_rotc tables of the scalar reference.
#define RHO_PI( i, c ) \
   bc[0] = st[ i ]; \
   st[ i ] = _mm512_rol_epi64( t, c ); \
   t = bc[0]

      t = st[1];
      RHO_PI( 10,  1 );
      RHO_PI(  7,  3 );
      RHO_PI( 11,  6 );
      RHO_PI( 17, 10 );
      RHO_PI( 18, 15 );
      RHO_PI(  3, 21 );
      RHO_PI(  5, 28 );
      RHO_PI( 16, 36 );
      RHO_PI(  8, 45 );
      RHO_PI( 21, 55 );
      RHO_PI( 24,  2 );
      RHO_PI(  4, 14 );
      RHO_PI( 15, 27 );
      RHO_PI( 23, 41 );
      RHO_PI( 19, 56 );
      RHO_PI( 13,  8 );
      RHO_PI( 12, 25 );
      RHO_PI(  2, 43 );
      RHO_PI( 20, 62 );
      RHO_PI( 14, 18 );
      RHO_PI( 22, 39 );
      RHO_PI(  9, 61 );
      RHO_PI(  6, 20 );
      RHO_PI(  1, 44 );

#undef RHO_PI

      // Chi: a ^= ~b & c across each row
      for ( j = 0; j < 25; j += 5 )
      {
         for ( i = 0; i < 5; i++ )
            bc[i] = st[j + i];            // snapshot the row
         for ( i = 0; i < 5; i++ )
            st[ j+i ] = _mm512_xor_si512( st[ j+i ], _mm512_andnot_si512(
                                   bc[ (i+1) % 5 ], bc[ (i+2) % 5 ] ) );
      }

      // Iota: inject the round constant into word (0,0) of every lane
      st[0] = _mm512_xor_si512( st[0], _mm512_set1_epi64( keccakf_rndc[r] ) );
   }
}
// Initialize the context for SHA3
// Prepare an 8-lane SHA3 context for digests of mdlen bytes (per lane).
// Clears the Keccak state, derives the sponge rate (200 - 2*mdlen bytes)
// and resets the absorb position. Always returns 1 (OpenSSL-style API).
int sha3_8way_init( sha3_8way_ctx_t *c, int mdlen )
{
   memset( c->st, 0, sizeof c->st );   // all-zero state == m512_zero x 25
   c->mdlen = mdlen;
   c->rsiz  = 200 - 2 * mdlen;
   c->pt    = 0;
   return 1;
}
// update state with more data
// Absorb data into the 8-lane sponge. data is 8-way interleaved: one
// __m512i per 64-bit message word (same word position for all 8 lanes),
// and len counts bytes PER LANE. Only whole 64-bit words are absorbed;
// any trailing len % 8 bytes are silently ignored (callers in this
// project always pass multiples of 8).
// data is dereferenced as __m512i, so it should be 64-byte aligned —
// TODO confirm all call sites guarantee this.
// Always returns 1 (OpenSSL-style API).
int sha3_8way_update( sha3_8way_ctx_t *c, const void *data, size_t len )
{
   size_t i;
   int j = c->pt;                  // current word position within the rate
   const int rsiz = c->rsiz / 8;   // sponge rate in 64-bit words
   // Use size_t for the word count: the original 'const int l = len / 8'
   // truncated size_t to int and made 'i < l' a signed/unsigned compare.
   const size_t l = len / 8;

   for ( i = 0; i < l; i++ )
   {
      c->st[ j ] = _mm512_xor_si512( c->st[ j ],
                                     ( (const __m512i*)data )[i] );
      j++;
      if ( j >= rsiz )
      {
         sha3_8way_keccakf( c->st );   // rate block full: permute
         j = 0;
      }
   }
   c->pt = j;
   return 1;
}
// finalize and output a hash
// Finalize all 8 lanes: apply SHA3 domain padding, run the last
// permutation and copy the digests to md (8 interleaved lanes,
// mdlen * 8 bytes total). c->pt indexes 64-bit words, matching the
// bookkeeping in sha3_8way_update. Always returns 1.
int sha3_8way_final( void *md, sha3_8way_ctx_t *c )
{
   // SHA3 suffix 0b01 plus first pad bit -> byte 0x06 at the absorb position
   c->st[ c->pt ] =
            _mm512_xor_si512( c->st[ c->pt ],
                              m512_const1_64( 6 ) );
   // final pad bit 0x80 in the last byte of the rate block
   c->st[ c->rsiz / 8 - 1 ] =
            _mm512_xor_si512( c->st[ c->rsiz / 8 - 1 ],
                              m512_const1_64( 0x8000000000000000 ) );
   sha3_8way_keccakf( c->st );
   memcpy( md, c->st, c->mdlen * 8 );   // mdlen bytes for each of 8 lanes
   return 1;
}
// compute a SHA-3 hash (md) of given byte length from "in"
// One-shot convenience wrapper: init/absorb/finalize an 8-way SHA3 over
// inlen bytes per lane of interleaved input; digest (mdlen bytes per
// lane) is written to md. Returns md.
void *sha3_8way( const void *in, size_t inlen, void *md, int mdlen )
{
   sha3_8way_ctx_t c;

   sha3_8way_init( &c, mdlen );
   sha3_8way_update( &c, in, inlen );
   sha3_8way_final( md, &c );

   return md;
}
#endif // AVX512
#endif // AVX2

View File

@@ -0,0 +1,67 @@
// sha3-4way.h
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
// 2021-03-27 JayDDee
//
#ifndef SHA3_4WAY_H
#define SHA3_4WAY_H
#include <stddef.h>
#include <stdint.h>
#include "simd-utils.h"
#if defined(__cplusplus)
extern "C" {
#endif
#ifndef KECCAKF_ROUNDS
#define KECCAKF_ROUNDS 24
#endif
#if defined(__AVX2__)
// Context for 4 interleaved SHA3 streams (AVX2). 64-byte alignment keeps
// the __m256i state friendly to aligned vector loads/stores.
// Fix: removed the stray second ';' — an empty file-scope declaration is
// a constraint violation in strict ISO C (pre-C23).
typedef struct
{
    __m256i st[25];      // Keccak state: 25 x 64-bit words, 4 lanes each
    int pt, rsiz, mdlen; // absorb position (64-bit words), rate (bytes),
                         // digest length (bytes); these don't overflow
} sha3_4way_ctx_t __attribute__ ((aligned (64)));
// Compression function.
void sha3_4way_keccakf( __m256i st[25] );
// OpenSSL-like interface
int sha3_4way_init( sha3_4way_ctx_t *c, int mdlen ); // mdlen = hash output in bytes
int sha3_4way_update( sha3_4way_ctx_t *c, const void *data, size_t len );
int sha3_4way_final( void *md, sha3_4way_ctx_t *c ); // digest goes to md
// compute a sha3 hash (md) of given byte length from "in"
void *sha3_4way( const void *in, size_t inlen, void *md, int mdlen );
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// state context
// Context for 8 interleaved SHA3 streams (AVX-512). 64-byte alignment
// matches the natural alignment of the __m512i state words.
// Fix: removed the stray second ';' — an empty file-scope declaration is
// a constraint violation in strict ISO C (pre-C23).
typedef struct
{
    __m512i st[25];      // Keccak state: 25 x 64-bit words, 8 lanes each
    int pt, rsiz, mdlen; // absorb position (64-bit words), rate (bytes),
                         // digest length (bytes); these don't overflow
} sha3_8way_ctx_t __attribute__ ((aligned (64)));
// Compression function.
void sha3_8way_keccakf( __m512i st[25] );
// OpenSSL-like interface
int sha3_8way_init( sha3_8way_ctx_t *c, int mdlen ); // mdlen = hash output in bytes
int sha3_8way_update( sha3_8way_ctx_t *c, const void *data, size_t len );
int sha3_8way_final( void *md, sha3_8way_ctx_t *c ); // digest goes to md
// compute a sha3 hash (md) of given byte length from "in"
void *sha3_8way( const void *in, size_t inlen, void *md, int mdlen );
#endif // AVX512
#endif // AVX2
#if defined(__cplusplus)
}
#endif
#endif

View File

@@ -5,6 +5,7 @@
// Revised 03-Sep-15 for portability + OpenSSL - style API // Revised 03-Sep-15 for portability + OpenSSL - style API
#include "sha3.h" #include "sha3.h"
#include <string.h>
// update the state with given number of rounds // update the state with given number of rounds
@@ -21,6 +22,7 @@ void sha3_keccakf(uint64_t st[25])
0x000000000000800a, 0x800000008000000a, 0x8000000080008081, 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
}; };
/*
const int keccakf_rotc[24] = { const int keccakf_rotc[24] = {
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
@@ -29,6 +31,7 @@ void sha3_keccakf(uint64_t st[25])
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
}; };
*/
// variables // variables
int i, j, r; int i, j, r;
@@ -60,14 +63,50 @@ void sha3_keccakf(uint64_t st[25])
st[j + i] ^= t; st[j + i] ^= t;
} }
// Rho Pi // Rho Pi
#define RHO_PI( i, c ) \
bc[0] = st[ i ]; \
st[ i ] = ROTL64( t, c ); \
t = bc[0]
t = st[1]; t = st[1];
RHO_PI( 10, 1 );
RHO_PI( 7, 3 );
RHO_PI( 11, 6 );
RHO_PI( 17, 10 );
RHO_PI( 18, 15 );
RHO_PI( 3, 21 );
RHO_PI( 5, 28 );
RHO_PI( 16, 36 );
RHO_PI( 8, 45 );
RHO_PI( 21, 55 );
RHO_PI( 24, 2 );
RHO_PI( 4, 14 );
RHO_PI( 15, 27 );
RHO_PI( 23, 41 );
RHO_PI( 19, 56 );
RHO_PI( 13, 8 );
RHO_PI( 12, 25 );
RHO_PI( 2, 43 );
RHO_PI( 20, 62 );
RHO_PI( 14, 18 );
RHO_PI( 22, 39 );
RHO_PI( 9, 61 );
RHO_PI( 6, 20 );
RHO_PI( 1, 44 );
#undef RHO_PI
/*
for (i = 0; i < 24; i++) { for (i = 0; i < 24; i++) {
j = keccakf_piln[i]; j = keccakf_piln[i];
bc[0] = st[j]; bc[0] = st[j];
st[j] = ROTL64(t, keccakf_rotc[i]); st[j] = ROTL64(t, keccakf_rotc[i]);
t = bc[0]; t = bc[0];
} }
*/
// Chi // Chi
for (j = 0; j < 25; j += 5) { for (j = 0; j < 25; j += 5) {
@@ -118,17 +157,20 @@ int sha3_init(sha3_ctx_t *c, int mdlen)
int sha3_update(sha3_ctx_t *c, const void *data, size_t len) int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
{ {
size_t i; size_t i;
int j; int j = c->pt / 8;
const int rsiz = c->rsiz / 8;
const int l = len / 8;
j = c->pt; for ( i = 0; i < l; i++ )
for (i = 0; i < len; i++) { {
c->st.b[j++] ^= ((const uint8_t *) data)[i]; c->st.q[ j++ ] ^= ( ((const uint64_t *) data) [i] );
if (j >= c->rsiz) { if ( j >= rsiz )
sha3_keccakf(c->st.q); {
sha3_keccakf( c->st.q );
j = 0; j = 0;
} }
} }
c->pt = j; c->pt = j*8;
return 1; return 1;
} }
@@ -137,16 +179,10 @@ int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
int sha3_final(void *md, sha3_ctx_t *c) int sha3_final(void *md, sha3_ctx_t *c)
{ {
int i; c->st.q[ c->pt / 8 ] ^= 6;
c->st.q[ c->rsiz / 8 - 1 ] ^= 0x8000000000000000;
c->st.b[c->pt] ^= 0x06;
c->st.b[c->rsiz - 1] ^= 0x80;
sha3_keccakf(c->st.q); sha3_keccakf(c->st.q);
memcpy( md, c->st.q, c->mdlen );
for (i = 0; i < c->mdlen; i++) {
((uint8_t *) md)[i] = c->st.b[i];
}
return 1; return 1;
} }
@@ -155,7 +191,6 @@ int sha3_final(void *md, sha3_ctx_t *c)
void *sha3(const void *in, size_t inlen, void *md, int mdlen) void *sha3(const void *in, size_t inlen, void *md, int mdlen)
{ {
sha3_ctx_t sha3; sha3_ctx_t sha3;
sha3_init(&sha3, mdlen); sha3_init(&sha3, mdlen);
sha3_update(&sha3, in, inlen); sha3_update(&sha3, in, inlen);
sha3_final(md, &sha3); sha3_final(md, &sha3);

View File

@@ -1,6 +1,7 @@
#include "algo-gate-api.h" #include "algo-gate-api.h"
#include "algo/sha/sph_sha2.h" #include "algo/sha/sph_sha2.h"
#include "Verthash.h" #include "Verthash.h"
#include "tiny_sha3/sha3-4way.h"
static verthash_info_t verthashInfo; static verthash_info_t verthashInfo;
@@ -12,6 +13,82 @@ static const uint8_t verthashDatFileHash_bytes[32] =
0x29, 0xec, 0xf8, 0x8f, 0x8a, 0xd4, 0x76, 0x39, 0x29, 0xec, 0xf8, 0x8f, 0x8a, 0xd4, 0x76, 0x39,
0xb6, 0xed, 0xed, 0xaf, 0xd7, 0x21, 0xaa, 0x48 }; 0xb6, 0xed, 0xed, 0xaf, 0xd7, 0x21, 0xaa, 0x48 };
// Per-thread SHA3-512 midstates, filled by verthash_sha3_512_prehash_72
// once per scanhash call and cloned per nonce by verthash_sha3_512_final_8.
#if defined(__AVX2__)
// Two 4-way contexts cover the 8 first-byte header variants:
// ctxA holds offsets +1..+4, ctxB holds +5..+8.
static __thread sha3_4way_ctx_t sha3_mid_ctxA;
static __thread sha3_4way_ctx_t sha3_mid_ctxB;
#else
// Scalar path: one context per first-byte variant.
static __thread sha3_ctx_t sha3_mid_ctx[8];
#endif
// Precompute the SHA3-512 midstate over the first 72 bytes of the 80-byte
// block header for verthash's 8 header variants (first byte incremented
// by 1..8). Called once per scanhash invocation; the final 8 bytes
// (which contain the nonce) are absorbed per-nonce by
// verthash_sha3_512_final_8.
void verthash_sha3_512_prehash_72( const void *input )
{
#if defined(__AVX2__)
   // Interleave the 80-byte header into 4 lanes of 64-bit words.
   __m256i vin[10];
   mm256_intrlv80_4x64( vin, input );
   sha3_4way_init( &sha3_mid_ctxA, 64 );
   sha3_4way_init( &sha3_mid_ctxB, 64 );
   // Add 1,2,3,4 to the first header byte of lanes 0..3 respectively
   // (set_epi64x args are high-to-low, so lane 0 gets 1).
   vin[0] = _mm256_add_epi8( vin[0], _mm256_set_epi64x( 4,3,2,1 ) );
   sha3_4way_update( &sha3_mid_ctxA, vin, 72 );
   // Add 4 more: lanes now carry first-byte offsets 5,6,7,8 for ctxB.
   vin[0] = _mm256_add_epi8( vin[0], _mm256_set1_epi64x( 4 ) );
   sha3_4way_update( &sha3_mid_ctxB, vin, 72 );
#else
   // Scalar path: one midstate per variant, first byte bumped each pass.
   char in[80] __attribute__ ((aligned (64)));
   memcpy( in, input, 80 );
   for ( int i = 0; i < 8; i++ )
   {
      in[0] += 1;   // variant i hashes the header with first byte + (i+1)
      sha3_init( &sha3_mid_ctx[i], 64 );
      sha3_update( &sha3_mid_ctx[i], in, 72 );
   }
#endif
}
// Complete the 8 prehashed SHA3-512 streams: clone each midstate
// (so it can be reused for the next nonce), absorb the final 8 bytes
// (the nonce) and write 8 x 64-byte digests to hash. hash must have
// room for 512 bytes; digest for first-byte variant i+1 lands at
// offset i*64. Requires a prior verthash_sha3_512_prehash_72 call on
// this thread.
void verthash_sha3_512_final_8( void *hash, const uint64_t nonce )
{
#if defined(__AVX2__)
   __m256i vhashA[ 10 ] __attribute__ ((aligned (64)));
   __m256i vhashB[ 10 ] __attribute__ ((aligned (64)));
   sha3_4way_ctx_t ctx;
   // Broadcast the same nonce to all 4 lanes.
   __m256i vnonce = _mm256_set1_epi64x( nonce );

   // Midstate A: variants 1..4. Copy so the midstate survives this nonce.
   memcpy( &ctx, &sha3_mid_ctxA, sizeof ctx );
   sha3_4way_update( &ctx, &vnonce, 8 );
   sha3_4way_final( vhashA, &ctx );

   // Midstate B: variants 5..8.
   memcpy( &ctx, &sha3_mid_ctxB, sizeof ctx );
   sha3_4way_update( &ctx, &vnonce, 8 );
   sha3_4way_final( vhashB, &ctx );

   // De-interleave 4 lanes of 512-bit digests into contiguous outputs.
   // NOTE(review): arithmetic on void* is a GCC extension (byte-sized).
   dintrlv_4x64( hash, hash+64, hash+128, hash+192, vhashA, 512 );
   dintrlv_4x64( hash+256, hash+320, hash+384, hash+448, vhashB, 512 );
#else
   for ( int i = 0; i < 8; i++ )
   {
      sha3_ctx_t ctx;
      // Clone midstate i so the precomputed state is preserved.
      memcpy( &ctx, &sha3_mid_ctx[i], sizeof ctx );
      sha3_update( &ctx, &nonce, 8 );
      sha3_final( hash + i*64, &ctx );   // void* arithmetic: GCC extension
   }
#endif
}
int scanhash_verthash( struct work *work, uint32_t max_nonce, int scanhash_verthash( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
@@ -26,6 +103,8 @@ int scanhash_verthash( struct work *work, uint32_t max_nonce,
const bool bench = opt_benchmark; const bool bench = opt_benchmark;
mm128_bswap32_80( edata, pdata ); mm128_bswap32_80( edata, pdata );
verthash_sha3_512_prehash_72( edata );
do do
{ {
edata[19] = n; edata[19] = n;
@@ -51,15 +130,14 @@ bool register_verthash_algo( algo_gate_t* gate )
opt_target_factor = 256.0; opt_target_factor = 256.0;
gate->scanhash = (void*)&scanhash_verthash; gate->scanhash = (void*)&scanhash_verthash;
gate->optimizations = AVX2_OPT;
// verthash data file
char *verthash_data_file = opt_data_file ? opt_data_file char *verthash_data_file = opt_data_file ? opt_data_file
: default_verthash_data_file; : default_verthash_data_file;
int vhLoadResult = verthash_info_init( &verthashInfo, verthash_data_file ); int vhLoadResult = verthash_info_init( &verthashInfo, verthash_data_file );
if (vhLoadResult == 0) // No Error if (vhLoadResult == 0) // No Error
{ {
// and verify data file(if it was enabled)
if ( opt_verify ) if ( opt_verify )
{ {
uint8_t vhDataFileHash[32] = { 0 }; uint8_t vhDataFileHash[32] = { 0 };
@@ -78,7 +156,6 @@ bool register_verthash_algo( algo_gate_t* gate )
} }
} }
else else
{ {
// Handle Verthash error codes // Handle Verthash error codes
if ( vhLoadResult == 1 ) if ( vhLoadResult == 1 )

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.1. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.16.2.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.16.1' PACKAGE_VERSION='3.16.2'
PACKAGE_STRING='cpuminer-opt 3.16.1' PACKAGE_STRING='cpuminer-opt 3.16.2'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.16.1 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.16.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.16.1:";; short | recursive ) echo "Configuration of cpuminer-opt 3.16.2:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.16.1 cpuminer-opt configure 3.16.2
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.16.1, which was It was created by cpuminer-opt $as_me 3.16.2, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.16.1' VERSION='3.16.2'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.16.1, which was This file was extended by cpuminer-opt $as_me 3.16.2, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.16.1 cpuminer-opt config.status 3.16.2
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.16.1]) AC_INIT([cpuminer-opt], [3.16.2])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -555,7 +555,11 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
if ( !s ) if ( !s )
continue; continue;
if ( !strcmp( s, "segwit" ) || !strcmp( s, "!segwit" ) ) if ( !strcmp( s, "segwit" ) || !strcmp( s, "!segwit" ) )
{
segwit = true; segwit = true;
if ( opt_debug )
applog( LOG_INFO, "GBT: SegWit is enabled" );
}
} }
} }
// Segwit END // Segwit END
@@ -954,25 +958,25 @@ void scale_hash_for_display ( double* hashrate, char* prefix )
else { *prefix = 'Y'; *hashrate /= 1e24; } else { *prefix = 'Y'; *hashrate /= 1e24; }
} }
static inline void sprintf_et( char *str, int seconds ) static inline void sprintf_et( char *str, long unsigned int seconds )
{ {
// sprintf doesn't like uint64_t, Linux thinks it's long, Windows long long. long unsigned int min = seconds / 60;
unsigned int min = seconds / 60; long unsigned int sec = seconds % 60;
unsigned int sec = seconds % 60; long unsigned int hrs = min / 60;
unsigned int hrs = min / 60;
if ( unlikely( hrs ) ) if ( unlikely( hrs ) )
{ {
unsigned int years = hrs / (24*365); long unsigned int days = hrs / 24;
unsigned int days = hrs / 24; long unsigned int years = days / 365;
if ( years ) if ( years ) // 0y000d
sprintf( str, "%uy%ud", years, years % 365 ); sprintf( str, "%luy%lud", years, years % 365 );
else if ( days ) //0d00h else if ( days ) // 0d00h
sprintf( str, "%ud%02uh", days, hrs % 24 ); sprintf( str, "%lud%02luh", days, hrs % 24 );
else // 0h00m else // 0h00m
sprintf( str, "%uh%02um", hrs, min % 60 ); sprintf( str, "%luh%02lum", hrs, min % 60 );
} }
else // 0m00s else // 0m00s
sprintf( str, "%um%02us", min, sec ); sprintf( str, "%lum%02lus", min, sec );
} }
const long double exp32 = EXP32; // 2**32 const long double exp32 = EXP32; // 2**32
@@ -1071,7 +1075,8 @@ void report_summary_log( bool force )
double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6; double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
double ghrate = global_hashrate; double ghrate = global_hashrate;
double shrate = safe_div( exp32 * last_targetdiff * (double)(accepts), double target_diff = exp32 * last_targetdiff;
double shrate = safe_div( target_diff * (double)(accepts),
share_time, 0. ); share_time, 0. );
double sess_hrate = safe_div( exp32 * norm_diff_sum, double sess_hrate = safe_div( exp32 * norm_diff_sum,
(double)uptime.tv_sec, 0. ); (double)uptime.tv_sec, 0. );
@@ -1099,12 +1104,12 @@ void report_summary_log( bool force )
if ( accepted_share_count < submitted_share_count ) if ( accepted_share_count < submitted_share_count )
{ {
double ltd = exp32 * last_targetdiff;
double lost_ghrate = uptime.tv_sec == 0 ? 0. double lost_ghrate = uptime.tv_sec == 0 ? 0.
: ltd * (double)(submitted_share_count - accepted_share_count ) : target_diff
* (double)(submitted_share_count - accepted_share_count )
/ (double)uptime.tv_sec; / (double)uptime.tv_sec;
double lost_shrate = share_time == 0. ? 0. double lost_shrate = share_time == 0. ? 0.
: ltd * (double)(submits - accepts ) / share_time; : target_diff * (double)(submits - accepts ) / share_time;
char lshr_units[4] = {0}; char lshr_units[4] = {0};
char lghr_units[4] = {0}; char lghr_units[4] = {0};
scale_hash_for_display( &lost_shrate, lshr_units ); scale_hash_for_display( &lost_shrate, lshr_units );
@@ -2437,10 +2442,14 @@ static void *miner_thread( void *userdata )
#if ((defined(_WIN64) || defined(__WINDOWS__)) || defined(_WIN32)) #if ((defined(_WIN64) || defined(__WINDOWS__)) || defined(_WIN32))
applog( LOG_NOTICE, "Total: %s %sH/s", hr, hr_units ); applog( LOG_NOTICE, "Total: %s %sH/s", hr, hr_units );
#else #else
applog( LOG_NOTICE, "Total: %s %sH/s, CPU temp: %dC", float lo_freq = 0., hi_freq = 0.;
hr, hr_units, (uint32_t)cpu_temp(0) ); linux_cpu_hilo_freq( &lo_freq, &hi_freq );
applog( LOG_NOTICE,
"Total: %s %sH/s, Temp: %dC, Freq: %.3f/%.3f GHz",
hr, hr_units, (uint32_t)cpu_temp(0), lo_freq / 1e6,
hi_freq / 1e6 );
#endif #endif
} }
} // benchmark } // benchmark
// conditional mining // conditional mining

View File

@@ -900,7 +900,7 @@ Options:\n\
--benchmark run in offline benchmark mode\n\ --benchmark run in offline benchmark mode\n\
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\ --cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)\n\ --cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)\n\
-b, --api-bind IP/Port for the miner API (default: 127.0.0.1:4048)\n\ -b, --api-bind=address[:port] IP address for the miner API, default port is 4048)\n\
--api-remote Allow remote control\n\ --api-remote Allow remote control\n\
--max-temp=N Only mine if cpu temp is less than specified value (linux)\n\ --max-temp=N Only mine if cpu temp is less than specified value (linux)\n\
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\ --max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\

View File

@@ -1225,37 +1225,6 @@ static inline void intrlv_4x64( void *dst, const void *src0,
d[31] = _mm_unpackhi_epi64( s2[7], s3[7] ); d[31] = _mm_unpackhi_epi64( s2[7], s3[7] );
} }
/*
static inline void intrlv_4x64( void *dst, void *src0,
void *src1, void *src2, void *src3, int bit_len )
{
uint64_t *d = (uint64_t*)dst;
uint64_t *s0 = (uint64_t*)src0;
uint64_t *s1 = (uint64_t*)src1;
uint64_t *s2 = (uint64_t*)src2;
uint64_t *s3 = (uint64_t*)src3;
d[ 0] = s0[ 0]; d[ 1] = s1[ 0]; d[ 2] = s2[ 0]; d[ 3] = s3[ 0];
d[ 4] = s0[ 1]; d[ 5] = s1[ 1]; d[ 6] = s2[ 1]; d[ 7] = s3[ 1];
d[ 8] = s0[ 2]; d[ 9] = s1[ 2]; d[ 10] = s2[ 2]; d[ 11] = s3[ 2];
d[ 12] = s0[ 3]; d[ 13] = s1[ 3]; d[ 14] = s2[ 3]; d[ 15] = s3[ 3];
if ( bit_len <= 256 ) return;
d[ 16] = s0[ 4]; d[ 17] = s1[ 4]; d[ 18] = s2[ 4]; d[ 19] = s3[ 4];
d[ 20] = s0[ 5]; d[ 21] = s1[ 5]; d[ 22] = s2[ 5]; d[ 23] = s3[ 5];
d[ 24] = s0[ 6]; d[ 25] = s1[ 6]; d[ 26] = s2[ 6]; d[ 27] = s3[ 6];
d[ 28] = s0[ 7]; d[ 29] = s1[ 7]; d[ 30] = s2[ 7]; d[ 31] = s3[ 7];
if ( bit_len <= 512 ) return;
d[ 32] = s0[ 8]; d[ 33] = s1[ 8]; d[ 34] = s2[ 8]; d[ 35] = s3[ 8];
d[ 36] = s0[ 9]; d[ 37] = s1[ 9]; d[ 38] = s2[ 9]; d[ 39] = s3[ 9];
if ( bit_len <= 640 ) return;
d[ 40] = s0[10]; d[ 41] = s1[10]; d[ 42] = s2[10]; d[ 43] = s3[10];
d[ 44] = s0[11]; d[ 45] = s1[11]; d[ 46] = s2[11]; d[ 47] = s3[11];
d[ 48] = s0[12]; d[ 49] = s1[12]; d[ 50] = s2[12]; d[ 51] = s3[12];
d[ 52] = s0[13]; d[ 53] = s1[13]; d[ 54] = s2[13]; d[ 55] = s3[13];
d[ 56] = s0[14]; d[ 57] = s1[14]; d[ 58] = s2[14]; d[ 59] = s3[14];
d[ 60] = s0[15]; d[ 61] = s1[15]; d[ 62] = s2[15]; d[ 63] = s3[15];
}
*/
static inline void intrlv_4x64_512( void *dst, const void *src0, static inline void intrlv_4x64_512( void *dst, const void *src0,
const void *src1, const void *src2, const void *src3 ) const void *src1, const void *src2, const void *src3 )
{ {
@@ -1282,26 +1251,6 @@ static inline void intrlv_4x64_512( void *dst, const void *src0,
d[15] = _mm_unpackhi_epi64( s2[3], s3[3] ); d[15] = _mm_unpackhi_epi64( s2[3], s3[3] );
} }
/*
static inline void intrlv_4x64_512( void *dst, const void *src0,
const void *src1, const void *src2, const void *src3 )
{
uint64_t *d = (uint64_t*)dst;
const uint64_t *s0 = (const uint64_t*)src0;
const uint64_t *s1 = (const uint64_t*)src1;
const uint64_t *s2 = (const uint64_t*)src2;
const uint64_t *s3 = (const uint64_t*)src3;
d[ 0] = s0[ 0]; d[ 1] = s1[ 0]; d[ 2] = s2[ 0]; d[ 3] = s3[ 0];
d[ 4] = s0[ 1]; d[ 5] = s1[ 1]; d[ 6] = s2[ 1]; d[ 7] = s3[ 1];
d[ 8] = s0[ 2]; d[ 9] = s1[ 2]; d[ 10] = s2[ 2]; d[ 11] = s3[ 2];
d[ 12] = s0[ 3]; d[ 13] = s1[ 3]; d[ 14] = s2[ 3]; d[ 15] = s3[ 3];
d[ 16] = s0[ 4]; d[ 17] = s1[ 4]; d[ 18] = s2[ 4]; d[ 19] = s3[ 4];
d[ 20] = s0[ 5]; d[ 21] = s1[ 5]; d[ 22] = s2[ 5]; d[ 23] = s3[ 5];
d[ 24] = s0[ 6]; d[ 25] = s1[ 6]; d[ 26] = s2[ 6]; d[ 27] = s3[ 6];
d[ 28] = s0[ 7]; d[ 29] = s1[ 7]; d[ 30] = s2[ 7]; d[ 31] = s3[ 7];
}
*/
static inline void dintrlv_4x64( void *dst0, void *dst1, void *dst2, static inline void dintrlv_4x64( void *dst0, void *dst1, void *dst2,
void *dst3, const void *src, const int bit_len ) void *dst3, const void *src, const int bit_len )
{ {
@@ -1347,38 +1296,6 @@ static inline void dintrlv_4x64( void *dst0, void *dst1, void *dst2,
d3[7] = _mm_unpackhi_epi64( s[29], s[31] ); d3[7] = _mm_unpackhi_epi64( s[29], s[31] );
} }
/*
static inline void dintrlv_4x64( void *dst0, void *dst1, void *dst2,
void *dst3, const void *src, int bit_len )
{
uint64_t *d0 = (uint64_t*)dst0;
uint64_t *d1 = (uint64_t*)dst1;
uint64_t *d2 = (uint64_t*)dst2;
uint64_t *d3 = (uint64_t*)dst3;
const uint64_t *s = (const uint64_t*)src;
d0[ 0] = s[ 0]; d1[ 0] = s[ 1]; d2[ 0] = s[ 2]; d3[ 0] = s[ 3];
d0[ 1] = s[ 4]; d1[ 1] = s[ 5]; d2[ 1] = s[ 6]; d3[ 1] = s[ 7];
d0[ 2] = s[ 8]; d1[ 2] = s[ 9]; d2[ 2] = s[10]; d3[ 2] = s[11];
d0[ 3] = s[12]; d1[ 3] = s[13]; d2[ 3] = s[14]; d3[ 3] = s[15];
if ( bit_len <= 256 ) return;
d0[ 4] = s[16]; d1[ 4] = s[17]; d2[ 4] = s[18]; d3[ 4] = s[19];
d0[ 5] = s[20]; d1[ 5] = s[21]; d2[ 5] = s[22]; d3[ 5] = s[23];
d0[ 6] = s[24]; d1[ 6] = s[25]; d2[ 6] = s[26]; d3[ 6] = s[27];
d0[ 7] = s[28]; d1[ 7] = s[29]; d2[ 7] = s[30]; d3[ 7] = s[31];
if ( bit_len <= 512 ) return;
d0[ 8] = s[32]; d1[ 8] = s[33]; d2[ 8] = s[34]; d3[ 8] = s[35];
d0[ 9] = s[36]; d1[ 9] = s[37]; d2[ 9] = s[38]; d3[ 9] = s[39];
if ( bit_len <= 640 ) return;
d0[10] = s[40]; d1[10] = s[41]; d2[10] = s[42]; d3[10] = s[43];
d0[11] = s[44]; d1[11] = s[45]; d2[11] = s[46]; d3[11] = s[47];
d0[12] = s[48]; d1[12] = s[49]; d2[12] = s[50]; d3[12] = s[51];
d0[13] = s[52]; d1[13] = s[53]; d2[13] = s[54]; d3[13] = s[55];
d0[14] = s[56]; d1[14] = s[57]; d2[14] = s[58]; d3[14] = s[59];
d0[15] = s[60]; d1[15] = s[61]; d2[15] = s[62]; d3[15] = s[63];
}
*/
static inline void dintrlv_4x64_512( void *dst0, void *dst1, void *dst2, static inline void dintrlv_4x64_512( void *dst0, void *dst1, void *dst2,
void *dst3, const void *src ) void *dst3, const void *src )
{ {
@@ -1405,26 +1322,6 @@ static inline void dintrlv_4x64_512( void *dst0, void *dst1, void *dst2,
d3[3] = _mm_unpackhi_epi64( s[13], s[15] ); d3[3] = _mm_unpackhi_epi64( s[13], s[15] );
} }
/*
static inline void dintrlv_4x64_512( void *dst0, void *dst1, void *dst2,
void *dst3, const void *src )
{
uint64_t *d0 = (uint64_t*)dst0;
uint64_t *d1 = (uint64_t*)dst1;
uint64_t *d2 = (uint64_t*)dst2;
uint64_t *d3 = (uint64_t*)dst3;
const uint64_t *s = (const uint64_t*)src;
d0[ 0] = s[ 0]; d1[ 0] = s[ 1]; d2[ 0] = s[ 2]; d3[ 0] = s[ 3];
d0[ 1] = s[ 4]; d1[ 1] = s[ 5]; d2[ 1] = s[ 6]; d3[ 1] = s[ 7];
d0[ 2] = s[ 8]; d1[ 2] = s[ 9]; d2[ 2] = s[10]; d3[ 2] = s[11];
d0[ 3] = s[12]; d1[ 3] = s[13]; d2[ 3] = s[14]; d3[ 3] = s[15];
d0[ 4] = s[16]; d1[ 4] = s[17]; d2[ 4] = s[18]; d3[ 4] = s[19];
d0[ 5] = s[20]; d1[ 5] = s[21]; d2[ 5] = s[22]; d3[ 5] = s[23];
d0[ 6] = s[24]; d1[ 6] = s[25]; d2[ 6] = s[26]; d3[ 6] = s[27];
d0[ 7] = s[28]; d1[ 7] = s[29]; d2[ 7] = s[30]; d3[ 7] = s[31];
}
*/
static inline void extr_lane_4x64( void *d, const void *s, static inline void extr_lane_4x64( void *d, const void *s,
const int lane, const int bit_len ) const int lane, const int bit_len )
{ {
@@ -1440,9 +1337,41 @@ static inline void extr_lane_4x64( void *d, const void *s,
} }
#if defined(__AVX2__) #if defined(__AVX2__)
// Doesn't really need AVX2, just SSSE3, but is only used with AVX2 code.
// There a alignment problems with the source buffer on Wwindows, static inline void mm256_intrlv80_4x64( void *d, const void *src )
// can't use 256 bit bswap. {
__m128i s0 = casti_m128i( src,0 );
__m128i s1 = casti_m128i( src,1 );
__m128i s2 = casti_m128i( src,2 );
__m128i s3 = casti_m128i( src,3 );
__m128i s4 = casti_m128i( src,4 );
casti_m128i( d, 0 ) =
casti_m128i( d, 1 ) = _mm_shuffle_epi32( s0, 0x44 );
casti_m128i( d, 2 ) =
casti_m128i( d, 3 ) = _mm_shuffle_epi32( s0, 0xee );
casti_m128i( d, 4 ) =
casti_m128i( d, 5 ) = _mm_shuffle_epi32( s1, 0x44 );
casti_m128i( d, 6 ) =
casti_m128i( d, 7 ) = _mm_shuffle_epi32( s1, 0xee );
casti_m128i( d, 8 ) =
casti_m128i( d, 9 ) = _mm_shuffle_epi32( s2, 0x44 );
casti_m128i( d, 10 ) =
casti_m128i( d, 11 ) = _mm_shuffle_epi32( s2, 0xee );
casti_m128i( d, 12 ) =
casti_m128i( d, 13 ) = _mm_shuffle_epi32( s3, 0x44 );
casti_m128i( d, 14 ) =
casti_m128i( d, 15 ) = _mm_shuffle_epi32( s3, 0xee );
casti_m128i( d, 16 ) =
casti_m128i( d, 17 ) = _mm_shuffle_epi32( s4, 0x44 );
casti_m128i( d, 18 ) =
casti_m128i( d, 19 ) = _mm_shuffle_epi32( s4, 0xee );
}
static inline void mm256_bswap32_intrlv80_4x64( void *d, const void *src ) static inline void mm256_bswap32_intrlv80_4x64( void *d, const void *src )
{ {
@@ -1636,40 +1565,6 @@ static inline void intrlv_8x64_512( void *dst, const void *src0,
d[31] = _mm_unpackhi_epi64( s6[3], s7[3] ); d[31] = _mm_unpackhi_epi64( s6[3], s7[3] );
} }
/*
#define ILEAVE_8x64( i ) do \
{ \
uint64_t *d = (uint64_t*)(dst) + ( (i) << 3 ); \
d[0] = *( (const uint64_t*)(s0) +(i) ); \
d[1] = *( (const uint64_t*)(s1) +(i) ); \
d[2] = *( (const uint64_t*)(s2) +(i) ); \
d[3] = *( (const uint64_t*)(s3) +(i) ); \
d[4] = *( (const uint64_t*)(s4) +(i) ); \
d[5] = *( (const uint64_t*)(s5) +(i) ); \
d[6] = *( (const uint64_t*)(s6) +(i) ); \
d[7] = *( (const uint64_t*)(s7) +(i) ); \
} while(0)
static inline void intrlv_8x64( void *dst, const void *s0,
const void *s1, const void *s2, const void *s3, const void *s4,
const void *s5, const void *s6, const void *s7, int bit_len )
{
ILEAVE_8x64( 0 ); ILEAVE_8x64( 1 );
ILEAVE_8x64( 2 ); ILEAVE_8x64( 3 );
if ( bit_len <= 256 ) return;
ILEAVE_8x64( 4 ); ILEAVE_8x64( 5 );
ILEAVE_8x64( 6 ); ILEAVE_8x64( 7 );
if ( bit_len <= 512 ) return;
ILEAVE_8x64( 8 ); ILEAVE_8x64( 9 );
if ( bit_len <= 640 ) return;
ILEAVE_8x64( 10 ); ILEAVE_8x64( 11 );
ILEAVE_8x64( 12 ); ILEAVE_8x64( 13 );
ILEAVE_8x64( 14 ); ILEAVE_8x64( 15 );
}
#undef ILEAVE_8x64
*/
static inline void dintrlv_8x64( void *dst0, void *dst1, void *dst2, static inline void dintrlv_8x64( void *dst0, void *dst1, void *dst2,
void *dst3, void *dst4, void *dst5, void *dst6, void *dst7, void *dst3, void *dst4, void *dst5, void *dst6, void *dst7,
@@ -1815,39 +1710,6 @@ static inline void dintrlv_8x64_512( void *dst0, void *dst1, void *dst2,
d7[3] = _mm_unpackhi_epi64( s[27], s[31] ); d7[3] = _mm_unpackhi_epi64( s[27], s[31] );
} }
/*
#define DLEAVE_8x64( i ) do \
{ \
const uint64_t *s = (const uint64_t*)(src) + ( (i) << 3 ); \
*( (uint64_t*)(d0) +(i) ) = s[0]; \
*( (uint64_t*)(d1) +(i) ) = s[1]; \
*( (uint64_t*)(d2) +(i) ) = s[2]; \
*( (uint64_t*)(d3) +(i) ) = s[3]; \
*( (uint64_t*)(d4) +(i) ) = s[4]; \
*( (uint64_t*)(d5) +(i) ) = s[5]; \
*( (uint64_t*)(d6) +(i) ) = s[6]; \
*( (uint64_t*)(d7) +(i) ) = s[7]; \
} while(0)
static inline void dintrlv_8x64( void *d0, void *d1, void *d2, void *d3,
void *d4, void *d5, void *d6, void *d7, const void *src, int bit_len )
{
DLEAVE_8x64( 0 ); DLEAVE_8x64( 1 );
DLEAVE_8x64( 2 ); DLEAVE_8x64( 3 );
if ( bit_len <= 256 ) return;
DLEAVE_8x64( 4 ); DLEAVE_8x64( 5 );
DLEAVE_8x64( 6 ); DLEAVE_8x64( 7 );
if ( bit_len <= 512 ) return;
DLEAVE_8x64( 8 ); DLEAVE_8x64( 9 );
if ( bit_len <= 640 ) return;
DLEAVE_8x64( 10 ); DLEAVE_8x64( 11 );
DLEAVE_8x64( 12 ); DLEAVE_8x64( 13 );
DLEAVE_8x64( 14 ); DLEAVE_8x64( 15 );
}
#undef DLEAVE_8x64
*/
static inline void extr_lane_8x64( void *d, const void *s, static inline void extr_lane_8x64( void *d, const void *s,
const int lane, const int bit_len ) const int lane, const int bit_len )
{ {

View File

@@ -178,7 +178,7 @@ static inline __m128i mm128_mask_32( const __m128i v, const int m )
// Basic operations without equivalent SIMD intrinsic // Basic operations without equivalent SIMD intrinsic
// Bitwise not (~v) // Bitwise not (~v)
#define mm128_not( v ) _mm_xor_si128( (v), m128_neg1 ) #define mm128_not( v ) _mm_xor_si128( v, m128_neg1 )
// Unary negation of elements (-v) // Unary negation of elements (-v)
#define mm128_negate_64( v ) _mm_sub_epi64( m128_zero, v ) #define mm128_negate_64( v ) _mm_sub_epi64( m128_zero, v )
@@ -263,7 +263,8 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
_mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) ) _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #if defined(__AVX512VL__)
//#if defined(__AVX512F__) && defined(__AVX512VL__)
#define mm128_ror_64 _mm_ror_epi64 #define mm128_ror_64 _mm_ror_epi64
#define mm128_rol_64 _mm_rol_epi64 #define mm128_rol_64 _mm_rol_epi64
@@ -291,16 +292,13 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
#define mm128_swap_64( v ) _mm_shuffle_epi32( v, 0x4e ) #define mm128_swap_64( v ) _mm_shuffle_epi32( v, 0x4e )
#define mm128_ror_1x32( v ) _mm_shuffle_epi32( v, 0x39 ) #define mm128_ror_1x32( v ) _mm_shuffle_epi32( v, 0x39 )
#define mm128_rol_1x32( v ) _mm_shuffle_epi32( v, 0x93 ) #define mm128_rol_1x32( v ) _mm_shuffle_epi32( v, 0x93 )
//#define mm128_swap_64( v ) _mm_alignr_epi8( v, v, 8 )
//#define mm128_ror_1x32( v ) _mm_alignr_epi8( v, v, 4 )
//#define mm128_rol_1x32( v ) _mm_alignr_epi8( v, v, 12 )
// Swap 32 bit elements in 64 bit lanes // Swap 32 bit elements in 64 bit lanes
#define mm128_swap64_32( v ) _mm_shuffle_epi32( v, 0xb1 ) #define mm128_swap64_32( v ) _mm_shuffle_epi32( v, 0xb1 )
#if defined(__SSSE3__) #if defined(__SSSE3__)
// Rotate right by c bytes // Rotate right by c bytes, no SSE2 equivalent.
static inline __m128i mm128_ror_x8( const __m128i v, const int c ) static inline __m128i mm128_ror_x8( const __m128i v, const int c )
{ return _mm_alignr_epi8( v, v, c ); } { return _mm_alignr_epi8( v, v, c ); }

View File

@@ -18,7 +18,7 @@
#define mm256_mov64_256( i ) _mm256_castsi128_si256( mm128_mov64_128( i ) ) #define mm256_mov64_256( i ) _mm256_castsi128_si256( mm128_mov64_128( i ) )
#define mm256_mov32_256( i ) _mm256_castsi128_si256( mm128_mov32_128( i ) ) #define mm256_mov32_256( i ) _mm256_castsi128_si256( mm128_mov32_128( i ) )
// Mo0ve low element of vector to integer. // Move low element of vector to integer.
#define mm256_mov256_64( v ) mm128_mov128_64( _mm256_castsi256_si128( v ) ) #define mm256_mov256_64( v ) mm128_mov128_64( _mm256_castsi256_si128( v ) )
#define mm256_mov256_32( v ) mm128_mov128_32( _mm256_castsi256_si128( v ) ) #define mm256_mov256_32( v ) mm128_mov128_32( _mm256_castsi256_si128( v ) )
@@ -42,7 +42,7 @@ static inline __m256i m256_const_64( const uint64_t i3, const uint64_t i2,
// 128 bit vector argument // 128 bit vector argument
#define m256_const1_128( v ) \ #define m256_const1_128( v ) \
_mm256_permute4x64_epi64( _mm256_castsi128_si256( v ), 0x44 ) _mm256_permute4x64_epi64( _mm256_castsi128_si256( v ), 0x44 )
// 64 bit integer argument // 64 bit integer argument zero extended to 128 bits.
#define m256_const1_i128( i ) m256_const1_128( mm128_mov64_128( i ) ) #define m256_const1_i128( i ) m256_const1_128( mm128_mov64_128( i ) )
#define m256_const1_64( i ) _mm256_broadcastq_epi64( mm128_mov64_128( i ) ) #define m256_const1_64( i ) _mm256_broadcastq_epi64( mm128_mov64_128( i ) )
#define m256_const1_32( i ) _mm256_broadcastd_epi32( mm128_mov32_128( i ) ) #define m256_const1_32( i ) _mm256_broadcastd_epi32( mm128_mov32_128( i ) )
@@ -168,7 +168,10 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
_mm256_srli_epi32( v, 32-(c) ) ) _mm256_srli_epi32( v, 32-(c) ) )
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) // The spec says both F & VL are required, but just in case AMD
// decides to implement ROL/R without AVX512F.
#if defined(__AVX512VL__)
//#if defined(__AVX512F__) && defined(__AVX512VL__)
// AVX512, control must be 8 bit immediate. // AVX512, control must be 8 bit immediate.
@@ -198,21 +201,14 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
// //
// Rotate elements accross all lanes. // Rotate elements accross all lanes.
// //
// AVX2 has no full vector permute for elements less than 32 bits. // Swap 128 bit elements in 256 bit vector.
// AVX512 has finer granularity full vector permutes. #define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
// AVX512 has full vector alignr which might be faster, especially for 32 bit
// Rotate 256 bit vector by one 64 bit element
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #if defined(__AVX512F__) && defined(__AVX512VL__)
static inline __m256i mm256_swap_128( const __m256i v )
{ return _mm256_alignr_epi64( v, v, 2 ); }
static inline __m256i mm256_ror_1x64( const __m256i v )
{ return _mm256_alignr_epi64( v, v, 1 ); }
static inline __m256i mm256_rol_1x64( const __m256i v )
{ return _mm256_alignr_epi64( v, v, 3 ); }
static inline __m256i mm256_ror_1x32( const __m256i v ) static inline __m256i mm256_ror_1x32( const __m256i v )
{ return _mm256_alignr_epi32( v, v, 1 ); } { return _mm256_alignr_epi32( v, v, 1 ); }
@@ -220,21 +216,8 @@ static inline __m256i mm256_ror_1x32( const __m256i v )
static inline __m256i mm256_rol_1x32( const __m256i v ) static inline __m256i mm256_rol_1x32( const __m256i v )
{ return _mm256_alignr_epi32( v, v, 7 ); } { return _mm256_alignr_epi32( v, v, 7 ); }
static inline __m256i mm256_ror_3x32( const __m256i v )
{ return _mm256_alignr_epi32( v, v, 3 ); }
static inline __m256i mm256_rol_3x32( const __m256i v )
{ return _mm256_alignr_epi32( v, v, 5 ); }
#else // AVX2 #else // AVX2
// Swap 128 bit elements in 256 bit vector.
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
// Rotate 256 bit vector by one 64 bit element
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
// Rotate 256 bit vector by one 32 bit element. // Rotate 256 bit vector by one 32 bit element.
#define mm256_ror_1x32( v ) \ #define mm256_ror_1x32( v ) \
_mm256_permutevar8x32_epi32( v, \ _mm256_permutevar8x32_epi32( v, \
@@ -246,17 +229,6 @@ static inline __m256i mm256_rol_3x32( const __m256i v )
m256_const_64( 0x0000000600000005, 0x0000000400000003, \ m256_const_64( 0x0000000600000005, 0x0000000400000003, \
0x0000000200000001, 0x0000000000000007 ) 0x0000000200000001, 0x0000000000000007 )
// Rotate 256 bit vector by three 32 bit elements (96 bits).
#define mm256_ror_3x32( v ) \
_mm256_permutevar8x32_epi32( v, \
m256_const_64( 0x0000000200000001, 0x0000000000000007, \
0x0000000600000005, 0x0000000400000003 )
#define mm256_rol_3x32( v ) \
_mm256_permutevar8x32_epi32( v, \
m256_const_64( 0x0000000400000003, 0x0000000200000001, \
0x0000000000000007, 0x0000000600000005 )
#endif // AVX512 else AVX2 #endif // AVX512 else AVX2
// //

148
util.c
View File

@@ -943,6 +943,140 @@ bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
return true; return true;
} }
// One step of the bech32 BCH checksum (BIP-173): shift the low 25 bits
// of the accumulator left by 5 and conditionally XOR in the generator
// terms selected by the 5 bits shifted out the top.
static uint32_t bech32_polymod_step(uint32_t pre) {
    static const uint32_t gen[5] = {
        0x3b6a57b2UL, 0x26508e6dUL, 0x1ea119faUL, 0x3d4233ddUL, 0x2a1462b3UL
    };
    uint32_t top = pre >> 25;                     // 5 bits leaving the accumulator
    uint32_t chk = (pre & 0x1FFFFFFUL) << 5;      // remaining 25 bits, shifted up
    for (int i = 0; i < 5; ++i) {
        if ((top >> i) & 1)
            chk ^= gen[i];
    }
    return chk;
}
// Reverse lookup table mapping ASCII code points (0-127) to their 5-bit
// bech32 symbol values; -1 marks characters outside the bech32 charset
// "qpzry9x8gf2tvdw0s3jn54khce6mua7l".  The rows for 'A'-'Z' duplicate
// the 'a'-'z' rows so lookups are case-insensitive (mixed-case strings
// are rejected separately by the decoder).
static const int8_t bech32_charset_rev[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    15, -1, 10, 17, 21, 20, 26, 30,  7,  5, -1, -1, -1, -1, -1, -1,
    -1, 29, -1, 24, 13, 25,  9,  8, 23, -1, 18, 22, 31, 27, 19, -1,
     1,  0,  3, 16, 11, 28, 12, 14,  6,  4,  2, -1, -1, -1, -1, -1,
    -1, 29, -1, 24, 13, 25,  9,  8, 23, -1, 18, 22, 31, 27, 19, -1,
     1,  0,  3, 16, 11, 28, 12, 14,  6,  4,  2, -1, -1, -1, -1, -1
};
// Decode a bech32 string (BIP-173) into its human-readable part (hrp)
// and 5-bit data symbols.  On success: hrp receives the lowercased,
// NUL-terminated prefix, data receives *data_len 5-bit values (the
// 6-symbol checksum is stripped), and true is returned.  Returns false
// on length violations, invalid characters, mixed case, or a failed
// checksum.  Caller must size hrp/data for up to 90-character input.
static bool bech32_decode(char *hrp, uint8_t *data, size_t *data_len, const char *input) {
    uint32_t chk = 1;
    size_t i;
    size_t input_len = strlen(input);
    size_t hrp_len;
    int have_lower = 0, have_upper = 0;
    // BIP-173 limits total length to 90; 8 is the minimum possible
    // (1 hrp char + separator + 6 checksum symbols).
    if (input_len < 8 || input_len > 90) {
        return false;
    }
    // Find the last '1' separator by counting data symbols from the end.
    *data_len = 0;
    while (*data_len < input_len && input[(input_len - 1) - *data_len] != '1') {
        ++(*data_len);
    }
    hrp_len = input_len - (1 + *data_len);
    if (1 + *data_len >= input_len || *data_len < 6) {
        return false;
    }
    // Exclude the trailing 6-symbol checksum from the reported length.
    *(data_len) -= 6;
    // Copy the hrp lowercased while feeding its high bits into the checksum.
    for (i = 0; i < hrp_len; ++i) {
        int ch = input[i];
        if (ch < 33 || ch > 126) {
            return false;
        }
        if (ch >= 'a' && ch <= 'z') {
            have_lower = 1;
        } else if (ch >= 'A' && ch <= 'Z') {
            have_upper = 1;
            ch = (ch - 'A') + 'a';
        }
        hrp[i] = ch;
        chk = bech32_polymod_step(chk) ^ (ch >> 5);
    }
    hrp[i] = 0;
    // Per the spec, the hrp is mixed into the checksum twice:
    // high bits above, then a zero separator, then the low 5 bits.
    chk = bech32_polymod_step(chk);
    for (i = 0; i < hrp_len; ++i) {
        chk = bech32_polymod_step(chk) ^ (input[i] & 0x1f);
    }
    ++i;
    // Decode the data part; the final 6 symbols only feed the checksum.
    while (i < input_len) {
        int v = (input[i] & 0x80) ? -1 : bech32_charset_rev[(int)input[i]];
        if (input[i] >= 'a' && input[i] <= 'z') have_lower = 1;
        if (input[i] >= 'A' && input[i] <= 'Z') have_upper = 1;
        if (v == -1) {
            return false;
        }
        chk = bech32_polymod_step(chk) ^ v;
        if (i + 6 < input_len) {
            data[i - (1 + hrp_len)] = v;
        }
        ++i;
    }
    // Mixed case is invalid per BIP-173.
    if (have_lower && have_upper) {
        return false;
    }
    // A valid bech32 string leaves the checksum accumulator at exactly 1.
    return chk == 1;
}
// Regroup a stream of inbits-wide values into outbits-wide values
// (e.g. 5-bit bech32 symbols -> 8-bit bytes).  Output is appended at
// out[*outlen], advancing *outlen.  With pad nonzero, leftover bits are
// flushed zero-padded; otherwise leftover bits must be a valid, all-zero
// remainder or false is returned.
static bool convert_bits(uint8_t *out, size_t *outlen, int outbits, const uint8_t *in, size_t inlen, int inbits, int pad) {
    uint32_t acc = 0;                              // bit accumulator
    int nbits = 0;                                 // bits currently held in acc
    const uint32_t outmask = (((uint32_t)1) << outbits) - 1;
    for (size_t i = 0; i < inlen; ++i) {
        acc = (acc << inbits) | in[i];
        nbits += inbits;
        // Emit complete output groups as soon as they are available.
        while (nbits >= outbits) {
            nbits -= outbits;
            out[(*outlen)++] = (acc >> nbits) & outmask;
        }
    }
    if (pad) {
        if (nbits > 0)
            out[(*outlen)++] = (acc << (outbits - nbits)) & outmask;
        return true;
    }
    // Without padding: reject non-zero leftover bits or an oversized tail.
    return !((((acc << (outbits - nbits)) & outmask) != 0) || nbits >= inbits);
}
// Decode a segwit (BIP-173) address into its witness version and
// witness program bytes.  Returns false on any bech32 or structural
// validation failure; on success *witver is 0..16 and witdata holds
// *witdata_len program bytes (2..40; exactly 20 or 32 for version 0).
static bool segwit_addr_decode(int *witver, uint8_t *witdata, size_t *witdata_len, const char *addr) {
    uint8_t symbols[84];
    char hrp[84];
    size_t nsym;
    if (!bech32_decode(hrp, symbols, &nsym, addr))
        return false;
    // Need at least the version symbol; 65 symbols bounds a 40-byte program.
    if (nsym == 0 || nsym > 65)
        return false;
    // Witness version is the first 5-bit symbol, limited to 0..16.
    if (symbols[0] > 16)
        return false;
    *witdata_len = 0;
    // Repack the remaining 5-bit symbols into program bytes, no padding.
    if (!convert_bits(witdata, witdata_len, 8, symbols + 1, nsym - 1, 5, 0))
        return false;
    if (*witdata_len < 2 || *witdata_len > 40)
        return false;
    // Version-0 programs must be exactly 20 (P2WPKH) or 32 (P2WSH) bytes.
    if (symbols[0] == 0 && *witdata_len != 20 && *witdata_len != 32)
        return false;
    *witver = symbols[0];
    return true;
}
// Convert a bech32 segwit address into an output script:
//   <version opcode> <push-length> <witness program>
// where the version opcode is OP_0 for version 0 and OP_1..OP_16
// (0x51..0x60, i.e. 0x50 + version) otherwise.  Returns the script
// length, or 0 if the address fails to decode or outsz is too small.
static size_t bech32_to_script(uint8_t *out, size_t outsz, const char *addr) {
    uint8_t witprog[40];
    size_t witprog_len;
    int witver;
    if (!segwit_addr_decode(&witver, witprog, &witprog_len, addr))
        return 0;
    // Script is the program plus 2 bytes (version opcode + length).
    if (outsz < witprog_len + 2)
        return 0;
    out[0] = witver ? (0x50 + witver) : 0;
    out[1] = witprog_len;
    memcpy(out + 2, witprog, witprog_len);
    if ( opt_debug )
        applog( LOG_INFO, "Coinbase address uses Bech32 coding");
    return witprog_len + 2;
}
size_t address_to_script( unsigned char *out, size_t outsz, const char *addr ) size_t address_to_script( unsigned char *out, size_t outsz, const char *addr )
{ {
unsigned char addrbin[ pk_buffer_size_max ]; unsigned char addrbin[ pk_buffer_size_max ];
@@ -950,12 +1084,15 @@ size_t address_to_script( unsigned char *out, size_t outsz, const char *addr )
size_t rv; size_t rv;
if ( !b58dec( addrbin, outsz, addr ) ) if ( !b58dec( addrbin, outsz, addr ) )
return 0; return bech32_to_script( out, outsz, addr );
addrver = b58check( addrbin, outsz, addr ); addrver = b58check( addrbin, outsz, addr );
if ( addrver < 0 ) if ( addrver < 0 )
return 0; return 0;
if ( opt_debug )
applog( LOG_INFO, "Coinbase address uses B58 coding");
switch ( addrver ) switch ( addrver )
{ {
case 5: /* Bitcoin script hash */ case 5: /* Bitcoin script hash */
@@ -1486,9 +1623,6 @@ static bool stratum_parse_extranonce(struct stratum_ctx *sctx, json_t *params, i
if ( !opt_quiet ) /* pool dynamic change */ if ( !opt_quiet ) /* pool dynamic change */
applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d", applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d",
xnonce1, xn2_size); xnonce1, xn2_size);
// if (pndx == 0 && opt_debug)
// applog(LOG_DEBUG, "Stratum set nonce %s with extranonce2 size=%d",
// xnonce1, xn2_size);
return true; return true;
out: out:
@@ -1638,8 +1772,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
opt_extranonce = false; opt_extranonce = false;
goto out; goto out;
} }
if ( !opt_quiet )
applog( LOG_INFO, "Extranonce subscription enabled" );
sret = stratum_recv_line( sctx ); sret = stratum_recv_line( sctx );
if ( sret ) if ( sret )
@@ -1658,8 +1790,8 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
applog( LOG_WARNING, "Stratum answer id is not correct!" ); applog( LOG_WARNING, "Stratum answer id is not correct!" );
} }
res_val = json_object_get( extra, "result" ); res_val = json_object_get( extra, "result" );
// if (opt_debug && (!res_val || json_is_false(res_val))) if (opt_debug && (!res_val || json_is_false(res_val)))
// applog(LOG_DEBUG, "extranonce subscribe not supported"); applog(LOG_DEBUG, "Method extranonce.subscribe is not supported");
json_decref( extra ); json_decref( extra );
} }
free(sret); free(sret);