Compare commits


2 Commits

Author     SHA1        Message    Date
Jay D Dee  3363d61524  v3.8.4.1   2018-03-22 14:28:03 -04:00
Jay D Dee  20fe05054c  v3.8.4     2018-03-18 12:51:03 -04:00
20 changed files with 1028 additions and 848 deletions


@@ -68,6 +68,7 @@ cpuminer_SOURCES = \
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/cubehash/cube-hash-2way.c \
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/gost/sph_gost.c \
@@ -242,7 +243,7 @@ cpuminer_SOURCES = \
algo/x17/hmq1725.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-simd.c
algo/yescrypt/yescrypt-best.c
disable_flags =


@@ -28,11 +28,12 @@ performance.
ARM CPUs are not supported.
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
Centos are known to work and have all dependencies in their repositories.
Others may work but may require more effort.
Centos, are known to work and have all dependencies in their repositories.
Others may work but may require more effort. Older versions such as Centos 6
don't work due to missing features.
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
MacOS, OSx is not supported.
MacOS, OSx and Android are not supported.
3. Stratum pool. Some algos may work wallet mining using getwork or GBT. YMMV.
@@ -110,6 +111,7 @@ Supported Algorithms
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)
yescryptr16 Yenten (YTN)
yescryptr32 WAVI
zr5 Ziftr
Errata


@@ -1,4 +1,4 @@
cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
puminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
This feature requires recent SW including GCC version 5 or higher and
openssl version 1.1 or higher. It may also require using "-march=znver1"
compile flag.
@@ -160,6 +160,18 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
Change Log
----------
v3.8.4.1
Fixed sha256t low difficulty rejects.
Fixed compile error on CPUs with AVX512.
v3.8.4
Added yescryptr32 algo for WAVI coin.
Added URL to API data.
Improved detection of __int128 support (linux only)
Compile support for CPUs without SSSE3 (no binary support)
v3.8.3.3
Integrated getblocktemplate with algo_gate.


@@ -227,6 +227,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
default:
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );


@@ -0,0 +1,205 @@
#if defined(__AVX2__)
#include <stdbool.h>
#include <unistd.h>
#include <memory.h>
#include "cube-hash-2way.h"
// 2x128
static void transform_2way( cube_2way_context *sp )
{
int r;
const int rounds = sp->rounds;
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
x0 = _mm256_load_si256( (__m256i*)sp->h );
x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
x2 = _mm256_load_si256( (__m256i*)sp->h + 2 );
x3 = _mm256_load_si256( (__m256i*)sp->h + 3 );
x4 = _mm256_load_si256( (__m256i*)sp->h + 4 );
x5 = _mm256_load_si256( (__m256i*)sp->h + 5 );
x6 = _mm256_load_si256( (__m256i*)sp->h + 6 );
x7 = _mm256_load_si256( (__m256i*)sp->h + 7 );
for ( r = 0; r < rounds; ++r )
{
x4 = _mm256_add_epi32( x0, x4 );
x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 );
y0 = x2;
y1 = x3;
y2 = x0;
y3 = x1;
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
_mm256_srli_epi32( y0, 25 ) );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
_mm256_srli_epi32( y1, 25 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
_mm256_srli_epi32( y2, 25 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
_mm256_srli_epi32( y3, 25 ) );
x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 );
x3 = _mm256_xor_si256( x3, x7 );
x4 = mm256_swap128_64( x4 );
x5 = mm256_swap128_64( x5 );
x6 = mm256_swap128_64( x6 );
x7 = mm256_swap128_64( x7 );
x4 = _mm256_add_epi32( x0, x4 );
x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 );
y0 = x1;
y1 = x0;
y2 = x3;
y3 = x2;
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
_mm256_srli_epi32( y0, 21 ) );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
_mm256_srli_epi32( y1, 21 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
_mm256_srli_epi32( y2, 21 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
_mm256_srli_epi32( y3, 21 ) );
x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 );
x3 = _mm256_xor_si256( x3, x7 );
x4 = mm256_swap64_32( x4 );
x5 = mm256_swap64_32( x5 );
x6 = mm256_swap64_32( x6 );
x7 = mm256_swap64_32( x7 );
}
_mm256_store_si256( (__m256i*)sp->h, x0 );
_mm256_store_si256( (__m256i*)sp->h + 1, x1 );
_mm256_store_si256( (__m256i*)sp->h + 2, x2 );
_mm256_store_si256( (__m256i*)sp->h + 3, x3 );
_mm256_store_si256( (__m256i*)sp->h + 4, x4 );
_mm256_store_si256( (__m256i*)sp->h + 5, x5 );
_mm256_store_si256( (__m256i*)sp->h + 6, x6 );
_mm256_store_si256( (__m256i*)sp->h + 7, x7 );
}
cube_2way_context cube_2way_ctx_cache __attribute__ ((aligned (64)));
int cube_2way_reinit( cube_2way_context *sp )
{
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
return 0;
}
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
int blockbytes )
{
int i;
// all sizes of __m128i
cube_2way_ctx_cache.hashlen = hashbitlen/128;
cube_2way_ctx_cache.blocksize = blockbytes/16;
cube_2way_ctx_cache.rounds = rounds;
cube_2way_ctx_cache.pos = 0;
for ( i = 0; i < 8; ++i )
cube_2way_ctx_cache.h[i] = m256_zero;
cube_2way_ctx_cache.h[0] = _mm256_set_epi32(
0, rounds, blockbytes, hashbitlen / 8,
0, rounds, blockbytes, hashbitlen / 8 );
for ( i = 0; i < 10; ++i )
transform_2way( &cube_2way_ctx_cache );
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
return 0;
}
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size )
{
const int len = size / 16;
const __m256i *in = (__m256i*)data;
int i;
// It is assumed data is aligned to 256 bits and is a multiple of 128 bits.
// Current usage data is either 64 or 80 bytes.
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_2way( sp );
sp->pos = 0;
}
}
return 0;
}
int cube_2way_close( cube_2way_context *sp, void *output )
{
__m256i *hash = (__m256i*)output;
int i;
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
1,0,0,0 ) );
for ( i = 0; i < 10; ++i )
transform_2way( &cube_2way_ctx_cache );
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->h[i];
return 0;
}
int cube_2way_update_close( cube_2way_context *sp, void *output,
const void *data, size_t size )
{
const int len = size / 16;
const __m256i *in = (__m256i*)data;
__m256i *hash = (__m256i*)output;
int i;
for ( i = 0; i < len; i++ )
{
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform_2way( sp );
sp->pos = 0;
}
}
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
transform_2way( sp );
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
1,0,0,0 ) );
for ( i = 0; i < 10; ++i )
transform_2way( &cube_2way_ctx_cache );
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->h[i];
return 0;
}
#endif
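
transform_2way() above leans on two lane-permutation helpers, mm256_swap128_64 and mm256_swap64_32 (plus m256_zero), which come from avxdefs.h, whose diff is suppressed further down. A minimal sketch of how such helpers can be built from a single 32-bit shuffle each; the actual names and definitions in avxdefs.h are assumptions here, not taken from this commit:

    #include <immintrin.h>

    // Swap the two 64-bit halves inside each 128-bit lane,
    // i.e. ( a1, a0 | b1, b0 ) -> ( a0, a1 | b0, b1 ) as 64-bit elements.
    static inline __m256i mm256_swap128_64_sketch( __m256i x )
    {  return _mm256_shuffle_epi32( x, 0x4e );  }

    // Swap the two 32-bit halves inside each 64-bit element.
    static inline __m256i mm256_swap64_32_sketch( __m256i x )
    {  return _mm256_shuffle_epi32( x, 0xb1 );  }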


@@ -0,0 +1,36 @@
#ifndef CUBE_HASH_2WAY_H__
#define CUBE_HASH_2WAY_H__
#if defined(__AVX2__)
#include <stdint.h>
#include "avxdefs.h"
// 2x128, 2 way parallel SSE2
struct _cube_2way_context
{
int hashlen; // __m128i
int rounds;
int blocksize; // __m128i
int pos; // number of __m128i read into x from current block
__m256i h[8] __attribute__ ((aligned (64)));
};
typedef struct _cube_2way_context cube_2way_context;
int cube_2way_init( cube_2way_context* sp, int hashbitlen, int rounds,
int blockbytes );
// reinitialize context with same parameters, much faster.
int cube_2way_reinit( cube_2way_context *sp );
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size );
int cube_2way_close( cube_2way_context *sp, void *output );
int cube_2way_update_close( cube_2way_context *sp, void *output,
const void *data, size_t size );
#endif
#endif
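
A hedged usage sketch of this interface, compiled only when __AVX2__ is defined. The parameters (512-bit hash, 16 rounds, 32-byte block) and the data layout (two streams interleaved 16 bytes at a time, stream A in the low 128-bit lane of each __m256i, stream B in the high lane) are assumptions based on how the one-way CubeHash-512 context is used elsewhere in the tree; the header itself does not state them.

    #include "cube-hash-2way.h"

    // Hash two independent 64-byte inputs with CubeHash-512, two ways at once.
    // 'in' holds 128 bytes of interleaved input and must be 32-byte aligned;
    // 'out' receives 128 bytes: two 64-byte digests, interleaved the same way.
    void cube512_2way_sketch( void *out, const void *in )
    {
       cube_2way_context ctx __attribute__ ((aligned (64)));

       cube_2way_init( &ctx, 512, 16, 32 );   // hashbitlen, rounds, blockbytes
       cube_2way_update( &ctx, in, 64 );      // size is per-stream bytes
       cube_2way_close( &ctx, out );
    }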


@@ -76,7 +76,6 @@ char* hodl_malloc_txs_request( struct work *work )
return req;
}
void hodl_build_block_header( struct work* g_work, uint32_t version,
uint32_t *prevhash, uint32_t *merkle_tree,
uint32_t ntime, uint32_t nbits )
@@ -88,16 +87,16 @@ void hodl_build_block_header( struct work* g_work, uint32_t version,
if ( have_stratum )
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = le32dec( prevhash + i );
g_work->data[ 1+i ] = le32dec( prevhash + i );
else
for (i = 0; i < 8; i++)
g_work->data[ 8-i ] = le32dec( prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( merkle_tree + i );
g_work->data[ 9+i ] = be32dec( merkle_tree + i );
g_work->data[ algo_gate.ntime_index ] = ntime;
g_work->data[ algo_gate.nbits_index ] = nbits;
g_work->data[ algo_gate.ntime_index ] = ntime;
g_work->data[ algo_gate.nbits_index ] = nbits;
g_work->data[22] = 0x80000000;
g_work->data[31] = 0x00000280;
}
@@ -194,8 +193,13 @@ bool register_hodl_algo( algo_gate_t* gate )
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
#endif
// if ( TOTAL_CHUNKS % opt_n_threads )
// {
// applog(LOG_ERR,"Thread count must be power of 2.");
// return false;
// }
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;
gate->get_new_work = (void*)&hodl_get_new_work;
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;


@@ -10,23 +10,26 @@
#ifndef NO_AES_NI
void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, void *MidHash)
void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
void *MidHash )
{
#ifdef __AVX__
uint64_t* TempBufs[SHA512_PARALLEL_N] ;
uint64_t* desination[SHA512_PARALLEL_N];
const int Chunk = TOTAL_CHUNKS / ThreadCount;
const uint32_t StartChunk = ThreadID * Chunk;
const uint32_t EndChunk = StartChunk + Chunk;
for ( int i=0; i<SHA512_PARALLEL_N; ++i )
#ifdef __AVX__
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
uint64_t* desination[ SHA512_PARALLEL_N ];
for ( int i=0; i < SHA512_PARALLEL_N; ++i )
{
TempBufs[i] = (uint64_t*)malloc(32);
memcpy(TempBufs[i], MidHash, 32);
TempBufs[i] = (uint64_t*)malloc( 32 );
memcpy( TempBufs[i], MidHash, 32 );
}
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); i+= SHA512_PARALLEL_N )
for ( uint32_t i = StartChunk; i < EndChunk; i += SHA512_PARALLEL_N )
{
for ( int j=0; j<SHA512_PARALLEL_N; ++j )
for ( int j = 0; j < SHA512_PARALLEL_N; ++j )
{
( (uint32_t*)TempBufs[j] )[0] = i + j;
desination[j] = (uint64_t*)( (uint8_t *)Garbage + ( (i+j)
@@ -35,15 +38,13 @@ void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, voi
sha512Compute32b_parallel( TempBufs, desination );
}
for ( int i=0; i<SHA512_PARALLEL_N; ++i )
for ( int i = 0; i < SHA512_PARALLEL_N; ++i )
free( TempBufs[i] );
#else
uint32_t TempBuf[8];
memcpy( TempBuf, MidHash, 32 );
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); ++i )
for ( uint32_t i = StartChunk; i < EndChunk; ++i )
{
TempBuf[0] = i;
SHA512( ( uint8_t *)TempBuf, 32,

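The refactor above hoists the per-thread range into Chunk, StartChunk and EndChunk and reuses it for both the SHA512-parallel and scalar paths. A small illustration of the arithmetic with a made-up chunk count; the real TOTAL_CHUNKS is defined in the hodl headers and is not shown in this diff:

    #include <stdint.h>

    // Illustration only: with a hypothetical TOTAL_CHUNKS of 64 and 4 threads,
    // each thread fills one contiguous quarter, e.g. thread 2 -> chunks 32..47.
    static void chunk_range_sketch( int ThreadID, int ThreadCount,
                                    uint32_t *StartChunk, uint32_t *EndChunk )
    {
       const int total_chunks = 64;                   // hypothetical value
       const int Chunk = total_chunks / ThreadCount;  // assumes an even division
       *StartChunk = ThreadID * Chunk;
       *EndChunk   = *StartChunk + Chunk;             // exclusive upper bound
    }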

@@ -55,23 +55,23 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, updates all args
#define G_4X64(a,b,c,d) \
a = _mm256_add_epi64( a, b ); \
d = mm256_rotr_64( _mm256_xor_si256( d, a), 32 ); \
d = mm256_ror_64( _mm256_xor_si256( d, a), 32 ); \
c = _mm256_add_epi64( c, d ); \
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 24 ); \
b = mm256_ror_64( _mm256_xor_si256( b, c ), 24 ); \
a = _mm256_add_epi64( a, b ); \
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
c = _mm256_add_epi64( c, d ); \
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );
b = mm256_ror_64( _mm256_xor_si256( b, c ), 63 );
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotr256_1x64( s1); \
s1 = mm256_ror256_1x64( s1); \
s2 = mm256_swap_128( s2 ); \
s3 = mm256_rotl256_1x64( s3 ); \
s3 = mm256_rol256_1x64( s3 ); \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotl256_1x64( s1 ); \
s1 = mm256_rol256_1x64( s1 ); \
s2 = mm256_swap_128( s2 ); \
s3 = mm256_rotr256_1x64( s3 );
s3 = mm256_ror256_1x64( s3 );
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
@@ -94,25 +94,25 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
// returns void, all args updated
#define G_2X64(a,b,c,d) \
a = _mm_add_epi64( a, b ); \
d = mm_rotr_64( _mm_xor_si128( d, a), 32 ); \
d = mm_ror_64( _mm_xor_si128( d, a), 32 ); \
c = _mm_add_epi64( c, d ); \
b = mm_rotr_64( _mm_xor_si128( b, c ), 24 ); \
b = mm_ror_64( _mm_xor_si128( b, c ), 24 ); \
a = _mm_add_epi64( a, b ); \
d = mm_rotr_64( _mm_xor_si128( d, a ), 16 ); \
d = mm_ror_64( _mm_xor_si128( d, a ), 16 ); \
c = _mm_add_epi64( c, d ); \
b = mm_rotr_64( _mm_xor_si128( b, c ), 63 );
b = mm_ror_64( _mm_xor_si128( b, c ), 63 );
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm_rotl256_1x64( s2, s3 ); \
mm_ror256_1x64( s2, s3 ); \
mm_swap_128( s4, s5 ); \
mm_rotr256_1x64( s6, s7 ); \
mm_rol256_1x64( s6, s7 ); \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm_rotr256_1x64( s2, s3 ); \
mm_rol256_1x64( s2, s3 ); \
mm_swap_128( s4, s5 ); \
mm_rotl256_1x64( s6, s7 );
mm_ror256_1x64( s6, s7 );
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \

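For reference, the step that G_4X64 and G_2X64 vectorize is the ordinary Blake2b G quarter-round, shown here as a scalar sketch; rotr64() is the helper already declared at the top of this file, reproduced so the sketch stands alone:

    #include <stdint.h>

    static inline uint64_t rotr64_sketch( const uint64_t w, const unsigned c )
    {  return ( w >> c ) | ( w << ( 64 - c ) );  }

    // One scalar G step; the AVX2/SSE2 macros above run four or two of these
    // in parallel, one per 64-bit lane.
    static inline void g_scalar_sketch( uint64_t *a, uint64_t *b,
                                        uint64_t *c, uint64_t *d )
    {
       *a += *b;  *d = rotr64_sketch( *d ^ *a, 32 );
       *c += *d;  *b = rotr64_sketch( *b ^ *c, 24 );
       *a += *b;  *d = rotr64_sketch( *d ^ *a, 16 );
       *c += *d;  *b = rotr64_sketch( *b ^ *c, 63 );
    }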

@@ -155,7 +155,7 @@ bool register_sha256t_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
gate->set_target = (void*)&sha256t_set_target;
// gate->set_target = (void*)&sha256t_set_target;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
}


@@ -52,21 +52,6 @@ extern "C"{
#define C32 SPH_C32
/*
* As of round 2 of the SHA-3 competition, the published reference
* implementation and test vectors are wrong, because they use
* big-endian AES tables while the internal decoding uses little-endian.
* The code below follows the specification. To turn it into a code
* which follows the reference implementation (the one called "BugFix"
* on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
* the code below (from the '#define AES_BIG_ENDIAN...' to the definition
* of the AES_ROUND_NOKEY macro) and replace it with the version which
* is commented out afterwards.
*/
#define AES_BIG_ENDIAN 0
#include "algo/sha/aes_helper.c"
static const sph_u32 IV512[] = {
C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),
C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),
@@ -74,210 +59,19 @@ static const sph_u32 IV512[] = {
C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
};
// Return hi 128 bits with elements shifted one lane with vacated lane filled
// with data rotated from lo.
// Partially rotate elements in two 128 bit vectors as one 256 bit vector
// and return the rotated high 128 bits.
// Similar to mm_rotr256_1x32 but only a partial rotation as lo is not
// completed. It's faster than a full rotation.
#if defined(__SSSE3__)
static inline __m128i mm_rotr256hi_1x32( __m128i hi, __m128i lo, int n )
{ return _mm_or_si128( _mm_srli_si128( hi, n<<2 ),
_mm_slli_si128( lo, 16 - (n<<2) ) );
}
#define mm_rotr256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 )
#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
sph_u32 t0 = (x0); \
sph_u32 t1 = (x1); \
sph_u32 t2 = (x2); \
sph_u32 t3 = (x3); \
AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
} while (0)
#else // SSE2
#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
sph_u32 kt; \
AES_ROUND_NOKEY(k1, k2, k3, k0); \
kt = (k0); \
(k0) = (k1); \
(k1) = (k2); \
(k2) = (k3); \
(k3) = kt; \
} while (0)
#define mm_rotr256hi_1x32( hi, lo ) \
_mm_or_si128( _mm_srli_si128( hi, 4 ), \
_mm_slli_si128( lo, 12 ) )
#if SPH_SMALL_FOOTPRINT_SHAVITE
/*
* This function assumes that "msg" is aligned for 32-bit access.
*/
static void
c512(sph_shavite_big_context *sc, const void *msg)
{
sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
sph_u32 rk[448];
size_t u;
int r, s;
#if SPH_LITTLE_ENDIAN
memcpy(rk, msg, 128);
#else
for (u = 0; u < 32; u += 4) {
rk[u + 0] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 0);
rk[u + 1] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 4);
rk[u + 2] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 8);
rk[u + 3] = sph_dec32le_aligned(
(const unsigned char *)msg + (u << 2) + 12);
}
#endif
u = 32;
for (;;) {
for (s = 0; s < 4; s ++) {
sph_u32 x0, x1, x2, x3;
x0 = rk[u - 31];
x1 = rk[u - 30];
x2 = rk[u - 29];
x3 = rk[u - 32];
AES_ROUND_NOKEY(x0, x1, x2, x3);
rk[u + 0] = x0 ^ rk[u - 4];
rk[u + 1] = x1 ^ rk[u - 3];
rk[u + 2] = x2 ^ rk[u - 2];
rk[u + 3] = x3 ^ rk[u - 1];
if (u == 32) {
rk[ 32] ^= sc->count0;
rk[ 33] ^= sc->count1;
rk[ 34] ^= sc->count2;
rk[ 35] ^= SPH_T32(~sc->count3);
} else if (u == 440) {
rk[440] ^= sc->count1;
rk[441] ^= sc->count0;
rk[442] ^= sc->count3;
rk[443] ^= SPH_T32(~sc->count2);
}
u += 4;
x0 = rk[u - 31];
x1 = rk[u - 30];
x2 = rk[u - 29];
x3 = rk[u - 32];
AES_ROUND_NOKEY(x0, x1, x2, x3);
rk[u + 0] = x0 ^ rk[u - 4];
rk[u + 1] = x1 ^ rk[u - 3];
rk[u + 2] = x2 ^ rk[u - 2];
rk[u + 3] = x3 ^ rk[u - 1];
if (u == 164) {
rk[164] ^= sc->count3;
rk[165] ^= sc->count2;
rk[166] ^= sc->count1;
rk[167] ^= SPH_T32(~sc->count0);
} else if (u == 316) {
rk[316] ^= sc->count2;
rk[317] ^= sc->count3;
rk[318] ^= sc->count0;
rk[319] ^= SPH_T32(~sc->count1);
}
u += 4;
}
if (u == 448)
break;
for (s = 0; s < 8; s ++) {
rk[u + 0] = rk[u - 32] ^ rk[u - 7];
rk[u + 1] = rk[u - 31] ^ rk[u - 6];
rk[u + 2] = rk[u - 30] ^ rk[u - 5];
rk[u + 3] = rk[u - 29] ^ rk[u - 4];
u += 4;
}
}
p0 = sc->h[0x0];
p1 = sc->h[0x1];
p2 = sc->h[0x2];
p3 = sc->h[0x3];
p4 = sc->h[0x4];
p5 = sc->h[0x5];
p6 = sc->h[0x6];
p7 = sc->h[0x7];
p8 = sc->h[0x8];
p9 = sc->h[0x9];
pA = sc->h[0xA];
pB = sc->h[0xB];
pC = sc->h[0xC];
pD = sc->h[0xD];
pE = sc->h[0xE];
pF = sc->h[0xF];
u = 0;
for (r = 0; r < 14; r ++) {
#define C512_ELT(l0, l1, l2, l3, r0, r1, r2, r3) do { \
sph_u32 x0, x1, x2, x3; \
x0 = r0 ^ rk[u ++]; \
x1 = r1 ^ rk[u ++]; \
x2 = r2 ^ rk[u ++]; \
x3 = r3 ^ rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk[u ++]; \
x1 ^= rk[u ++]; \
x2 ^= rk[u ++]; \
x3 ^= rk[u ++]; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
l0 ^= x0; \
l1 ^= x1; \
l2 ^= x2; \
l3 ^= x3; \
} while (0)
#define WROT(a, b, c, d) do { \
sph_u32 t = d; \
d = c; \
c = b; \
b = a; \
a = t; \
} while (0)
C512_ELT(p0, p1, p2, p3, p4, p5, p6, p7);
C512_ELT(p8, p9, pA, pB, pC, pD, pE, pF);
WROT(p0, p4, p8, pC);
WROT(p1, p5, p9, pD);
WROT(p2, p6, pA, pE);
WROT(p3, p7, pB, pF);
#undef C512_ELT
#undef WROT
}
sc->h[0x0] ^= p0;
sc->h[0x1] ^= p1;
sc->h[0x2] ^= p2;
sc->h[0x3] ^= p3;
sc->h[0x4] ^= p4;
sc->h[0x5] ^= p5;
sc->h[0x6] ^= p6;
sc->h[0x7] ^= p7;
sc->h[0x8] ^= p8;
sc->h[0x9] ^= p9;
sc->h[0xA] ^= pA;
sc->h[0xB] ^= pB;
sc->h[0xC] ^= pC;
sc->h[0xD] ^= pD;
sc->h[0xE] ^= pE;
sc->h[0xF] ^= pF;
}
#else
static void
c512( sph_shavite_big_context *sc, const void *msg )
@@ -331,7 +125,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
for ( r = 0; r < 3; r ++ )
{
// round 1, 5, 9
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
if ( r == 0 )
@@ -340,7 +134,7 @@ c512( sph_shavite_big_context *sc, const void *msg )
x = _mm_xor_si128( p0, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
if ( r == 1 )
@@ -349,33 +143,33 @@ c512( sph_shavite_big_context *sc, const void *msg )
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p3 = _mm_xor_si128( p3, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p2, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, k11 );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
if ( r == 2 )
@@ -388,80 +182,80 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 2, 6, 10
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
x = _mm_xor_si128( p3, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p2 = _mm_xor_si128( p2, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
x = _mm_xor_si128( p1, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );
// round 3, 7, 11
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
x = _mm_xor_si128( p2, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p1 = _mm_xor_si128( p1, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p0, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, k11 );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
x = _mm_xor_si128( x, k13 );
@@ -470,36 +264,36 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 4, 8, 12
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13, 1 ) );
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
x = _mm_xor_si128( p1, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00, 1 ) );
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01, 1 ) );
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02, 1 ) );
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p0 = _mm_xor_si128( p0, x );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03, 1 ) );
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
x = _mm_xor_si128( p3, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10, 1 ) );
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11, 1 ) );
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12, 1 ) );
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
x = _mm_xor_si128( x, k13 );
x = _mm_aesenc_si128( x, m128_zero );
@@ -508,44 +302,44 @@ c512( sph_shavite_big_context *sc, const void *msg )
// round 13
k00 = mm_rotr_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = mm_ror_1x32( _mm_aesenc_si128( k00, m128_zero ) );
k00 = _mm_xor_si128( k00, k13 );
x = _mm_xor_si128( p0, k00 );
x = _mm_aesenc_si128( x, m128_zero );
k01 = mm_rotr_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = mm_ror_1x32( _mm_aesenc_si128( k01, m128_zero ) );
k01 = _mm_xor_si128( k01, k00 );
x = _mm_xor_si128( x, k01 );
x = _mm_aesenc_si128( x, m128_zero );
k02 = mm_rotr_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = mm_ror_1x32( _mm_aesenc_si128( k02, m128_zero ) );
k02 = _mm_xor_si128( k02, k01 );
x = _mm_xor_si128( x, k02 );
x = _mm_aesenc_si128( x, m128_zero );
k03 = mm_rotr_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = mm_ror_1x32( _mm_aesenc_si128( k03, m128_zero ) );
k03 = _mm_xor_si128( k03, k02 );
x = _mm_xor_si128( x, k03 );
x = _mm_aesenc_si128( x, m128_zero );
p3 = _mm_xor_si128( p3, x );
k10 = mm_rotr_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = mm_ror_1x32( _mm_aesenc_si128( k10, m128_zero ) );
k10 = _mm_xor_si128( k10, k03 );
x = _mm_xor_si128( p2, k10 );
x = _mm_aesenc_si128( x, m128_zero );
k11 = mm_rotr_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = mm_ror_1x32( _mm_aesenc_si128( k11, m128_zero ) );
k11 = _mm_xor_si128( k11, k10 );
x = _mm_xor_si128( x, k11 );
x = _mm_aesenc_si128( x, m128_zero );
k12 = mm_rotr_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = mm_ror_1x32( _mm_aesenc_si128( k12, m128_zero ) );
k12 = _mm_xor_si128( k12, _mm_xor_si128( k11, _mm_set_epi32(
~sc->count2, sc->count3, sc->count0, sc->count1 ) ) );
x = _mm_xor_si128( x, k12 );
x = _mm_aesenc_si128( x, m128_zero );
k13 = mm_rotr_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = mm_ror_1x32( _mm_aesenc_si128( k13, m128_zero ) );
k13 = _mm_xor_si128( k13, k12 );
x = _mm_xor_si128( x, k13 );
@@ -558,7 +352,6 @@ c512( sph_shavite_big_context *sc, const void *msg )
h[3] = _mm_xor_si128( h[3], p1 );
}
#endif
static void
shavite_big_aesni_init( sph_shavite_big_context *sc, const sph_u32 *iv )

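The recurring change in this file is the rename from mm_rotr_1x32 to mm_ror_1x32 and, for SSSE3 builds, a one-instruction mm_rotr256hi_1x32 that drops the now-constant lane-count argument. A standalone sketch showing that the two definitions given near the top of the file agree for the single-lane case used throughout c512():

    #include <immintrin.h>

    // SSSE3: shift the 256-bit pair (lo:hi) right by one 32-bit lane and keep
    // the low 128 bits, in a single instruction.
    static inline __m128i rotr256hi_1x32_ssse3( __m128i hi, __m128i lo )
    {  return _mm_alignr_epi8( lo, hi, 4 );  }

    // SSE2 fallback: the same result from two byte shifts and an OR.
    static inline __m128i rotr256hi_1x32_sse2( __m128i hi, __m128i lo )
    {  return _mm_or_si128( _mm_srli_si128( hi, 4 ),
                            _mm_slli_si128( lo, 12 ) );  }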

@@ -1363,10 +1363,11 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
{
HMAC_SHA256_CTX ctx;
HMAC_SHA256_Init(&ctx, buf, buflen);
if ( client_key_hack ) // GlobalBoost-Y buggy yescrypt
HMAC_SHA256_Update(&ctx, salt, saltlen);
else // Proper yescrypt
HMAC_SHA256_Update(&ctx, "Client Key", 10);
if ( yescrypt_client_key )
HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
yescrypt_client_key_len );
else
HMAC_SHA256_Update( &ctx, salt, saltlen );
HMAC_SHA256_Final(sha256, &ctx);
}
/* Compute StoredKey */


@@ -25,7 +25,7 @@
#include "compat.h"
#include "yescrypt.h"
#include "sha256_Y.h"
#include "algo-gate-api.h"
#define BYTES2CHARS(bytes) \
@@ -366,7 +366,8 @@ static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
uint64_t YESCRYPT_N;
uint32_t YESCRYPT_R;
uint32_t YESCRYPT_P;
bool client_key_hack;
char *yescrypt_client_key = NULL;
int yescrypt_client_key_len = 0;
/* main hash 80 bytes input */
void yescrypt_hash( const char *input, char *output, uint32_t len )
@@ -436,7 +437,8 @@ bool register_yescrypt_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
client_key_hack = true;
yescrypt_client_key = NULL;
yescrypt_client_key_len = 0;
YESCRYPT_N = 2048;
YESCRYPT_R = 8;
YESCRYPT_P = 1;
@@ -447,7 +449,8 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescrypt_get_max64;
client_key_hack = false;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 2048;
YESCRYPT_R = 8;
YESCRYPT_P = 1;
@@ -458,10 +461,23 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
client_key_hack = false;
yescrypt_client_key = "Client Key";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 16;
YESCRYPT_P = 1;
return true;
}
bool register_yescryptr32_algo( algo_gate_t* gate )
{
yescrypt_gate_base( gate );
gate->get_max64 = (void*)&yescryptr16_get_max64;
yescrypt_client_key = "WaviBanana";
yescrypt_client_key_len = 10;
YESCRYPT_N = 4096;
YESCRYPT_R = 32;
YESCRYPT_P = 1;
return true;
}
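
register_yescryptr32_algo() shows the pattern the new yescrypt_client_key variables enable: a coin variant reduces to a personalization string plus N/R/P parameters. A hypothetical example following that pattern; the key string and cost parameters below are invented for illustration and are not part of this commit:

    // Hypothetical variant, not part of the diff.
    bool register_yescrypt_example_algo( algo_gate_t* gate )
    {
       yescrypt_gate_base( gate );
       gate->get_max64 = (void*)&yescryptr16_get_max64;
       yescrypt_client_key     = "ExampleKey";   // invented personalization string
       yescrypt_client_key_len = 10;             // length of the string above
       YESCRYPT_N = 4096;                        // invented cost parameters
       YESCRYPT_R = 32;
       YESCRYPT_P = 1;
       return true;
    }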


@@ -108,7 +108,8 @@ typedef enum {
__YESCRYPT_INIT_SHARED = 0x30000
} yescrypt_flags_t;
extern bool client_key_hack; // true for GlobalBoost-Y
extern char *yescrypt_client_key;
extern int yescrypt_client_key_len;
#define YESCRYPT_KNOWN_FLAGS \

api.c

@@ -158,11 +158,12 @@ static char *getsummary( char *params )
*buffer = '\0';
sprintf( buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;CPUS=%d;HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
"ALGO=%s;CPUS=%d;URL=%s;"
"HS=%.2f;KHS=%.2f;ACC=%d;REJ=%d;SOL=%d;"
"ACCMN=%.3f;DIFF=%s;TEMP=%.1f;FAN=%d;FREQ=%d;"
"UPTIME=%.0f;TS=%u|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, opt_n_threads, hrate, hrate/1000.0,
algo, opt_n_threads, short_url, hrate, hrate/1000.0,
accepted_count, rejected_count, solved_count,
accps, diff_str, cpu.cpu_temp, cpu.cpu_fan, cpu.cpu_clock,
uptime, (uint32_t) ts);
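
With the URL field added, a summary reply now follows this field order (every value below is invented for illustration; the API version, pool URL and statistics are placeholders):

    NAME=cpuminer-opt;VER=3.8.4.1;API=1.0;ALGO=yescryptr32;CPUS=4;URL=example.pool:3333;HS=950.00;KHS=0.95;ACC=12;REJ=0;SOL=0;ACCMN=1.500;DIFF=0.00123;TEMP=54.0;FAN=0;FREQ=0;UPTIME=3600;TS=1521734400|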

avxdefs.h

File diff suppressed because it is too large.

configure (vendored)

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.3.3.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.4.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.8.3.3'
PACKAGE_STRING='cpuminer-opt 3.8.3.3'
PACKAGE_VERSION='3.8.4.1'
PACKAGE_STRING='cpuminer-opt 3.8.4.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.8.3.3 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.8.4.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.8.3.3:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.8.4.1:";;
esac
cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.8.3.3
cpuminer-opt configure 3.8.4.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.8.3.3, which was
It was created by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.8.3.3'
VERSION='3.8.4.1'
cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.8.3.3, which was
This file was extended by cpuminer-opt $as_me 3.8.4.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.8.3.3
cpuminer-opt config.status 3.8.4.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"


@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.8.3.3])
AC_INIT([cpuminer-opt], [3.8.4.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM


@@ -103,7 +103,7 @@ enum algos opt_algo = ALGO_NULL;
int opt_scrypt_n = 0;
int opt_pluck_n = 128;
int opt_n_threads = 0;
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
__int128_t opt_affinity = -1LL;
#else
int64_t opt_affinity = -1LL;
@@ -200,20 +200,20 @@ static inline void drop_policy(void)
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
#endif
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
static void affine_to_cpu_mask( int id, unsigned __int128 mask )
#else
static void affine_to_cpu_mask( int id, unsigned long long mask )
#endif
{
cpu_set_t set;
CPU_ZERO(&set);
CPU_ZERO( &set );
uint8_t ncpus = (num_cpus > 256) ? 256 : num_cpus;
for ( uint8_t i = 0; i < ncpus; i++ )
{
// cpu mask
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
if( ( mask & ( (unsigned __int128)1ULL << i ) ) ) CPU_SET( i, &set );
#else
if( (ncpus > 64) || ( mask & (1ULL << i) ) ) CPU_SET( i, &set );
@@ -1792,7 +1792,7 @@ static void *miner_thread( void *userdata )
if (opt_debug)
applog( LOG_DEBUG, "Binding thread %d to cpu %d (mask %x)",
thr_id, thr_id % num_cpus, ( 1ULL << (thr_id % num_cpus) ) );
#ifdef __GNUC__
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
affine_to_cpu_mask( thr_id,
(unsigned __int128)1LL << (thr_id % num_cpus) );
#else

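The point of the stricter GCC 4.8 check and the unsigned __int128 mask is that thread affinity can now address CPUs beyond number 63. A self-contained sketch of the same mask-to-cpu_set_t expansion that affine_to_cpu_mask() performs above:

    #define _GNU_SOURCE
    #include <sched.h>

    // Illustration only: expand a 128-bit affinity mask into a cpu_set_t for a
    // machine with up to 128 logical CPUs, mirroring the loop in
    // affine_to_cpu_mask().
    static void mask128_to_cpuset_sketch( unsigned __int128 mask, cpu_set_t *set )
    {
       CPU_ZERO( set );
       for ( int i = 0; i < 128; i++ )
          if ( mask & ( (unsigned __int128)1 << i ) )
             CPU_SET( i, set );
    }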

@@ -424,7 +424,7 @@ extern size_t rpc2_bloblen;
extern uint32_t rpc2_target;
extern char *rpc2_job_id;
extern char *rpc_user;
extern char *short_url;
json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags);
bool rpc2_login(CURL *curl);
@@ -553,6 +553,7 @@ enum algos {
ALGO_YESCRYPT,
ALGO_YESCRYPTR8,
ALGO_YESCRYPTR16,
ALGO_YESCRYPTR32,
ALGO_ZR5,
ALGO_COUNT
};
@@ -629,6 +630,7 @@ static const char* const algo_names[] = {
"yescrypt",
"yescryptr8",
"yescryptr16",
"yescryptr32",
"zr5",
"\0"
};
@@ -764,6 +766,7 @@ Options:\n\
yescrypt Globlboost-Y (BSTY)\n\
yescryptr8 BitZeny (ZNY)\n\
yescryptr16 Yenten (YTN)\n\
yescryptr32 WAVI\n\
zr5 Ziftr\n\
-o, --url=URL URL of mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\