This commit is contained in:
Jay D Dee
2019-07-17 17:54:38 -04:00
parent e2d5762ef2
commit 6f49ba09b7
34 changed files with 1930 additions and 382 deletions

View File

@@ -71,6 +71,9 @@ cpuminer_SOURCES = \
algo/bmw/bmw256-hash-4way.c \ algo/bmw/bmw256-hash-4way.c \
algo/bmw/bmw512-hash-4way.c \ algo/bmw/bmw512-hash-4way.c \
algo/bmw/bmw256.c \ algo/bmw/bmw256.c \
algo/bmw/bmw512-gate.c \
algo/bmw/bmw512.c \
algo/bmw/bmw512-4way.c \
algo/cryptonight/cryptolight.c \ algo/cryptonight/cryptolight.c \
algo/cryptonight/cryptonight-common.c\ algo/cryptonight/cryptonight-common.c\
algo/cryptonight/cryptonight-aesni.c\ algo/cryptonight/cryptonight-aesni.c\
@@ -238,6 +241,8 @@ cpuminer_SOURCES = \
algo/x13/skunk-4way.c \ algo/x13/skunk-4way.c \
algo/x13/skunk.c \ algo/x13/skunk.c \
algo/x13/drop.c \ algo/x13/drop.c \
algo/x13/x13bcd-4way.c \
algo/x13/x13bcd.c \
algo/x14/x14-gate.c \ algo/x14/x14-gate.c \
algo/x14/x14.c \ algo/x14/x14.c \
algo/x14/x14-4way.c \ algo/x14/x14-4way.c \
@@ -254,6 +259,8 @@ cpuminer_SOURCES = \
algo/x16/x16r-gate.c \ algo/x16/x16r-gate.c \
algo/x16/x16r.c \ algo/x16/x16r.c \
algo/x16/x16r-4way.c \ algo/x16/x16r-4way.c \
algo/x16/x16rt.c \
algo/x16/x16rt-4way.c \
algo/x17/x17-gate.c \ algo/x17/x17-gate.c \
algo/x17/x17.c \ algo/x17/x17.c \
algo/x17/x17-4way.c \ algo/x17/x17-4way.c \

View File

@@ -58,6 +58,7 @@ Supported Algorithms
blakecoin blake256r8 blakecoin blake256r8
blake2s Blake-2 S blake2s Blake-2 S
bmw BMW 256 bmw BMW 256
bmw512 BMW 512
c11 Chaincoin c11 Chaincoin
decred decred
deep Deepcoin (DCN) deep Deepcoin (DCN)
@@ -113,11 +114,14 @@ Supported Algorithms
x11gost sib (SibCoin) x11gost sib (SibCoin)
x12 Galaxie Cash (GCH) x12 Galaxie Cash (GCH)
x13 X13 x13 X13
x13bcd bcd
x13sm3 hsr (Hshare) x13sm3 hsr (Hshare)
x14 X14 x14 X14
x15 X15 x15 X15
x16r Ravencoin (RVN) x16r Ravencoin (RVN)
x16s pigeoncoin (PGN) x16rt Gincoin (GIN)
x16rt-veil Veil (VEIL)
x16s Pigeoncoin (PGN)
x17 x17
xevan Bitsend (BSD) xevan Bitsend (BSD)
yescrypt Globalboost-Y (BSTY) yescrypt Globalboost-Y (BSTY)

View File

@@ -38,6 +38,10 @@ supported.
Change Log Change Log
---------- ----------
v3.9.6
New algos: bmw512, x16rt, x16rt-veil (alias veil), x13bcd (alias bcd).
v3.9.5.4 v3.9.5.4
Fixed sha256q AVX2 poor performance. Fixed sha256q AVX2 poor performance.

View File

@@ -170,6 +170,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break; case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break; // case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break; case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
case ALGO_BMW512: register_bmw512_algo ( gate ); break;
case ALGO_C11: register_c11_algo ( gate ); break; case ALGO_C11: register_c11_algo ( gate ); break;
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break; case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break; case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
@@ -227,10 +228,13 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X11GOST: register_x11gost_algo ( gate ); break; case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
case ALGO_X12: register_x12_algo ( gate ); break; case ALGO_X12: register_x12_algo ( gate ); break;
case ALGO_X13: register_x13_algo ( gate ); break; case ALGO_X13: register_x13_algo ( gate ); break;
case ALGO_X13BCD: register_x13bcd_algo ( gate ); break;
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break; case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
case ALGO_X14: register_x14_algo ( gate ); break; case ALGO_X14: register_x14_algo ( gate ); break;
case ALGO_X15: register_x15_algo ( gate ); break; case ALGO_X15: register_x15_algo ( gate ); break;
case ALGO_X16R: register_x16r_algo ( gate ); break; case ALGO_X16R: register_x16r_algo ( gate ); break;
case ALGO_X16RT: register_x16rt_algo ( gate ); break;
case ALGO_X16RT_VEIL: register_x16rt_veil_algo ( gate ); break;
case ALGO_X16S: register_x16s_algo ( gate ); break; case ALGO_X16S: register_x16s_algo ( gate ); break;
case ALGO_X17: register_x17_algo ( gate ); break; case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break; case ALGO_XEVAN: register_xevan_algo ( gate ); break;
@@ -327,7 +331,6 @@ const char* const algo_alias_map[][2] =
{ "lyra2", "lyra2re" }, { "lyra2", "lyra2re" },
{ "lyra2v2", "lyra2rev2" }, { "lyra2v2", "lyra2rev2" },
{ "lyra2v3", "lyra2rev3" }, { "lyra2v3", "lyra2rev3" },
{ "lyra2zoin", "lyra2z330" },
{ "myrgr", "myr-gr" }, { "myrgr", "myr-gr" },
{ "myriad", "myr-gr" }, { "myriad", "myr-gr" },
{ "neo", "neoscrypt" }, { "neo", "neoscrypt" },
@@ -335,11 +338,9 @@ const char* const algo_alias_map[][2] =
// { "sia", "blake2b" }, // { "sia", "blake2b" },
{ "sib", "x11gost" }, { "sib", "x11gost" },
{ "timetravel8", "timetravel" }, { "timetravel8", "timetravel" },
{ "ziftr", "zr5" }, { "veil", "x16rt-veil" },
{ "yenten", "yescryptr16" }, { "yenten", "yescryptr16" },
{ "yescryptr8k", "yescrypt" }, { "ziftr", "zr5" },
{ "zcoin", "lyra2z" },
{ "zoin", "lyra2z330" },
{ NULL, NULL } { NULL, NULL }
}; };

View File

@@ -36,35 +36,31 @@ void argon2d_crds_hash( void *output, const void *input )
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce, int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t _ALIGN(64) endiandata[20]; uint32_t _ALIGN(64) endiandata[20];
uint32_t _ALIGN(64) hash[8]; uint32_t _ALIGN(64) hash[8];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t nonce = first_nonce;
const uint32_t first_nonce = pdata[19]; swab32_array( endiandata, pdata, 20 );
const uint32_t Htarg = ptarget[7];
uint32_t nonce = first_nonce; do {
be32enc(&endiandata[19], nonce);
argon2d_crds_hash( hash, endiandata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
swab32_array( endiandata, pdata, 20 ); pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
do { return 0;
be32enc(&endiandata[19], nonce);
argon2d_crds_hash( hash, endiandata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio(work, hash);
return 1;
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
} }
bool register_argon2d_crds_algo( algo_gate_t* gate ) bool register_argon2d_crds_algo( algo_gate_t* gate )
@@ -107,35 +103,32 @@ void argon2d_dyn_hash( void *output, const void *input )
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce, int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t _ALIGN(64) endiandata[20]; uint32_t _ALIGN(64) endiandata[20];
uint32_t _ALIGN(64) hash[8]; uint32_t _ALIGN(64) hash[8];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t nonce = first_nonce;
const uint32_t first_nonce = pdata[19]; swab32_array( endiandata, pdata, 20 );
const uint32_t Htarg = ptarget[7];
uint32_t nonce = first_nonce; do
{
be32enc(&endiandata[19], nonce);
argon2d_dyn_hash( hash, endiandata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
swab32_array( endiandata, pdata, 20 ); pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
do { return 0;
be32enc(&endiandata[19], nonce);
argon2d_dyn_hash( hash, endiandata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio(work, hash);
return 1;
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
} }
bool register_argon2d_dyn_algo( algo_gate_t* gate ) bool register_argon2d_dyn_algo( algo_gate_t* gate )
@@ -171,11 +164,10 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
be32enc( &endiandata[19], n ); be32enc( &endiandata[19], n );
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80, argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80,
(char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 ); (char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) ) if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) && !opt_benchmark )
{ {
*hashes_done = n - first_nonce + 1;
pdata[19] = n; pdata[19] = n;
return true; submit_solution( work, vhash, mythr );
} }
n++; n++;

59
algo/bmw/bmw512-4way.c Normal file
View File

@@ -0,0 +1,59 @@
#include "bmw512-gate.h"
#ifdef BMW512_4WAY
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
//#include "sph_keccak.h"
#include "bmw-hash-4way.h"
// One-shot 4-way BMW-512: initialise a fresh context, absorb 80 bytes of
// input and write the result to state.
//   state - receives whatever bmw512_4way_close emits
//   input - data handed unchanged to bmw512_4way; the caller in this file
//           passes a 4x64 interleaved buffer
void bmw512hash_4way(void *state, const void *input)
{
   bmw512_4way_context bmw_ctx;

   bmw512_4way_init( &bmw_ctx );
   bmw512_4way( &bmw_ctx, input, 80 );
   bmw512_4way_close( &bmw_ctx, state );
}
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t hash[16*4] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[25]); // 3*8+1
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
// const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
mm256_bswap32_intrlv80_4x64( vdata, pdata );
do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
bmw512hash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif

20
algo/bmw/bmw512-gate.c Normal file
View File

@@ -0,0 +1,20 @@
#include "bmw512-gate.h"
// Returns the fixed max64 value for bmw512: 0x7ffff (524287).
// Installed as the gate's get_max64 callback by register_bmw512_algo.
int64_t bmw512_get_max64()
{
   const int64_t max64 = 0x7ffffLL;
   return max64;
}
// Fill in the algo gate for bmw512.
//   gate - gate structure to populate; optimizations, set_target, get_max64,
//          scanhash and hash are overwritten
// The 4-way scanhash/hash pair is installed when BMW512_4WAY is defined,
// otherwise the scalar pair is used.
// Always returns true.
bool register_bmw512_algo( algo_gate_t* gate )
{
  gate->optimizations = AVX2_OPT;
  gate->set_target = (void*)&alt_set_target;
  gate->get_max64 = (void*)&bmw512_get_max64;
#if defined (BMW512_4WAY)
  gate->scanhash = (void*)&scanhash_bmw512_4way;
  gate->hash = (void*)&bmw512hash_4way;
#else
  gate->scanhash = (void*)&scanhash_bmw512;
  gate->hash = (void*)&bmw512hash;
#endif
  return true;
}

23
algo/bmw/bmw512-gate.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef BMW512_GATE_H__
#define BMW512_GATE_H__

#include "algo-gate-api.h"
#include <stdint.h>

// The 4-way variant is enabled only when the compiler targets AVX2.
#ifdef __AVX2__
  #define BMW512_4WAY 1
#endif

// Scalar implementation, declared unconditionally.
void bmw512hash( void *state, const void *input );
int scanhash_bmw512( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr );

// 4-way implementation, declared only when BMW512_4WAY is defined.
#ifdef BMW512_4WAY
void bmw512hash_4way( void *state, const void *input );
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
#endif

#endif // BMW512_GATE_H__

53
algo/bmw/bmw512.c Normal file
View File

@@ -0,0 +1,53 @@
#include "algo-gate-api.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "sph_bmw.h"
// One-shot scalar BMW-512 over 80 bytes of input.
//   state - receives the first 32 bytes of the digest buffer
//   input - 80 bytes of data to hash
void bmw512hash(void *state, const void *input)
{
   uint32_t digest[32];
   sph_bmw512_context bmw_ctx;

   sph_bmw512_init( &bmw_ctx );
   sph_bmw512( &bmw_ctx, input, 80 );
   sph_bmw512_close( &bmw_ctx, digest );

   // Only the leading 32 bytes are handed back to the caller.
   memcpy( state, digest, 32 );
}
// Scan nonces one at a time with the scalar BMW-512 hash.
//   work        - supplies the block data (work->data) and target
//                 (work->target); data[19] is the starting nonce and is
//                 updated with every nonce tried
//   max_nonce   - scanning stops once the nonce reaches this value
//   hashes_done - receives (last nonce - first nonce + 1)
//   mythr       - thread info; its id selects the work_restart flag polled
// Returns true as soon as a hash passes fulltest (data[19] then holds the
// winning nonce), otherwise 0 when the range is exhausted or a restart is
// requested.
int scanhash_bmw512( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t _ALIGN(32) hash64[8];
   uint32_t endiandata[32];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const int thr_id = mythr->id;   // thr_id arg is deprecated
   // Start one below so the pre-increment in the loop tries first_nonce first.
   uint32_t n = first_nonce - 1;

   // Words 0..18 are encoded once; word 19 (the nonce) is redone per try.
   for ( int i = 0; i < 19; i++ )
      be32enc( &endiandata[i], pdata[i] );

   do
   {
      n++;
      pdata[19] = n;
      be32enc( &endiandata[19], n );
      bmw512hash( hash64, endiandata );

      // Cheap pre-filter on the top 24 bits before the full comparison.
      if ( ( hash64[7] & 0xFFFFFF00 ) == 0 )
      {
         if ( fulltest( hash64, ptarget ) )
         {
            *hashes_done = n - first_nonce + 1;
            return true;
         }
      }
   } while ( n < max_nonce && !work_restart[thr_id].restart );

   *hashes_done = n - first_nonce + 1;
   pdata[19] = n;
   return 0;
}

View File

@@ -7,6 +7,7 @@
// 2x128 // 2x128
/*
// The result of hashing 10 rounds of initial data which consists of params // The result of hashing 10 rounds of initial data which consists of params
// zero padded. // zero padded.
static const uint64_t IV256[] = static const uint64_t IV256[] =
@@ -24,13 +25,14 @@ static const uint64_t IV512[] =
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934, 0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246 0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
}; };
*/
static void transform_2way( cube_2way_context *sp ) static void transform_2way( cube_2way_context *sp )
{ {
int r; int r;
const int rounds = sp->rounds; const int rounds = sp->rounds;
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3; __m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1;
x0 = _mm256_load_si256( (__m256i*)sp->h ); x0 = _mm256_load_si256( (__m256i*)sp->h );
x1 = _mm256_load_si256( (__m256i*)sp->h + 1 ); x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
@@ -47,18 +49,12 @@ static void transform_2way( cube_2way_context *sp )
x5 = _mm256_add_epi32( x1, x5 ); x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 ); x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 ); x7 = _mm256_add_epi32( x3, x7 );
y0 = x2; y0 = x0;
y1 = x3; y1 = x1;
y2 = x0; x0 = mm256_rol_32( x2, 7 );
y3 = x1; x1 = mm256_rol_32( x3, 7 );
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ), x2 = mm256_rol_32( y0, 7 );
_mm256_srli_epi32( y0, 25 ) ); x3 = mm256_rol_32( y1, 7 );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
_mm256_srli_epi32( y1, 25 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
_mm256_srli_epi32( y2, 25 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
_mm256_srli_epi32( y3, 25 ) );
x0 = _mm256_xor_si256( x0, x4 ); x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 ); x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 ); x2 = _mm256_xor_si256( x2, x6 );
@@ -71,18 +67,12 @@ static void transform_2way( cube_2way_context *sp )
x5 = _mm256_add_epi32( x1, x5 ); x5 = _mm256_add_epi32( x1, x5 );
x6 = _mm256_add_epi32( x2, x6 ); x6 = _mm256_add_epi32( x2, x6 );
x7 = _mm256_add_epi32( x3, x7 ); x7 = _mm256_add_epi32( x3, x7 );
y0 = x1; y0 = x0;
y1 = x0; y1 = x2;
y2 = x3; x0 = mm256_rol_32( x1, 11 );
y3 = x2; x1 = mm256_rol_32( y0, 11 );
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ), x2 = mm256_rol_32( x3, 11 );
_mm256_srli_epi32( y0, 21 ) ); x3 = mm256_rol_32( y1, 11 );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
_mm256_srli_epi32( y1, 21 ) );
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
_mm256_srli_epi32( y2, 21 ) );
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
_mm256_srli_epi32( y3, 21 ) );
x0 = _mm256_xor_si256( x0, x4 ); x0 = _mm256_xor_si256( x0, x4 );
x1 = _mm256_xor_si256( x1, x5 ); x1 = _mm256_xor_si256( x1, x5 );
x2 = _mm256_xor_si256( x2, x6 ); x2 = _mm256_xor_si256( x2, x6 );
@@ -107,23 +97,40 @@ static void transform_2way( cube_2way_context *sp )
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds, int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
int blockbytes ) int blockbytes )
{ {
const uint64_t* iv = hashbitlen == 512 ? IV512 : IV256; __m128i* h = (__m128i*)sp->h;
sp->hashlen = hashbitlen/128; sp->hashlen = hashbitlen/128;
sp->blocksize = blockbytes/16; sp->blocksize = blockbytes/16;
sp->rounds = rounds; sp->rounds = rounds;
sp->pos = 0; sp->pos = 0;
__m256i* h = (__m256i*)sp->h; if ( hashbitlen == 512 )
{
h[0] = _mm256_set_epi64x( iv[ 1], iv[ 0], iv[ 1], iv[ 0] );
h[1] = _mm256_set_epi64x( iv[ 3], iv[ 2], iv[ 3], iv[ 2] );
h[2] = _mm256_set_epi64x( iv[ 5], iv[ 4], iv[ 5], iv[ 4] );
h[3] = _mm256_set_epi64x( iv[ 7], iv[ 6], iv[ 7], iv[ 6] );
h[4] = _mm256_set_epi64x( iv[ 9], iv[ 8], iv[ 9], iv[ 8] );
h[5] = _mm256_set_epi64x( iv[11], iv[10], iv[11], iv[10] );
h[6] = _mm256_set_epi64x( iv[13], iv[12], iv[13], iv[12] );
h[7] = _mm256_set_epi64x( iv[15], iv[14], iv[15], iv[14] );
h[ 0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
h[ 2] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
h[ 4] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
h[ 6] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
h[ 8] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
h[10] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
h[12] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
h[14] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
}
else
{
h[ 0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
h[ 2] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
h[ 4] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
h[ 6] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
h[ 8] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
h[10] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
h[12] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
h[14] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
}
return 0; return 0;
} }
@@ -165,7 +172,7 @@ int cube_2way_close( cube_2way_context *sp, void *output )
for ( i = 0; i < 10; ++i ) transform_2way( sp ); for ( i = 0; i < 10; ++i ) transform_2way( sp );
for ( i = 0; i < sp->hashlen; i++ ) hash[i] = sp->h[i]; memcpy( hash, sp->h, sp->hashlen<<5 );
return 0; return 0;
} }
@@ -198,7 +205,7 @@ int cube_2way_update_close( cube_2way_context *sp, void *output,
for ( i = 0; i < 10; ++i ) transform_2way( sp ); for ( i = 0; i < 10; ++i ) transform_2way( sp );
for ( i = 0; i < sp->hashlen; i++ ) hash[i] = sp->h[i]; memcpy( hash, sp->h, sp->hashlen<<5 );
return 0; return 0;
} }

View File

@@ -16,24 +16,6 @@
#include "simd-utils.h" #include "simd-utils.h"
#include <stdio.h> #include <stdio.h>
// The result of hashing 10 rounds of initial data which is params and
// mostly zeros.
static const uint64_t IV256[] =
{
0xCCD6F29FEA2BD4B4, 0x35481EAE63117E71, 0xE5D94E6322512D5B, 0xF4CC12BE7E624131,
0x42AF2070C2D0B696, 0x3361DA8CD0720C35, 0x8EF8AD8328CCECA4, 0x40E5FBAB4680AC00,
0x6107FBD5D89041C3, 0xF0B266796C859D41, 0x5FA2560309392549, 0x93CB628565C892FD,
0x9E4B4E602AF2B5AE, 0x85254725774ABFDD, 0x4AB6AAD615815AEB, 0xD6032C0A9CDAF8AF
};
static const uint64_t IV512[] =
{
0x50F494D42AEA2A61, 0x4167D83E2D538B8B, 0xC701CF8C3FEE2313, 0x50AC5695CC39968E,
0xA647A8B34D42C787, 0x825B453797CF0BEF, 0xF22090C4EEF864D2, 0xA23911AED0E5CD33,
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
};
static void transform( cubehashParam *sp ) static void transform( cubehashParam *sp )
{ {
int r; int r;
@@ -53,26 +35,22 @@ static void transform( cubehashParam *sp )
x2 = _mm256_add_epi32( x0, x2 ); x2 = _mm256_add_epi32( x0, x2 );
x3 = _mm256_add_epi32( x1, x3 ); x3 = _mm256_add_epi32( x1, x3 );
y0 = x0; y0 = x0;
x0 = _mm256_xor_si256( _mm256_slli_epi32( x1, 7 ), x0 = mm256_rol_32( x1, 7 );
_mm256_srli_epi32( x1, 25 ) ); x1 = mm256_rol_32( y0, 7 );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
_mm256_srli_epi32( y0, 25 ) );
x0 = _mm256_xor_si256( x0, x2 ); x0 = _mm256_xor_si256( x0, x2 );
x1 = _mm256_xor_si256( x1, x3 ); x1 = _mm256_xor_si256( x1, x3 );
x2 = _mm256_shuffle_epi32( x2, 0x4e ); x2 = mm256_swap64_128( x2 );
x3 = _mm256_shuffle_epi32( x3, 0x4e ); x3 = mm256_swap64_128( x3 );
x2 = _mm256_add_epi32( x0, x2 ); x2 = _mm256_add_epi32( x0, x2 );
x3 = _mm256_add_epi32( x1, x3 ); x3 = _mm256_add_epi32( x1, x3 );
y0 = _mm256_permute4x64_epi64( x0, 0x4e ); y0 = mm256_swap_128( x0 );
y1 = _mm256_permute4x64_epi64( x1, 0x4e ); y1 = mm256_swap_128( x1 );
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ), x0 = mm256_rol_32( y0, 11 );
_mm256_srli_epi32( y0, 21 ) ); x1 = mm256_rol_32( y1, 11 );
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
_mm256_srli_epi32( y1, 21 ) );
x0 = _mm256_xor_si256( x0, x2 ); x0 = _mm256_xor_si256( x0, x2 );
x1 = _mm256_xor_si256( x1, x3 ); x1 = _mm256_xor_si256( x1, x3 );
x2 = _mm256_shuffle_epi32( x2, 0xb1 ); x2 = mm256_swap32_64( x2 );
x3 = _mm256_shuffle_epi32( x3, 0xb1 ); x3 = mm256_swap32_64( x3 );
} }
_mm256_store_si256( (__m256i*)sp->x, x0 ); _mm256_store_si256( (__m256i*)sp->x, x0 );
@@ -147,37 +125,58 @@ static void transform( cubehashParam *sp )
#endif #endif
} // transform } // transform
/*
// The result of hashing 10 rounds of initial data which is params and
// mostly zeros.
static const uint64_t IV256[] =
{
0xCCD6F29FEA2BD4B4, 0x35481EAE63117E71, 0xE5D94E6322512D5B, 0xF4CC12BE7E624131,
0x42AF2070C2D0B696, 0x3361DA8CD0720C35, 0x8EF8AD8328CCECA4, 0x40E5FBAB4680AC00,
0x6107FBD5D89041C3, 0xF0B266796C859D41, 0x5FA2560309392549, 0x93CB628565C892FD,
0x9E4B4E602AF2B5AE, 0x85254725774ABFDD, 0x4AB6AAD615815AEB, 0xD6032C0A9CDAF8AF
};
static const uint64_t IV512[] =
{
0x50F494D42AEA2A61, 0x4167D83E2D538B8B, 0xC701CF8C3FEE2313, 0x50AC5695CC39968E,
0xA647A8B34D42C787, 0x825B453797CF0BEF, 0xF22090C4EEF864D2, 0xA23911AED0E5CD33,
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
};
*/
int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes) int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
{ {
const uint64_t* iv = hashbitlen == 512 ? IV512 : IV256; __m128i *x = (__m128i*)sp->x;
sp->hashlen = hashbitlen/128; sp->hashlen = hashbitlen/128;
sp->blocksize = blockbytes/16; sp->blocksize = blockbytes/16;
sp->rounds = rounds; sp->rounds = rounds;
sp->pos = 0; sp->pos = 0;
#if defined(__AVX2__)
__m256i* x = (__m256i*)sp->x; if ( hashbitlen == 512 )
{
x[0] = _mm256_set_epi64x( iv[ 3], iv[ 2], iv[ 1], iv[ 0] ); x[0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
x[1] = _mm256_set_epi64x( iv[ 7], iv[ 6], iv[ 5], iv[ 4] ); x[1] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
x[2] = _mm256_set_epi64x( iv[11], iv[10], iv[ 9], iv[ 8] ); x[2] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
x[3] = _mm256_set_epi64x( iv[15], iv[14], iv[13], iv[12] ); x[3] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
x[4] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
x[5] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
x[6] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
x[7] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
}
else
{
x[0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
x[1] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
x[2] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
x[3] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
x[4] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
x[5] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
x[6] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
x[7] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
}
#else
__m128i* x = (__m128i*)sp->x;
x[0] = _mm_set_epi64x( iv[ 1], iv[ 0] );
x[1] = _mm_set_epi64x( iv[ 3], iv[ 2] );
x[2] = _mm_set_epi64x( iv[ 5], iv[ 4] );
x[3] = _mm_set_epi64x( iv[ 7], iv[ 6] );
x[4] = _mm_set_epi64x( iv[ 9], iv[ 8] );
x[5] = _mm_set_epi64x( iv[11], iv[10] );
x[6] = _mm_set_epi64x( iv[13], iv[12] );
x[7] = _mm_set_epi64x( iv[15], iv[14] );
#endif
return SUCCESS; return SUCCESS;
} }

View File

@@ -323,7 +323,7 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
mpz_clears(magipi, magisw, product, bns0, bns1, NULL); mpz_clears(magipi, magisw, product, bns0, bns1, NULL);
*hashes_done = n - first_nonce + 1; *hashes_done = n - first_nonce + 1;
return rc; return 0;
} }
bool register_m7m_algo( algo_gate_t *gate ) bool register_m7m_algo( algo_gate_t *gate )

283
algo/x13/x13bcd-4way.c Normal file
View File

@@ -0,0 +1,283 @@
#include "x13sm3-gate.h"
#if defined(X13SM3_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
//#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/sm3/sm3-hash-4way.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h"
// Bundle of one context per hash stage used by x13bcd_4way_hash.
// The Luffa member is commented out; no Luffa stage runs in this chain.
typedef struct {
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
// luffa_2way_context luffa;
cubehashParam cube;
sph_shavite512_context shavite;
simd_2way_context simd;
hashState_echo echo;
sm3_4way_ctx_t sm3;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
} x13bcd_4way_ctx_holder;
// Pre-initialised template filled in by init_x13bcd_4way_ctx; the hash
// function copies it into a local holder on every call and also uses it to
// restore individual serial contexts between lanes.
x13bcd_4way_ctx_holder x13bcd_4way_ctx __attribute__ ((aligned (64)));
// Per-thread Blake context copied over ctx.blake at the start of each hash.
// NOTE(review): presumably the midstate after the first 64 bytes of the
// header; it is written outside the visible code - confirm in the scanhash.
static __thread blake512_4way_context x13bcd_ctx_mid;
// Initialise every stage context of the global template x13bcd_4way_ctx.
// x13bcd_4way_hash copies this template instead of re-initialising each
// stage, so this must have run before the first hash is computed.
// Luffa is not initialised: the holder has no active Luffa member.
void init_x13bcd_4way_ctx()
{
     blake512_4way_init( &x13bcd_4way_ctx.blake );
     bmw512_4way_init( &x13bcd_4way_ctx.bmw );
     init_groestl( &x13bcd_4way_ctx.groestl, 64 );
     skein512_4way_init( &x13bcd_4way_ctx.skein );
     jh512_4way_init( &x13bcd_4way_ctx.jh );
     keccak512_4way_init( &x13bcd_4way_ctx.keccak );
     cubehashInit( &x13bcd_4way_ctx.cube, 512, 16, 32 );
     sph_shavite512_init( &x13bcd_4way_ctx.shavite );
     simd_2way_init( &x13bcd_4way_ctx.simd, 512 );
     init_echo( &x13bcd_4way_ctx.echo, 512 );
     sm3_4way_init( &x13bcd_4way_ctx.sm3 );
     hamsi512_4way_init( &x13bcd_4way_ctx.hamsi );
     sph_fugue512_init( &x13bcd_4way_ctx.fugue );
}
// Compute the x13bcd hash chain for four lanes at once.
//   state - output buffer of at least 128 bytes; receives the first 32
//           bytes of each lane's final digest, lanes 0..3 back to back
//   input - 4x64 interleaved block data; only the 16 bytes per lane that
//           start at byte offset 64<<2 are read here, the rest is taken
//           from x13bcd_ctx_mid
// Stage order: Blake, BMW, Groestl, Skein, JH, Keccak, SM3, Cubehash,
// Shavite, Simd, Echo, Hamsi, Fugue. No Luffa stage runs in this chain;
// SM3 runs directly after Keccak.
// Stages with a 4-way implementation work on the interleaved buffer vhash;
// serial and 2-way stages work on the per-lane buffers hash0..hash3 and
// restore their context from the global template between lanes.
void x13bcd_4way_hash( void *state, const void *input )
{
     uint64_t hash0[8] __attribute__ ((aligned (64)));
     uint64_t hash1[8] __attribute__ ((aligned (64)));
     uint64_t hash2[8] __attribute__ ((aligned (64)));
     uint64_t hash3[8] __attribute__ ((aligned (64)));
     uint64_t vhash[8*4] __attribute__ ((aligned (64)));
     uint32_t sm3_vhash[32*4] __attribute__ ((aligned (64)));
     x13bcd_4way_ctx_holder ctx;
     memcpy( &ctx, &x13bcd_4way_ctx, sizeof(x13bcd_4way_ctx) );

     // Blake: continue from the saved per-thread context and absorb the
     // remaining 16 bytes per lane.
     // NOTE(review): presumably equivalent to hashing all 80 bytes from a
     // fresh context, provided x13bcd_ctx_mid holds the state after the
     // first 64 bytes - confirm where it is set.
     memcpy( &ctx.blake, &x13bcd_ctx_mid, sizeof(x13bcd_ctx_mid) );
     blake512_4way( &ctx.blake, (const char*)input + (64<<2), 16 );
     blake512_4way_close( &ctx.blake, vhash );

     // Bmw
     bmw512_4way( &ctx.bmw, vhash, 64 );
     bmw512_4way_close( &ctx.bmw, vhash );

     // Serial
     dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

     // Groestl
     update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
     reinit_groestl( &ctx.groestl );
     update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
     reinit_groestl( &ctx.groestl );
     update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
     reinit_groestl( &ctx.groestl );
     update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );

     // Parallel 4way
     intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );

     // Skein
     skein512_4way( &ctx.skein, vhash, 64 );
     skein512_4way_close( &ctx.skein, vhash );

     // JH
     jh512_4way( &ctx.jh, vhash, 64 );
     jh512_4way_close( &ctx.jh, vhash );

     // Keccak
     keccak512_4way( &ctx.keccak, vhash, 64 );
     keccak512_4way_close( &ctx.keccak, vhash );

     // Re-interleave from 4x64 to 4x32 for SM3.
     dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
     intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );

     // SM3 parallel 32 bit
     // The output buffer is zeroed first because a full 512 bits per lane
     // are de-interleaved from it below.
     memset( sm3_vhash, 0, sizeof sm3_vhash );
     sm3_4way( &ctx.sm3, vhash, 64 );
     sm3_4way_close( &ctx.sm3, sm3_vhash );
     dintrlv_4x32( hash0, hash1, hash2, hash3, sm3_vhash, 512 );

     // Cubehash
     cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
     memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
     cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1, 64 );
     memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
     cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2, 64 );
     memcpy( &ctx.cube, &x13bcd_4way_ctx.cube, sizeof(cubehashParam) );
     cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );

     // Shavite
     sph_shavite512( &ctx.shavite, hash0, 64 );
     sph_shavite512_close( &ctx.shavite, hash0 );
     memcpy( &ctx.shavite, &x13bcd_4way_ctx.shavite,
             sizeof(sph_shavite512_context) );
     sph_shavite512( &ctx.shavite, hash1, 64 );
     sph_shavite512_close( &ctx.shavite, hash1 );
     memcpy( &ctx.shavite, &x13bcd_4way_ctx.shavite,
             sizeof(sph_shavite512_context) );
     sph_shavite512( &ctx.shavite, hash2, 64 );
     sph_shavite512_close( &ctx.shavite, hash2 );
     memcpy( &ctx.shavite, &x13bcd_4way_ctx.shavite,
             sizeof(sph_shavite512_context) );
     sph_shavite512( &ctx.shavite, hash3, 64 );
     sph_shavite512_close( &ctx.shavite, hash3 );

     // Simd: two lanes per pass, context re-initialised for the second pair
     intrlv_2x128( vhash, hash0, hash1, 512 );
     simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
     dintrlv_2x128( hash0, hash1, vhash, 512 );
     intrlv_2x128( vhash, hash2, hash3, 512 );
     simd_2way_init( &ctx.simd, 512 );
     simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
     dintrlv_2x128( hash2, hash3, vhash, 512 );

     // Echo
     update_final_echo( &ctx.echo, (BitSequence *)hash0,
                       (const BitSequence *) hash0, 512 );
     memcpy( &ctx.echo, &x13bcd_4way_ctx.echo, sizeof(hashState_echo) );
     update_final_echo( &ctx.echo, (BitSequence *)hash1,
                       (const BitSequence *) hash1, 512 );
     memcpy( &ctx.echo, &x13bcd_4way_ctx.echo, sizeof(hashState_echo) );
     update_final_echo( &ctx.echo, (BitSequence *)hash2,
                       (const BitSequence *) hash2, 512 );
     memcpy( &ctx.echo, &x13bcd_4way_ctx.echo, sizeof(hashState_echo) );
     update_final_echo( &ctx.echo, (BitSequence *)hash3,
                       (const BitSequence *) hash3, 512 );

     // Hamsi parallel 4x32x2
     intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
     hamsi512_4way( &ctx.hamsi, vhash, 64 );
     hamsi512_4way_close( &ctx.hamsi, vhash );
     dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

     // Fugue serial
     sph_fugue512( &ctx.fugue, hash0, 64 );
     sph_fugue512_close( &ctx.fugue, hash0 );
     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
     sph_fugue512( &ctx.fugue, hash1, 64 );
     sph_fugue512_close( &ctx.fugue, hash1 );
     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
     sph_fugue512( &ctx.fugue, hash2, 64 );
     sph_fugue512_close( &ctx.fugue, hash2 );
     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(sph_fugue512_context) );
     sph_fugue512( &ctx.fugue, hash3, 64 );
     sph_fugue512_close( &ctx.fugue, hash3 );

     // Emit the first 32 bytes of each lane's digest, lanes back to back.
     memcpy( (char*)state,      hash0, 32 );
     memcpy( (char*)state + 32, hash1, 32 );
     memcpy( (char*)state + 64, hash2, 32 );
     memcpy( (char*)state + 96, hash3, 32 );
}
// Scan nonces four at a time for the 4-way x13bcd hash.
//
// Starts at work->data[19] and keeps going until n reaches max_nonce or
// work_restart[thr_id].restart becomes set. Each lane whose hash passes the
// quick mask test on word 7 and then fulltest() is passed to
// submit_lane_solution(), except when opt_benchmark is set.
// On exit *hashes_done = n - first_nonce + 1. Always returns 0.
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
// Four 8-word (32-byte) results, one per lane, as laid out by x13bcd_4way_hash.
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
// 256-bit vector 9 of vdata; with a 4x64 interleave this would be 64-bit word 9
// of every lane (32-bit words 18 and 19) -- NOTE(review): layout inferred from
// the interleave helper's name, confirm.
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
// htmax[m] / masks[m] pairs: the first htmax entry >= Htarg selects the mask
// used for the cheap pre-test on hash word 7.
uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
0xFFFFF000, 0xFFFF0000, 0 };
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// Absorb the first 64 bytes of vdata into x13bcd_ctx_mid once per call;
// presumably x13bcd_4way_hash resumes from this midstate -- confirm there.
blake512_4way_init( &x13bcd_ctx_mid );
blake512_4way( &x13bcd_ctx_mid, vdata, 64 );
// NOTE(review): when Htarg > 0x10000000 no htmax entry matches, so the loop
// body never runs and no nonce is hashed in this call.
for ( int m=0; m < 6; m++ )
if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do
{
// Insert byte-swapped nonces n..n+3 into the four lanes, blending them with
// the existing contents of *noncev.
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x13bcd_4way_hash( hash, vdata );
pdata[19] = n;
// Lane i's result starts at hash + 8*i.
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 ) )
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
// Only the first matching threshold is used.
break;
}
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif

258
algo/x13/x13bcd.c Normal file
View File

@@ -0,0 +1,258 @@
#include "x13sm3-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/groestl/sph_groestl.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/sm3/sph_sm3.h"
//#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/blake/sse2/blake.c"
#include "algo/bmw/sse2/bmw.c"
#include "algo/keccak/sse2/keccak.c"
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
// Working contexts for the hash stages of the scalar x13bcd chain that keep
// their state in a context object. Groestl and echo use the sph
// implementations when NO_AES_NI is defined and the AES-NI ones otherwise.
typedef struct {
#ifdef NO_AES_NI
sph_groestl512_context groestl;
sph_echo512_context echo;
#else
hashState_echo echo;
hashState_groestl groestl;
#endif
// hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sm3_ctx_t sm3;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
} x13bcd_ctx_holder;
// Pre-initialised template: filled by init_x13bcd_ctx() and copied into a
// local context by x13bcd_hash() at the start of every hash.
x13bcd_ctx_holder x13bcd_ctx;
void init_x13bcd_ctx()
{
#ifdef NO_AES_NI
sph_groestl512_init(&x13bcd_ctx.groestl);
sph_echo512_init(&x13bcd_ctx.echo);
#else
init_echo(&x13bcd_ctx.echo, 512);
init_groestl(&x13bcd_ctx.groestl, 64 );
#endif
// init_luffa(&x13bcd_ctx.luffa,512);
cubehashInit(&x13bcd_ctx.cube,512,16,32);
sph_shavite512_init(&x13bcd_ctx.shavite);
init_sd(&x13bcd_ctx.simd,512);
sm3_init( &x13bcd_ctx.sm3 );
sph_hamsi512_init(&x13bcd_ctx.hamsi);
sph_fugue512_init(&x13bcd_ctx.fugue);
};
// Scalar x13bcd hash of one input block.
//
// Runs, in order: blake, bmw, groestl, skein, jh, keccak, SM3, cubehash,
// shavite, simd, echo, hamsi and fugue. Every stage after the first reads the
// previous result from `hash`, except cubehash, which reads the SM3 output
// buffer. The first 32 bytes of the final result are copied to `output`.
//
// output: receives 32 bytes.
// input:  not referenced by name in this function; presumably consumed by the
//         BLK_* macros -- TODO confirm in the included blake macro file.
void x13bcd_hash(void *output, const void *input)
{
unsigned char hash[128] __attribute__ ((aligned (32)));
// Start from the pre-initialised template so no stage needs its own init call.
x13bcd_ctx_holder ctx;
memcpy(&ctx, &x13bcd_ctx, sizeof(x13bcd_ctx));
// Not referenced directly below; presumably scratch state used by the
// BLK/BMW/SKN/JH/KEC macros -- confirm against the included sse2 sources.
unsigned char hashbuf[128];
size_t hashptr;
sph_u64 hashctA;
sph_u64 hashctB;
//---blake1---
DECL_BLK;
BLK_I;
BLK_W;
BLK_C;
//---bmw2---
DECL_BMW;
BMW_I;
BMW_U;
// Helper macros defined only around BMW_C and removed right after it.
#define M(x) sph_dec64le_aligned(data + 8 * (x))
#define H(x) (h[x])
#define dH(x) (dh[x])
BMW_C;
#undef M
#undef H
#undef dH
//---groestl----
#ifdef NO_AES_NI
sph_groestl512 (&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)hash, 512 );
#endif
//---skein4---
DECL_SKN;
SKN_I;
SKN_U;
SKN_C;
//---jh5------
DECL_JH;
JH_H;
//---keccak6---
DECL_KEC;
KEC_I;
KEC_U;
KEC_C;
// SM3 takes the place of luffa in this chain. Its result goes into a
// separate buffer that is zeroed first; cubehash then consumes the first
// 64 bytes of that buffer and writes its result back into hash.
uint32_t sm3_hash[32] __attribute__ ((aligned (32)));
memset(sm3_hash, 0, sizeof sm3_hash);
sph_sm3(&ctx.sm3, hash, 64);
sph_sm3_close(&ctx.sm3, sm3_hash);
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)sm3_hash, 64 );
/*
//--- luffa7
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
// 8 Cube
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)hash, 64 );
*/
// 9 Shavite
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
// 10 Simd
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
//11---echo---
#ifdef NO_AES_NI
sph_echo512(&ctx.echo, hash, 64);
sph_echo512_close(&ctx.echo, hash);
#else
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
/*
uint32_t sm3_hash[32] __attribute__ ((aligned (32)));
memset(sm3_hash, 0, sizeof sm3_hash);
sph_sm3(&ctx.sm3, hash, 64);
sph_sm3_close(&ctx.sm3, sm3_hash);
sph_hamsi512(&ctx.hamsi, sm3_hash, 64);
*/
// 12 Hamsi
sph_hamsi512(&ctx.hamsi, hash, 64);
sph_hamsi512_close(&ctx.hamsi, hash);
// 13 Fugue
sph_fugue512(&ctx.fugue, hash, 64);
sph_fugue512_close(&ctx.fugue, hash);
// EMMS: leave the x87/MMX state clean for the caller.
asm volatile ("emms");
// Only the first 256 bits of the final result are returned.
memcpy(output, hash, 32);
}
// Scalar nonce scanner for x13bcd.
//
// Hashes successive nonces starting at work->data[19] until a hash passes
// fulltest(), n reaches max_nonce, or work_restart[thr_id].restart is set.
// Returns true with pdata[19] holding the winning nonce when a solution is
// found, 0 otherwise. *hashes_done is set to n - first_nonce + 1 either way.
int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr)
{
   // Threshold/mask pairs: the first threshold >= Htarg picks the mask used
   // for the cheap pre-test on hash word 7.
   static const uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
   static const uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
                                     0xFFFFF000, 0xFFFF0000, 0 };

   uint32_t endiandata[20] __attribute__((aligned(64)));
   uint32_t hash64[8] __attribute__((aligned(64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t Htarg = ptarget[7];
   int thr_id = mythr->id;  // thr_id arg is deprecated
   uint32_t n = first_nonce - 1;   // pre-incremented before each hash
   int level = -1;

   // The hash consumes the header with every 32-bit word byte-swapped.
   swab32_array( endiandata, pdata, 20 );

#ifdef DEBUG_ALGO
   if (Htarg != 0)
      printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif

   // Pick the first threshold that covers Htarg; none matches when
   // Htarg > 0x10000000, in which case nothing is scanned.
   for ( int m = 0; m < 6; m++ )
   {
      if ( Htarg <= htmax[m] )
      {
         level = m;
         break;
      }
   }

   if ( level >= 0 )
   {
      const uint32_t mask = masks[level];

      do
      {
         n++;
         pdata[19] = n;
         be32enc( &endiandata[19], n );
         x13bcd_hash( hash64, endiandata );

#ifndef DEBUG_ALGO
         if ( ( hash64[7] & mask ) == 0 && fulltest( hash64, ptarget ) )
         {
            *hashes_done = n - first_nonce + 1;
            return true;
         }
#else
         if (!(n % 0x1000) && !thr_id) printf(".");
         if ( ( hash64[7] & mask ) == 0 )
         {
            printf("[%d]",thr_id);
            if ( fulltest( hash64, ptarget ) )
            {
               work_set_target_ratio( work, hash64 );
               *hashes_done = n - first_nonce + 1;
               return true;
            }
         }
#endif
      } while ( n < max_nonce && !work_restart[thr_id].restart );
   }

   *hashes_done = n - first_nonce + 1;
   pdata[19] = n;
   return 0;
}

View File

@@ -16,3 +16,19 @@ bool register_x13sm3_algo( algo_gate_t* gate )
return true; return true;
}; };
// Hook x13bcd into the algo gate. With X13SM3_4WAY defined the 4-way
// context, scanner and hash are installed; otherwise the scalar ones.
// Always returns true.
bool register_x13bcd_algo( algo_gate_t* gate )
{
#if defined (X13SM3_4WAY)
   init_x13bcd_4way_ctx();
   gate->hash      = (void*)&x13bcd_4way_hash;
   gate->scanhash  = (void*)&scanhash_x13bcd_4way;
#else
   init_x13bcd_ctx();
   gate->hash      = (void*)&x13bcd_hash;
   gate->scanhash  = (void*)&scanhash_x13bcd;
#endif

   // Settings shared by both builds.
   gate->get_max64     = (void*)&get_max64_0x3ffff;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;

   return true;
}

View File

@@ -10,23 +10,31 @@
bool register_x13sm3_algo( algo_gate_t* gate ); bool register_x13sm3_algo( algo_gate_t* gate );
bool register_x13bcd_algo( algo_gate_t* gate );
#if defined(X13SM3_4WAY) #if defined(X13SM3_4WAY)
void x13sm3_4way_hash( void *state, const void *input ); void x13sm3_4way_hash( void *state, const void *input );
int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce, int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
void init_x13sm3_4way_ctx(); void init_x13sm3_4way_ctx();
void x13bcd_4way_hash( void *state, const void *input );
int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_4way_ctx();
#endif #endif
void x13sm3_hash( void *state, const void *input ); void x13sm3_hash( void *state, const void *input );
int scanhash_x13sm3( struct work *work, uint32_t max_nonce, int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
void init_x13sm3_ctx(); void init_x13sm3_ctx();
void x13bcd_hash( void *state, const void *input );
int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_x13bcd_ctx();
#endif #endif

View File

@@ -62,3 +62,149 @@ bool register_x16s_algo( algo_gate_t* gate )
return true; return true;
}; };
////////////////
//
// X16RT
// Compute the hash that seeds the X16RT algorithm order.
//
// timeStamp: time value; its low 7 bits are cleared before hashing, so all
//            timestamps within the same 128-unit window give the same hash.
// timeHash:  output buffer for the sha256d result (callers in this project
//            pass at least 8 uint32_t).
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash )
{
   // Stay unsigned: converting a value with bit 31 set to int32_t is
   // implementation-defined, and only the 4 raw bytes are hashed anyway.
   const uint32_t maskedTime = timeStamp & 0xffffff80u;

   sha256d( (unsigned char*)timeHash, (const unsigned char*)&maskedTime,
            sizeof( maskedTime ) );
}
// Turn the leading bytes of timeHash into the hash-order string.
//
// Writes X16R_HASH_FUNC_COUNT upper-case hex digits plus a terminating NUL
// to output. Digit j is taken from byte (15 - j) >> 1 of timeHash: the high
// nibble for even j, the low nibble for odd j, so bytes are walked from
// index 7 down to 0.
void x16rt_getAlgoString( const uint32_t *timeHash, char *output)
{
   static const char hexDigits[] = "0123456789ABCDEF";
   const uint8_t *bytes = (const uint8_t*)timeHash;
   uint8_t pos;

   for ( pos = 0; pos < X16R_HASH_FUNC_COUNT; pos++ )
   {
      const uint8_t idx = (15 - pos) >> 1; // 16 ascii hex chars, reversed
      uint8_t nibble;

      if ( pos & 1 )
         nibble = bytes[idx] & 0xF;
      else
         nibble = bytes[idx] >> 4;

      output[pos] = hexDigits[nibble];
   }
   output[pos] = '\0';
}
// Build the block header in g_work->data for a Veil-style stratum job.
//
// Besides version, previous block hash, ntime, nbits and the padding words,
// the header carries in data[9..16] the sha256d of a 257-byte blob assembled
// from: merkle root, witness merkle root, the byte 0x04, four
// (8-byte amount, 32-byte denomination hash) pairs for 10, 100, 1000 and
// 10000, and a 32-byte all-zero field.
//
// g_work: work item to fill; its data, merkleroothash, witmerkleroothash and
//         denom* members are overwritten.
// sctx:   stratum context supplying the job; its extranonce2 is incremented.
void x16rt_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
   // Length in bytes of the binary blob that is hashed (514 hex digits).
   enum { VEIL_DATA_LEN = 257 };

   uchar merkle_tree[64] = { 0 };
   size_t t;
   int i;

   algo_gate.gen_merkle_root( merkle_tree, sctx );

   // Increment extranonce2: bump byte t and carry on while it wraps to zero.
   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );

   // Assemble block header
   memset( g_work->data, 0, sizeof(g_work->data) );
   g_work->data[0] = le32dec( sctx->job.version );

   if ( have_stratum )
      for ( i = 0; i < 8; i++ )
         g_work->data[ 1+i ] = le32dec( (uint32_t*)sctx->job.prevhash + i );
   else
      for ( i = 0; i < 8; i++ )
         g_work->data[ 8-i ] = le32dec( (uint32_t*)sctx->job.prevhash + i );

   g_work->data[ algo_gate.ntime_index ] = le32dec( sctx->job.ntime );
   g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
   g_work->data[20] = 0x80000000;
   g_work->data[31] = 0x00000280;

   // Both root fields are filled from the same merkle_tree buffer, with the
   // word order reversed.
   for ( i = 0; i < 8; i++ )
      g_work->merkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);
   for ( i = 0; i < 8; i++ )
      g_work->witmerkleroothash[7 - i] = be32dec((uint32_t *)merkle_tree + i);

   for ( i = 0; i < 8; i++ )
      g_work->denom10[i] = le32dec((uint32_t *)sctx->job.denom10 + i);
   for ( i = 0; i < 8; i++ )
      g_work->denom100[i] = le32dec((uint32_t *)sctx->job.denom100 + i);
   for ( i = 0; i < 8; i++ )
      g_work->denom1000[i] = le32dec((uint32_t *)sctx->job.denom1000 + i);
   for ( i = 0; i < 8; i++ )
      g_work->denom10000[i] = le32dec((uint32_t *)sctx->job.denom10000 + i);

   uint32_t pofnhash[8];
   memset(pofnhash, 0x00, 32);

   char denom10_str      [ 2 * sizeof( g_work->denom10 )           + 1 ];
   char denom100_str     [ 2 * sizeof( g_work->denom100 )          + 1 ];
   char denom1000_str    [ 2 * sizeof( g_work->denom1000 )         + 1 ];
   char denom10000_str   [ 2 * sizeof( g_work->denom10000 )        + 1 ];
   char merkleroot_str   [ 2 * sizeof( g_work->merkleroothash )    + 1 ];
   char witmerkleroot_str[ 2 * sizeof( g_work->witmerkleroothash ) + 1 ];
   char pofn_str         [ 2 * sizeof( pofnhash )                  + 1 ];

   cbin2hex( denom10_str,       (char*) g_work->denom10,           32 );
   cbin2hex( denom100_str,      (char*) g_work->denom100,          32 );
   cbin2hex( denom1000_str,     (char*) g_work->denom1000,         32 );
   cbin2hex( denom10000_str,    (char*) g_work->denom10000,        32 );
   cbin2hex( merkleroot_str,    (char*) g_work->merkleroothash,    32 );
   cbin2hex( witmerkleroot_str, (char*) g_work->witmerkleroothash, 32 );
   cbin2hex( pofn_str,          (char*) pofnhash,                  32 );

   // Hex text of the blob. The buffer is sized from its parts (seven hex
   // strings, the 2-digit count and four 16-digit amounts), so the
   // terminating NUL always fits; snprintf bounds the write regardless.
   char data[ sizeof merkleroot_str + sizeof witmerkleroot_str
              + sizeof denom10_str + sizeof denom100_str
              + sizeof denom1000_str + sizeof denom10000_str
              + sizeof pofn_str + 2 + 4 * 16 ];

   // Build the block header veildatahash in hex
   snprintf( data, sizeof data, "%s%s%s%s%s%s%s%s%s%s%s%s",
             merkleroot_str, witmerkleroot_str, "04",
             "0a00000000000000", denom10_str,
             "6400000000000000", denom100_str,
             "e803000000000000", denom1000_str,
             "1027000000000000", denom10000_str, pofn_str );

   // Convert the hex to binary
   uint32_t test[100];
   hex2bin( (unsigned char*)(&test), data, VEIL_DATA_LEN );

   // Compute the sha256d of the binary
   uint32_t _ALIGN(64) hash[8];
   sha256d( (unsigned char*)hash, (unsigned char*)&(test), VEIL_DATA_LEN );

   // Assign the veildatahash in the block header (words 16 down to 9)
   for ( i = 0; i < 8; i++ )
      g_work->data[16 - i] = le32dec(hash + i);
}
// Hook X16RT into the algo gate. The 4-way scanner and hash are installed
// when X16R_4WAY is defined, the scalar ones otherwise. Always returns true.
bool register_x16rt_algo( algo_gate_t* gate )
{
#if defined (X16R_4WAY)
   gate->hash     = (void*)&x16rt_4way_hash;
   gate->scanhash = (void*)&scanhash_x16rt_4way;
#else
   gate->hash     = (void*)&x16rt_hash;
   gate->scanhash = (void*)&scanhash_x16rt;
#endif

   // Settings shared by both builds.
   gate->set_target    = (void*)&alt_set_target;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;

   return true;
}
// Hook the Veil flavour of X16RT into the algo gate: the same scanner, hash,
// optimisation flags and target setter as register_x16rt_algo(), plus the
// Veil-specific header builder.
bool register_x16rt_veil_algo( algo_gate_t* gate )
{
   const bool registered = register_x16rt_algo( gate );

   gate->build_extraheader = (void*)&x16rt_build_extraheader;
   return registered;
}

View File

@@ -4,6 +4,7 @@
#include "algo-gate-api.h" #include "algo-gate-api.h"
#include "simd-utils.h" #include "simd-utils.h"
#include <stdint.h> #include <stdint.h>
#include <unistd.h>
#if defined(__AVX2__) && defined(__AES__) #if defined(__AVX2__) && defined(__AES__)
#define X16R_4WAY #define X16R_4WAY
@@ -30,11 +31,15 @@ enum x16r_Algo {
}; };
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ); void (*x16_r_s_getAlgoString) ( const uint8_t*, char* );
void x16r_getAlgoString( const uint8_t* prevblock, char *output ); void x16r_getAlgoString( const uint8_t *prevblock, char *output );
void x16s_getAlgoString( const uint8_t* prevblock, char *output ); void x16s_getAlgoString( const uint8_t *prevblock, char *output );
void x16rt_getAlgoString( const uint32_t *timeHash, char *output );
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash );
bool register_x16r_algo( algo_gate_t* gate ); bool register_x16r_algo( algo_gate_t* gate );
bool register_x16s_algo( algo_gate_t* gate ); bool register_x16s_algo( algo_gate_t* gate );
bool register_x16rt_algo( algo_gate_t* gate );
#if defined(X16R_4WAY) #if defined(X16R_4WAY)
@@ -42,11 +47,18 @@ void x16r_4way_hash( void *state, const void *input );
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce, int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
void x16rt_4way_hash( void *state, const void *input );
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif #endif
void x16r_hash( void *state, const void *input ); void x16r_hash( void *state, const void *input );
int scanhash_x16r( struct work *work, uint32_t max_nonce, int scanhash_x16r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
void x16rt_hash( void *state, const void *input );
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif #endif

353
algo/x16/x16rt-4way.c Normal file
View File

@@ -0,0 +1,353 @@
#include "x16r-gate.h"
#if defined (X16R_4WAY)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sha2-hash-4way.h"
// Per-thread cache maintained by scanhash_x16rt_4way():
// ntime for which hashOrder was last derived (UINT32_MAX = not yet derived).
static __thread uint32_t s_ntime = UINT32_MAX;
// Set to true once hashOrder has been derived at least once.
static __thread bool s_implemented = false;
// One character per hash stage ('0'-'9', 'A'-'F') plus terminating NUL.
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
// Context storage for every hash function the 4-way chain can select.
// A union suffices because x16rt_4way_hash() re-initialises the relevant
// member before each use and never needs two contexts at the same time.
union _x16rt_4way_context_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_echo echo;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
cubehashParam cube;
sph_shavite512_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
};
typedef union _x16rt_4way_context_overlay x16rt_4way_context_overlay;
// 4-way X16RT hash: runs 16 hash stages over four lanes at once.
//
// The stage order comes from the thread-local hashOrder string; each
// character ('0'-'9', 'A'-'F') selects one function of enum x16r_Algo.
// A character that matches no case leaves hash0-3 untouched for that stage.
//
// output: receives 4 x 32 bytes, lane i at byte offset 32*i.
// input:  80 bytes per lane, 64-bit interleaved across the four lanes.
void x16rt_4way_hash( void* output, const void* input )
{
uint32_t hash0[24] __attribute__ ((aligned (64)));
uint32_t hash1[24] __attribute__ ((aligned (64)));
uint32_t hash2[24] __attribute__ ((aligned (64)));
uint32_t hash3[24] __attribute__ ((aligned (64)));
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
x16rt_4way_context_overlay ctx;
// in0-3 alias hash0-3 for the whole function; every stage works in place.
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
void *in2 = (void*) hash2;
void *in3 = (void*) hash3;
// Bytes consumed per lane: 80 for the first stage, 64 afterwards.
int size = 80;
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
/*
void *in = (void*) input;
uint32_t *in32 = (uint32_t*) hash0;
uint32_t ntime = in32[17];
if ( s_ntime == UINT32_MAX )
{
uint32_t _ALIGN(64) timeHash[8];
x16rt_getTimeHash(ntime, &timeHash);
x16rt_getAlgoString(&timeHash[0], hashOrder);
}
*/
// The input data is available both 64-bit interleaved (input) and
// deinterleaved (hash0-3, filled above).
// When the first function is a 4x64 one (blake, bmw, skein, jh, keccak) it
// reads the interleaved input directly and skips the re-interleave step.
// Every other function, and every later stage, reads its data
// deinterleaved from hash0-3 through the in0-3 aliases.
// Every function must leave its result deinterleaved in hash0-3.
// size is 80 for the first stage and 64 for all following stages.
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
// Hex digit -> algorithm index.
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
blake512_4way_init( &ctx.blake );
if ( i == 0 )
blake512_4way( &ctx.blake, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way( &ctx.blake, vhash, size );
}
blake512_4way_close( &ctx.blake, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case BMW:
bmw512_4way_init( &ctx.bmw );
if ( i == 0 )
bmw512_4way( &ctx.bmw, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way( &ctx.bmw, vhash, size );
}
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case GROESTL:
// Serial: one lane at a time, context re-initialised for each lane.
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)in0, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)in1, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)in2, size<<3 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)in3, size<<3 );
break;
case SKEIN:
skein512_4way_init( &ctx.skein );
if ( i == 0 )
skein512_4way( &ctx.skein, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way( &ctx.skein, vhash, size );
}
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case JH:
jh512_4way_init( &ctx.jh );
if ( i == 0 )
jh512_4way( &ctx.jh, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way( &ctx.jh, vhash, size );
}
jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case KECCAK:
keccak512_4way_init( &ctx.keccak );
if ( i == 0 )
keccak512_4way( &ctx.keccak, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
keccak512_4way( &ctx.keccak, vhash, size );
}
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case LUFFA:
// 2-way: lanes 0/1 first, then lanes 2/3. Length is passed as size here,
// whereas the SIMD case below passes size<<3.
intrlv_2x128( vhash, in0, in1, size<<3 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, in2, in3, size<<3 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size);
dintrlv_2x128( hash2, hash3, vhash, 512 );
break;
case CUBEHASH:
// Serial, one lane at a time.
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0,
(const byte*)in0, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
(const byte*)in1, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
(const byte*)in2, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
(const byte*)in3, size );
break;
case SHAVITE:
// Serial, one lane at a time.
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
break;
case SIMD:
// 2-way: lanes 0/1 first, then lanes 2/3.
intrlv_2x128( vhash, in0, in1, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, in2, in3, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
break;
case ECHO:
// Serial, one lane at a time.
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
(const BitSequence*)in0, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash1,
(const BitSequence*)in1, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash2,
(const BitSequence*)in2, size<<3 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash3,
(const BitSequence*)in3, size<<3 );
break;
case HAMSI:
// 4x64, but always re-interleaves from hash0-3, even as the first stage.
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, size );
hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case FUGUE:
// Serial, one lane at a time.
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
break;
case SHABAL:
// The only stage that uses 32-bit interleaving.
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, size );
shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case WHIRLPOOL:
// Serial, one lane at a time.
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
break;
case SHA_512:
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, size );
sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
}
// Every stage after the first consumes the 64-byte result of its predecessor.
size = 64;
}
// Return the first 256 bits of each lane's final result.
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
}
// Scan nonces four at a time for the 4-way X16RT hash.
//
// Re-derives the per-thread hash order whenever the work's ntime differs
// from the cached one, then hashes nonces n..n+3 per iteration, starting at
// work->data[19], until n reaches max_nonce or the thread's restart flag is
// set. Lanes whose hash word 7 is <= Htarg and which pass fulltest() are
// passed to submit_lane_solution(), except when opt_benchmark is set.
// *hashes_done receives n - first_nonce + 1. Always returns 0.
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done, struct thr_info *mythr)
{
   uint32_t hash[4*16] __attribute__ ((aligned (64)));
   uint32_t vdata[24*4] __attribute__ ((aligned (64)));
   uint32_t endiandata[20] __attribute__((aligned(64)));
   uint32_t _ALIGN(64) timeHash[4*8];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t Htarg = ptarget[7];
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
   int thr_id = mythr->id;  // thr_id arg is deprecated
   // 256-bit vector 9 of vdata: 64-bit word 9 of each of the four interleaved
   // lanes, i.e. 32-bit header words 18 and 19.
   __m256i *noncev = (__m256i*)vdata + 9;   // aligned
   volatile uint8_t *restart = &(work_restart[thr_id].restart);

   // Byte-swap all 20 header words (2 x 256 bits + 1 x 128 bits).
   casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
   casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
   casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

   uint32_t ntime = swab32( pdata[17] );
   if ( s_ntime != ntime )
   {
      // New time value: recompute the hash order and cache it per thread.
      x16rt_getTimeHash( ntime, &timeHash );
      x16rt_getAlgoString( &timeHash[0], hashOrder );
      s_ntime = ntime;
      s_implemented = true;
      if ( opt_debug && !thr_id )
         // %08x needs an unsigned int: log the first word of the time hash,
         // not the array itself (which would be passed as a pointer).
         applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
                           hashOrder, ntime, timeHash[0] );
   }

   if ( !s_implemented )
   {
      applog( LOG_WARNING, "s not implemented");
      sleep(1);
      return 0;
   }

   // Note: Htarg was read above, so this only affects fulltest().
   if ( opt_benchmark )
      ptarget[7] = 0x0cff;

   // Replicate the byte-swapped header into all four lanes, 64-bit interleaved.
   uint64_t *edata = (uint64_t*)endiandata;
   intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

   do
   {
      // Insert byte-swapped nonces n..n+3 into the four lanes, blending them
      // with the existing contents of *noncev.
      *noncev = mm256_intrlv_blend_32( mm256_bswap_32(
             _mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );

      x16rt_4way_hash( hash, vdata );
      pdata[19] = n;

      // Lane i's 8-word result starts at hash + 8*i.
      for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
      if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
      {
         pdata[19] = n+i;
         submit_lane_solution( work, hash+(i<<3), mythr, i );
      }
      n += 4;
   } while ( ( n < max_nonce ) && !(*restart) );

   *hashes_done = n - first_nonce + 1;
   return 0;
}
#endif

239
algo/x16/x16rt.c Normal file
View File

@@ -0,0 +1,239 @@
#include "x16r-gate.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
// Per-thread cache maintained by scanhash_x16rt():
// ntime for which hashOrder was last derived (UINT32_MAX = not yet derived).
static __thread uint32_t s_ntime = UINT32_MAX;
// Set to true once hashOrder has been derived at least once.
static __thread bool s_implemented = false;
// One character per hash stage ('0'-'9', 'A'-'F') plus terminating NUL.
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
// Context storage for every hash function the scalar chain can select.
// A union suffices because x16rt_hash() re-initialises the relevant member
// before each use. Echo and groestl use the AES-NI implementations when
// __AES__ is defined and the sph ones otherwise.
union _x16rt_context_overlay
{
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
};
typedef union _x16rt_context_overlay x16rt_context_overlay;
void x16rt_hash( void* output, const void* input )
{
uint32_t _ALIGN(128) hash[16];
x16rt_context_overlay ctx;
int size = 80;
void *in = (void*) input;
/*
void *in = (void*) input;
uint32_t *in32 = (uint32_t*) in;
uint32_t ntime = in32[17];
if ( s_ntime == UINT32_MAX )
{
uint32_t _ALIGN(64) timeHash[8];
x16rt_getTimeHash(ntime, &timeHash);
x16rt_getAlgoString(&timeHash[0], hashOrder);
}
*/
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
sph_blake512_init( &ctx.blake );
sph_blake512( &ctx.blake, in, size );
sph_blake512_close( &ctx.blake, hash );
break;
case BMW:
sph_bmw512_init( &ctx.bmw );
sph_bmw512(&ctx.bmw, in, size);
sph_bmw512_close(&ctx.bmw, hash);
break;
case GROESTL:
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
sph_groestl512_close(&ctx.groestl, hash);
#endif
break;
case SKEIN:
sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, in, size );
sph_skein512_close( &ctx.skein, hash );
break;
case JH:
sph_jh512_init( &ctx.jh );
sph_jh512(&ctx.jh, in, size );
sph_jh512_close(&ctx.jh, hash );
break;
case KECCAK:
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, in, size );
sph_keccak512_close( &ctx.keccak, hash );
break;
case LUFFA:
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)in, size );
break;
case CUBEHASH:
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)in, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in, size );
sph_shavite512_close( &ctx.shavite, hash );
break;
case SIMD:
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
break;
case ECHO:
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
sph_echo512_close( &ctx.echo, hash );
#endif
break;
case HAMSI:
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, in, size );
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in, size );
sph_fugue512_close( &ctx.fugue, hash );
break;
case SHABAL:
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, in, size );
sph_shabal512_close( &ctx.shabal, hash );
break;
case WHIRLPOOL:
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
sph_whirlpool_close( &ctx.whirlpool, hash );
break;
case SHA_512:
SHA512_Init( &ctx.sha512 );
SHA512_Update( &ctx.sha512, in, size );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
break;
}
in = (void*) hash;
size = 64;
}
memcpy(output, hash, 32);
}
/*
 * Scan a nonce range for x16rt.
 *
 * work        - job to scan; work->data holds the block header words and
 *               work->target the share target. data[19] is the nonce slot
 *               and is updated on a hit and again on exit.
 * max_nonce   - exclusive upper bound of the nonce range.
 * hashes_done - out: set to (final nonce - first nonce + 1).
 * mythr       - calling thread's info; mythr->id selects the restart flag.
 *
 * Always returns 0; solutions are reported through submit_solution().
 */
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
                    uint64_t *hashes_done, struct thr_info *mythr )
{
   uint32_t _ALIGN(128) hash32[8];
   uint32_t _ALIGN(128) endiandata[20];
   uint32_t _ALIGN(64) timeHash[8];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t Htarg = ptarget[7];
   const uint32_t first_nonce = pdata[19];
   int thr_id = mythr->id;  // thr_id arg is deprecated
   uint32_t nonce = first_nonce;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);

   // Byte swap the header into endiandata, one 128 bit vector at a time.
   casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
   casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
   casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
   casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
   casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

   // The hash order is derived from the header time word; only recompute
   // it when that word differs from the cached s_ntime.
   uint32_t ntime = swab32( pdata[17] );
   if ( s_ntime != ntime )
   {
      x16rt_getTimeHash( ntime, &timeHash );
      x16rt_getAlgoString( &timeHash[0], hashOrder );
      s_ntime = ntime;
      s_implemented = true;
      if ( opt_debug && !thr_id )
         // %08x takes an unsigned int: log the first word of the time
         // hash, not the array itself (which would decay to a pointer).
         applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
                           hashOrder, ntime, timeHash[0] );
   }

   if ( !s_implemented )
   {
      applog( LOG_WARNING, "s not implemented");
      sleep(1);
      return 0;
   }

   // NOTE(review): Htarg was read from ptarget[7] above, before this
   // override, so the quick filter in the loop still uses the original
   // value in benchmark mode. Confirm whether that is intended.
   if ( opt_benchmark )
      ptarget[7] = 0x0cff;

   do
   {
      be32enc( &endiandata[19], nonce );
      x16rt_hash( hash32, endiandata );

      // Cheap check on the top hash word before the full target test.
      if ( hash32[7] <= Htarg )
      {
         if ( fulltest( hash32, ptarget ) && !opt_benchmark )
         {
            pdata[19] = nonce;
            submit_solution( work, hash32, mythr );
         }
      }
      nonce++;
   } while ( nonce < max_nonce && !(*restart) );

   pdata[19] = nonce;
   *hashes_done = pdata[19] - first_nonce + 1;
   return 0;
}

View File

@@ -69,7 +69,7 @@ void sonoa_4way_hash( void *state, const void *input )
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -80,7 +80,7 @@ void sonoa_4way_hash( void *state, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way( &ctx.skein, vhash, 64 );
@@ -134,13 +134,13 @@ void sonoa_4way_hash( void *state, const void *input )
// 2 // 2
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -151,7 +151,7 @@ void sonoa_4way_hash( void *state, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way( &ctx.skein, vhash, 64 );
@@ -203,7 +203,7 @@ void sonoa_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3, update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 ); (const BitSequence *) hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way( &ctx.hamsi, vhash, 64 );
@@ -215,7 +215,7 @@ void sonoa_4way_hash( void *state, const void *input )
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -226,7 +226,7 @@ void sonoa_4way_hash( void *state, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way( &ctx.skein, vhash, 64 );
@@ -278,13 +278,13 @@ void sonoa_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3, update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 ); (const BitSequence *) hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue ); sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 ); sph_fugue512( &ctx.fugue, hash0, 64 );
@@ -300,13 +300,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512_close( &ctx.fugue, hash3 ); sph_fugue512_close( &ctx.fugue, hash3 );
// 4 // 4
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -317,7 +317,7 @@ void sonoa_4way_hash( void *state, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way( &ctx.skein, vhash, 64 );
@@ -369,13 +369,13 @@ void sonoa_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3, update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 ); (const BitSequence *) hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue ); sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 ); sph_fugue512( &ctx.fugue, hash0, 64 );
@@ -390,7 +390,7 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 ); sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 ); sph_fugue512_close( &ctx.fugue, hash3 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way( &ctx.shabal, vhash, 64 );
@@ -402,7 +402,7 @@ void sonoa_4way_hash( void *state, const void *input )
hamsi512_4way( &ctx.hamsi, vhashB, 64 ); hamsi512_4way( &ctx.hamsi, vhashB, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_echo( &ctx.echo, 512 ); init_echo( &ctx.echo, 512 );
update_final_echo( &ctx.echo, (BitSequence *)hash0, update_final_echo( &ctx.echo, (BitSequence *)hash0,
@@ -438,7 +438,7 @@ void sonoa_4way_hash( void *state, const void *input )
shabal512_4way( &ctx.shabal, vhashB, 64 ); shabal512_4way( &ctx.shabal, vhashB, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -449,7 +449,7 @@ void sonoa_4way_hash( void *state, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way( &ctx.skein, vhash, 64 );
@@ -501,13 +501,13 @@ void sonoa_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3, update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 ); (const BitSequence *) hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue ); sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 ); sph_fugue512( &ctx.fugue, hash0, 64 );
@@ -522,13 +522,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 ); sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 ); sph_fugue512_close( &ctx.fugue, hash3 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool ); sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 ); sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -545,13 +545,13 @@ void sonoa_4way_hash( void *state, const void *input )
// 6 // 6
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -562,7 +562,7 @@ void sonoa_4way_hash( void *state, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way( &ctx.skein, vhash, 64 );
@@ -614,13 +614,13 @@ void sonoa_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3, update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 ); (const BitSequence *) hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue ); sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 ); sph_fugue512( &ctx.fugue, hash0, 64 );
@@ -635,13 +635,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 ); sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 ); sph_fugue512_close( &ctx.fugue, hash3 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool ); sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 ); sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -656,13 +656,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_whirlpool( &ctx.whirlpool, hash3, 64 ); sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 ); sph_whirlpool_close( &ctx.whirlpool, hash3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way( &ctx.sha512, vhash, 64 );
sha512_4way_close( &ctx.sha512, vhash ); sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool ); sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 ); sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -679,13 +679,13 @@ void sonoa_4way_hash( void *state, const void *input )
// 7 // 7
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
bmw512_4way_init( &ctx.bmw ); bmw512_4way_init( &ctx.bmw );
bmw512_4way( &ctx.bmw, vhash, 64 ); bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
@@ -696,7 +696,7 @@ void sonoa_4way_hash( void *state, const void *input )
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 ); skein512_4way( &ctx.skein, vhash, 64 );
@@ -748,13 +748,13 @@ void sonoa_4way_hash( void *state, const void *input )
update_final_echo( &ctx.echo, (BitSequence *)hash3, update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 ); (const BitSequence *) hash3, 512 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
sph_fugue512_init( &ctx.fugue ); sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash0, 64 ); sph_fugue512( &ctx.fugue, hash0, 64 );
@@ -769,13 +769,13 @@ void sonoa_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 ); sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 ); sph_fugue512_close( &ctx.fugue, hash3 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
sph_whirlpool_init( &ctx.whirlpool ); sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hash0, 64 ); sph_whirlpool( &ctx.whirlpool, hash0, 64 );
@@ -790,7 +790,7 @@ void sonoa_4way_hash( void *state, const void *input )
sph_whirlpool( &ctx.whirlpool, hash3, 64 ); sph_whirlpool( &ctx.whirlpool, hash3, 64 );
sph_whirlpool_close( &ctx.whirlpool, hash3 ); sph_whirlpool_close( &ctx.whirlpool, hash3 );
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way( &ctx.sha512, vhash, 64 );
@@ -806,7 +806,7 @@ void sonoa_4way_hash( void *state, const void *input )
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce, int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);
@@ -816,7 +816,7 @@ int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned __m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id;
uint64_t htmax[] = { 0, 0xF, 0xFF, uint64_t htmax[] = { 0, 0xF, 0xFF,
0xFFF, 0xFFFF, 0x10000000 }; 0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,

View File

@@ -68,7 +68,7 @@ void x17_4way_hash( void *state, const void *input )
bmw512_4way_close( &ctx.bmw, vhash ); bmw512_4way_close( &ctx.bmw, vhash );
// Serialize // Serialize
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
// 3 Groestl // 3 Groestl
init_groestl( &ctx.groestl, 64 ); init_groestl( &ctx.groestl, 64 );
@@ -81,7 +81,7 @@ void x17_4way_hash( void *state, const void *input )
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 ); update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
// Parallellize // Parallellize
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
// 4 Skein parallel 4 way 64 bit // 4 Skein parallel 4 way 64 bit
skein512_4way_init( &ctx.skein ); skein512_4way_init( &ctx.skein );
@@ -142,13 +142,13 @@ void x17_4way_hash( void *state, const void *input )
(const BitSequence *) hash3, 512 ); (const BitSequence *) hash3, 512 );
// 12 Hamsi parallel 4 way 64 bit // 12 Hamsi parallel 4 way 64 bit
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
hamsi512_4way_init( &ctx.hamsi ); hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way( &ctx.hamsi, vhash, 64 ); hamsi512_4way( &ctx.hamsi, vhash, 64 );
hamsi512_4way_close( &ctx.hamsi, vhash ); hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
// 13 Fugue serial // 13 Fugue serial
sph_fugue512_init( &ctx.fugue ); sph_fugue512_init( &ctx.fugue );
@@ -165,13 +165,13 @@ void x17_4way_hash( void *state, const void *input )
sph_fugue512_close( &ctx.fugue, hash3 ); sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal, parallel 4 way 32 bit // 14 Shabal, parallel 4 way 32 bit
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
shabal512_4way_init( &ctx.shabal ); shabal512_4way_init( &ctx.shabal );
shabal512_4way( &ctx.shabal, vhash, 64 ); shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash ); shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 ); dintrlv_4x32_512( hash0, hash1, hash2, hash3, vhash );
// 15 Whirlpool serial // 15 Whirlpool serial
sph_whirlpool_init( &ctx.whirlpool ); sph_whirlpool_init( &ctx.whirlpool );
@@ -188,7 +188,7 @@ void x17_4way_hash( void *state, const void *input )
sph_whirlpool_close( &ctx.whirlpool, hash3 ); sph_whirlpool_close( &ctx.whirlpool, hash3 );
// 16 SHA512 parallel 64 bit // 16 SHA512 parallel 64 bit
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 ); intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
sha512_4way_init( &ctx.sha512 ); sha512_4way_init( &ctx.sha512 );
sha512_4way( &ctx.sha512, vhash, 64 ); sha512_4way( &ctx.sha512, vhash, 64 );
@@ -205,7 +205,7 @@ void x17_4way_hash( void *state, const void *input )
int scanhash_x17_4way( struct work *work, uint32_t max_nonce, int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);

View File

@@ -332,7 +332,7 @@ void xevan_4way_hash( void *output, const void *input )
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce, int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t hash[4*8] __attribute__ ((aligned (64))); uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64))); uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]); uint32_t *hash7 = &(hash[7<<2]);

View File

@@ -399,15 +399,15 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[k], pdata[k]); be32enc(&endiandata[k], pdata[k]);
do { do {
be32enc(&endiandata[19], n); be32enc(&endiandata[19], n);
yescrypt_hash((char*) endiandata, (char*) vhash, 80); yescrypt_hash((char*) endiandata, (char*) vhash, 80);
if (vhash[7] < Htarg && fulltest(vhash, ptarget)) { if (vhash[7] < Htarg && fulltest(vhash, ptarget )
work_set_target_ratio( work, vhash ); && !opt_benchmark )
*hashes_done = n - first_nonce + 1; {
pdata[19] = n; pdata[19] = n;
return true; submit_solution( work, vhash, mythr );
} }
n++; n++;
} while (n < max_nonce && !work_restart[thr_id].restart); } while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1; *hashes_done = n - first_nonce + 1;

View File

@@ -53,15 +53,15 @@ int scanhash_yespower( struct work *work, uint32_t max_nonce,
for (int k = 0; k < 19; k++) for (int k = 0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]); be32enc(&endiandata[k], pdata[k]);
do { do {
be32enc(&endiandata[19], n); be32enc(&endiandata[19], n);
yespower_hash((char*) endiandata, (char*) vhash, 80); yespower_hash((char*) endiandata, (char*) vhash, 80);
if (vhash[7] < Htarg && fulltest(vhash, ptarget)) { if ( vhash[7] < Htarg && fulltest( vhash, ptarget )
work_set_target_ratio( work, vhash ); && !opt_benchmark )
*hashes_done = n - first_nonce + 1; {
pdata[19] = n; pdata[19] = n;
return true; submit_solution( work, vhash, mythr );
} }
n++; n++;
} while (n < max_nonce && !work_restart[thr_id].restart); } while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1; *hashes_done = n - first_nonce + 1;

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.5.4. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.6.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.9.5.4' PACKAGE_VERSION='3.9.6'
PACKAGE_STRING='cpuminer-opt 3.9.5.4' PACKAGE_STRING='cpuminer-opt 3.9.6'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.9.5.4 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.9.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.9.5.4:";; short | recursive ) echo "Configuration of cpuminer-opt 3.9.6:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.9.5.4 cpuminer-opt configure 3.9.6
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.9.5.4, which was It was created by cpuminer-opt $as_me 3.9.6, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.9.5.4' VERSION='3.9.6'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.9.5.4, which was This file was extended by cpuminer-opt $as_me 3.9.6, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.9.5.4 cpuminer-opt config.status 3.9.6
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.9.5.4]) AC_INIT([cpuminer-opt], [3.9.6])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -1009,8 +1009,7 @@ static int share_result( int result, struct work *null_work,
sres, diffstr, share_time, accepted_share_count, sres, diffstr, share_time, accepted_share_count,
rejected_share_count, solved_block_count ); rejected_share_count, solved_block_count );
if ( have_stratum && result && my_stats.share_diff && my_stats.net_diff if ( have_stratum && result && !opt_quiet )
&& !opt_quiet )
{ {
applog( LOG_NOTICE, "Miner %s %sH/s, Share %s, Latency %d ms.", applog( LOG_NOTICE, "Miner %s %sH/s, Share %s, Latency %d ms.",
hr, hr_units, shr, latency ); hr, hr_units, shr, latency );

38
miner.h
View File

@@ -313,6 +313,7 @@ void applog(int prio, const char *fmt, ...);
void restart_threads(void); void restart_threads(void);
extern json_t *json_rpc_call( CURL *curl, const char *url, const char *userpass, extern json_t *json_rpc_call( CURL *curl, const char *url, const char *userpass,
const char *rpc_req, int *curl_err, int flags ); const char *rpc_req, int *curl_err, int flags );
extern void cbin2hex(char *out, const char *in, size_t len);
void bin2hex( char *s, const unsigned char *p, size_t len ); void bin2hex( char *s, const unsigned char *p, size_t len );
char *abin2hex( const unsigned char *p, size_t len ); char *abin2hex( const unsigned char *p, size_t len );
bool hex2bin( unsigned char *p, const char *hexstr, size_t len ); bool hex2bin( unsigned char *p, const char *hexstr, size_t len );
@@ -330,6 +331,7 @@ extern void diff_to_target(uint32_t *target, double diff);
double hash_target_ratio( uint32_t* hash, uint32_t* target ); double hash_target_ratio( uint32_t* hash, uint32_t* target );
void work_set_target_ratio( struct work* work, uint32_t* hash ); void work_set_target_ratio( struct work* work, uint32_t* hash );
void get_currentalgo( char* buf, int sz ); void get_currentalgo( char* buf, int sz );
bool has_sha(); bool has_sha();
bool has_aes_ni(); bool has_aes_ni();
@@ -363,6 +365,14 @@ struct work {
char *job_id; char *job_id;
size_t xnonce2_len; size_t xnonce2_len;
unsigned char *xnonce2; unsigned char *xnonce2;
// x16rt
uint32_t merkleroothash[8];
uint32_t witmerkleroothash[8];
uint32_t denom10[8];
uint32_t denom100[8];
uint32_t denom1000[8];
uint32_t denom10000[8];
} __attribute__ ((aligned (64))); } __attribute__ ((aligned (64)));
struct stratum_job { struct stratum_job {
@@ -376,9 +386,15 @@ struct stratum_job {
unsigned char version[4]; unsigned char version[4];
unsigned char nbits[4]; unsigned char nbits[4];
unsigned char ntime[4]; unsigned char ntime[4];
bool clean;
double diff; double diff;
unsigned char extra[64]; bool clean;
// for x16rt
unsigned char extra[64];
unsigned char denom10[32];
unsigned char denom100[32];
unsigned char denom1000[32];
unsigned char denom10000[32];
unsigned char proofoffullnode[32];
} __attribute__ ((aligned (64))); } __attribute__ ((aligned (64)));
@@ -498,6 +514,7 @@ enum algos {
// ALGO_BLAKE2B, // ALGO_BLAKE2B,
ALGO_BLAKE2S, ALGO_BLAKE2S,
ALGO_BMW, ALGO_BMW,
ALGO_BMW512,
ALGO_C11, ALGO_C11,
ALGO_CRYPTOLIGHT, ALGO_CRYPTOLIGHT,
ALGO_CRYPTONIGHT, ALGO_CRYPTONIGHT,
@@ -555,10 +572,13 @@ enum algos {
ALGO_X11GOST, ALGO_X11GOST,
ALGO_X12, ALGO_X12,
ALGO_X13, ALGO_X13,
ALGO_X13BCD,
ALGO_X13SM3, ALGO_X13SM3,
ALGO_X14, ALGO_X14,
ALGO_X15, ALGO_X15,
ALGO_X16R, ALGO_X16R,
ALGO_X16RT,
ALGO_X16RT_VEIL,
ALGO_X16S, ALGO_X16S,
ALGO_X17, ALGO_X17,
ALGO_XEVAN, ALGO_XEVAN,
@@ -586,6 +606,7 @@ static const char* const algo_names[] = {
// "blake2b", // "blake2b",
"blake2s", "blake2s",
"bmw", "bmw",
"bmw512",
"c11", "c11",
"cryptolight", "cryptolight",
"cryptonight", "cryptonight",
@@ -643,10 +664,13 @@ static const char* const algo_names[] = {
"x11gost", "x11gost",
"x12", "x12",
"x13", "x13",
"x13bcd",
"x13sm3", "x13sm3",
"x14", "x14",
"x15", "x15",
"x16r", "x16r",
"x16rt",
"x16rt-veil",
"x16s", "x16s",
"x17", "x17",
"xevan", "xevan",
@@ -736,6 +760,7 @@ Options:\n\
blakecoin blake256r8\n\ blakecoin blake256r8\n\
blake2s Blake-2 S\n\ blake2s Blake-2 S\n\
bmw BMW 256\n\ bmw BMW 256\n\
bmw512 BMW 512\n\
c11 Chaincoin\n\ c11 Chaincoin\n\
cryptolight Cryptonight-light\n\ cryptolight Cryptonight-light\n\
cryptonight Cryptonote legacy\n\ cryptonight Cryptonote legacy\n\
@@ -782,7 +807,7 @@ Options:\n\
skein2 Double Skein (Woodcoin)\n\ skein2 Double Skein (Woodcoin)\n\
skunk Signatum (SIGT)\n\ skunk Signatum (SIGT)\n\
sonoa Sono\n\ sonoa Sono\n\
timetravel timeravel8, Machinecoin (MAC)\n\ timetravel timeravel8, Machinecoin (MAC)\n\
timetravel10 Bitcore (BTX)\n\ timetravel10 Bitcore (BTX)\n\
tribus Denarius (DNR)\n\ tribus Denarius (DNR)\n\
vanilla blake256r8vnl (VCash)\n\ vanilla blake256r8vnl (VCash)\n\
@@ -794,20 +819,23 @@ Options:\n\
x11gost sib (SibCoin)\n\ x11gost sib (SibCoin)\n\
x12 Galaxie Cash (GCH)\n\ x12 Galaxie Cash (GCH)\n\
x13 X13\n\ x13 X13\n\
x13bcd bcd \n\
x13sm3 hsr (Hshare)\n\ x13sm3 hsr (Hshare)\n\
x14 X14\n\ x14 X14\n\
x15 X15\n\ x15 X15\n\
x16r Ravencoin (RVN)\n\ x16r Ravencoin (RVN)\n\
x16rt Gincoin (GIN)\n\
x16rt-veil Veil (VEIL)\n\
x16s Pigeoncoin (PGN)\n\ x16s Pigeoncoin (PGN)\n\
x17\n\ x17\n\
xevan Bitsend (BSD)\n\ xevan Bitsend (BSD)\n\
yescrypt Globlboost-Y (BSTY)\n\ yescrypt Globalboost-Y (BSTY)\n\
yescryptr8 BitZeny (ZNY)\n\ yescryptr8 BitZeny (ZNY)\n\
yescryptr16 Eli\n\ yescryptr16 Eli\n\
yescryptr32 WAVI\n\ yescryptr32 WAVI\n\
yespower Cryply\n\ yespower Cryply\n\
yespowerr16 Yenten (YTN)\n\ yespowerr16 Yenten (YTN)\n\
zr5 Ziftr\n\ zr5 Ziftr\n\
-o, --url=URL URL of mining server\n\ -o, --url=URL URL of mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\ -O, --userpass=U:P username:password pair for mining server\n\
-u, --user=USERNAME username for mining server\n\ -u, --user=USERNAME username for mining server\n\

View File

@@ -477,42 +477,42 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, void *src )
__m256i s0 = mm256_bswap_32( casti_m256i( src,0 ) ); __m256i s0 = mm256_bswap_32( casti_m256i( src,0 ) );
__m256i s1 = mm256_bswap_32( casti_m256i( src,1 ) ); __m256i s1 = mm256_bswap_32( casti_m256i( src,1 ) );
__m128i s2 = mm128_bswap_32( casti_m128i( src,4 ) ); __m128i s2 = mm128_bswap_32( casti_m128i( src,4 ) );
const __m256i zero = m256_zero; const __m256i zero = m256_zero;
const __m256i one = m256_one_32; const __m256i one = m256_one_32;
const __m256i two = _mm256_add_epi32( one, one ); const __m256i two = _mm256_add_epi32( one, one );
const __m256i tre = _mm256_add_epi32( two, one ); const __m256i three = _mm256_add_epi32( two, one );
const __m256i four = _mm256_add_epi32( two, two ); const __m256i four = _mm256_add_epi32( two, two );
casti_m256i( d, 0 ) = _mm256_permutevar8x32_epi32( s0, zero ); casti_m256i( d, 0 ) = _mm256_permutevar8x32_epi32( s0, zero );
casti_m256i( d, 1 ) = _mm256_permutevar8x32_epi32( s0, one ); casti_m256i( d, 1 ) = _mm256_permutevar8x32_epi32( s0, one );
casti_m256i( d, 2 ) = _mm256_permutevar8x32_epi32( s0, two ); casti_m256i( d, 2 ) = _mm256_permutevar8x32_epi32( s0, two );
casti_m256i( d, 3 ) = _mm256_permutevar8x32_epi32( s0, tre ); casti_m256i( d, 3 ) = _mm256_permutevar8x32_epi32( s0, three );
casti_m256i( d, 4 ) = _mm256_permutevar8x32_epi32( s0, four ); casti_m256i( d, 4 ) = _mm256_permutevar8x32_epi32( s0, four );
casti_m256i( d, 5 ) = _mm256_permutevar8x32_epi32( s0, casti_m256i( d, 5 ) = _mm256_permutevar8x32_epi32( s0,
_mm256_add_epi32( four, one ) ); _mm256_add_epi32( four, one ) );
casti_m256i( d, 6 ) = _mm256_permutevar8x32_epi32( s0, casti_m256i( d, 6 ) = _mm256_permutevar8x32_epi32( s0,
_mm256_add_epi32( four, two ) ); _mm256_add_epi32( four, two ) );
casti_m256i( d, 7 ) = _mm256_permutevar8x32_epi32( s0, casti_m256i( d, 7 ) = _mm256_permutevar8x32_epi32( s0,
_mm256_add_epi32( four, tre ) ); _mm256_add_epi32( four, three ) );
casti_m256i( d, 8 ) = _mm256_permutevar8x32_epi32( s1, zero ); casti_m256i( d, 8 ) = _mm256_permutevar8x32_epi32( s1, zero );
casti_m256i( d, 9 ) = _mm256_permutevar8x32_epi32( s1, one ); casti_m256i( d, 9 ) = _mm256_permutevar8x32_epi32( s1, one );
casti_m256i( d,10 ) = _mm256_permutevar8x32_epi32( s1, two ); casti_m256i( d,10 ) = _mm256_permutevar8x32_epi32( s1, two );
casti_m256i( d,11 ) = _mm256_permutevar8x32_epi32( s1, tre ); casti_m256i( d,11 ) = _mm256_permutevar8x32_epi32( s1, three );
casti_m256i( d,12 ) = _mm256_permutevar8x32_epi32( s1, four ); casti_m256i( d,12 ) = _mm256_permutevar8x32_epi32( s1, four );
casti_m256i( d,13 ) = _mm256_permutevar8x32_epi32( s1, casti_m256i( d,13 ) = _mm256_permutevar8x32_epi32( s1,
_mm256_add_epi32( four, one ) ); _mm256_add_epi32( four, one ) );
casti_m256i( d,14 ) = _mm256_permutevar8x32_epi32( s1, casti_m256i( d,14 ) = _mm256_permutevar8x32_epi32( s1,
_mm256_add_epi32( four, two ) ); _mm256_add_epi32( four, two ) );
casti_m256i( d,15 ) = _mm256_permutevar8x32_epi32( s1, casti_m256i( d,15 ) = _mm256_permutevar8x32_epi32( s1,
_mm256_add_epi32( four, tre ) ); _mm256_add_epi32( four, three ) );
casti_m256i( d,16 ) = _mm256_permutevar8x32_epi32( casti_m256i( d,16 ) = _mm256_permutevar8x32_epi32(
_mm256_castsi128_si256( s2 ), zero ); _mm256_castsi128_si256( s2 ), zero );
casti_m256i( d,17 ) = _mm256_permutevar8x32_epi32( casti_m256i( d,17 ) = _mm256_permutevar8x32_epi32(
_mm256_castsi128_si256( s2 ), one ); _mm256_castsi128_si256( s2 ), one );
casti_m256i( d,18 ) = _mm256_permutevar8x32_epi32( casti_m256i( d,18 ) = _mm256_permutevar8x32_epi32(
_mm256_castsi128_si256( s2 ), two ); _mm256_castsi128_si256( s2 ), two );
casti_m256i( d,19 ) = _mm256_permutevar8x32_epi32( casti_m256i( d,19 ) = _mm256_permutevar8x32_epi32(
_mm256_castsi128_si256( s2 ), tre ); _mm256_castsi128_si256( s2 ), three );
} }
#endif // AVX2 #endif // AVX2
@@ -677,39 +677,39 @@ static inline void mm512_bswap32_intrlv80_16x32( void *d, void *src )
{ {
__m512i s0 = mm512_bswap_32( casti_m512i( src, 0 ) ); __m512i s0 = mm512_bswap_32( casti_m512i( src, 0 ) );
__m128i s1 = mm128_bswap_32( casti_m128i( src, 4 ) ); __m128i s1 = mm128_bswap_32( casti_m128i( src, 4 ) );
const __m512i zero = m512_zero; const __m512i zero = m512_zero;
const __m512i one = m512_one_32; const __m512i one = m512_one_32;
const __m512i two = _mm512_add_epi32( one, one ); const __m512i two = _mm512_add_epi32( one, one );
const __m512i tre = _mm512_add_epi32( two, one ); const __m512i three = _mm512_add_epi32( two, one );
const __m512i four = _mm512_add_epi32( two, two ); const __m512i four = _mm512_add_epi32( two, two );
const __m512i eight = _mm512_add_epi32( four, four ); const __m512i eight = _mm512_add_epi32( four, four );
const __m512i eleven = _mm512_add_epi32( eight, tre ); const __m512i eleven = _mm512_add_epi32( eight, three );
casti_m512i( d, 0 ) = _mm512_permutexvar_epi32( s0, zero ); casti_m512i( d, 0 ) = _mm512_permutexvar_epi32( s0, zero );
casti_m512i( d, 1 ) = _mm512_permutexvar_epi32( s0, one ); casti_m512i( d, 1 ) = _mm512_permutexvar_epi32( s0, one );
casti_m512i( d, 2 ) = _mm512_permutexvar_epi32( s0, two ); casti_m512i( d, 2 ) = _mm512_permutexvar_epi32( s0, two );
casti_m512i( d, 3 ) = _mm512_permutexvar_epi32( s0, tre ); casti_m512i( d, 3 ) = _mm512_permutexvar_epi32( s0, three );
casti_m512i( d, 4 ) = _mm512_permutexvar_epi32( s0, four ); casti_m512i( d, 4 ) = _mm512_permutexvar_epi32( s0, four );
casti_m512i( d, 5 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d, 5 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( four, one ) ); _mm512_add_epi32( four, one ) );
casti_m512i( d, 6 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d, 6 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( four, two ) ); _mm512_add_epi32( four, two ) );
casti_m512i( d, 7 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d, 7 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( four, tre ) ); _mm512_add_epi32( four, three ) );
casti_m512i( d, 8 ) = _mm512_permutexvar_epi32( s0, eight ); casti_m512i( d, 8 ) = _mm512_permutexvar_epi32( s0, eight );
casti_m512i( d, 9 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d, 9 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( eight, one ) ); _mm512_add_epi32( eight, one ) );
casti_m512i( d,10 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d,10 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( eight, two ) ); _mm512_add_epi32( eight, two ) );
casti_m512i( d,11 ) = _mm512_permutexvar_epi32( s0, eleven ); casti_m512i( d,11 ) = _mm512_permutexvar_epi32( s0, eleven );
casti_m512i( d,12 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d,12 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( eleven, one ) ); _mm512_add_epi32( eleven, one ) );
casti_m512i( d,13 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d,13 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( eleven, two ) ); _mm512_add_epi32( eleven, two ) );
casti_m512i( d,14 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d,14 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( eleven, tre ) ); _mm512_add_epi32( eleven, three ) );
casti_m512i( d,15 ) = _mm512_permutexvar_epi32( s0, casti_m512i( d,15 ) = _mm512_permutexvar_epi32( s0,
_mm512_add_epi32( eleven, four ) ); _mm512_add_epi32( eleven, four ) );
casti_m512i( d,16 ) = _mm512_permutexvar_epi32( casti_m512i( d,16 ) = _mm512_permutexvar_epi32(
_mm512_castsi128_si512( s1 ), zero ); _mm512_castsi128_si512( s1 ), zero );
casti_m512i( d,17 ) = _mm512_permutexvar_epi32( casti_m512i( d,17 ) = _mm512_permutexvar_epi32(
@@ -717,7 +717,7 @@ static inline void mm512_bswap32_intrlv80_16x32( void *d, void *src )
casti_m512i( d,18 ) = _mm512_permutexvar_epi32( casti_m512i( d,18 ) = _mm512_permutexvar_epi32(
_mm512_castsi128_si512( s1 ), two ); _mm512_castsi128_si512( s1 ), two );
casti_m512i( d,19 ) = _mm512_permutexvar_epi32( casti_m512i( d,19 ) = _mm512_permutexvar_epi32(
_mm512_castsi128_si512( s1 ), tre ); _mm512_castsi128_si512( s1 ), three );
} }
#endif // AVX512 #endif // AVX512
@@ -1006,20 +1006,20 @@ static inline void mm512_bswap32_intrlv80_8x64( void *dst, void *src )
__m512i *d = (__m512i*)dst; __m512i *d = (__m512i*)dst;
__m512i s0 = mm512_bswap_32( casti_m512i(src, 0 ) ); __m512i s0 = mm512_bswap_32( casti_m512i(src, 0 ) );
__m128i s1 = mm128_bswap_32( casti_m128i(src, 4 ) ); __m128i s1 = mm128_bswap_32( casti_m128i(src, 4 ) );
const __m512i zero = m512_zero; const __m512i zero = m512_zero;
const __m512i one = m512_one_64; const __m512i one = m512_one_64;
const __m512i two = _mm512_add_epi64( one, one ); const __m512i two = _mm512_add_epi64( one, one );
const __m512i tre = _mm512_add_epi64( two, one ); const __m512i three = _mm512_add_epi64( two, one );
const __m512i four = _mm512_add_epi64( two, two ); const __m512i four = _mm512_add_epi64( two, two );
d[0] = _mm512_permutexvar_epi64( s0, zero ); d[0] = _mm512_permutexvar_epi64( s0, zero );
d[1] = _mm512_permutexvar_epi64( s0, one ); d[1] = _mm512_permutexvar_epi64( s0, one );
d[2] = _mm512_permutexvar_epi64( s0, two ); d[2] = _mm512_permutexvar_epi64( s0, two );
d[3] = _mm512_permutexvar_epi64( s0, tre ); d[3] = _mm512_permutexvar_epi64( s0, three );
d[4] = _mm512_permutexvar_epi64( s0, four ); d[4] = _mm512_permutexvar_epi64( s0, four );
d[5] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, one ) ); d[5] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, one ) );
d[6] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, two ) ); d[6] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, two ) );
d[7] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, tre ) ); d[7] = _mm512_permutexvar_epi64( s0, _mm512_add_epi64( four, three ) );
d[8] = _mm512_permutexvar_epi64( d[8] = _mm512_permutexvar_epi64(
_mm512_castsi128_si512( s1 ), zero ); _mm512_castsi128_si512( s1 ), zero );
d[9] = _mm512_permutexvar_epi64( d[9] = _mm512_permutexvar_epi64(
@@ -1296,25 +1296,18 @@ static inline void rintrlv_4x64_2x128( void *dst0, void *dst1,
#if defined(__SSE4_1__) #if defined(__SSE4_1__)
// No SSE2 implementation. // No SSE2 implementation.
#define mm128_intrlv_blend_64( hi, lo ) \ #define mm128_intrlv_blend_64( hi, lo ) _mm_blend_epi16( hi, lo, 0x0f )
_mm_blend_epi16( hi, lo, 0x0f ) #define mm128_intrlv_blend_32( hi, lo ) _mm_blend_epi16( hi, lo, 0x33 )
#define mm128_intrlv_blend_32( hi, lo ) \
_mm_blend_epi16( hi, lo, 0x33 )
#endif // SSE4_1 #endif // SSE4_1
#if defined(__AVX2__) #if defined(__AVX2__)
#define mm256_intrlv_blend_128( hi, lo ) \ #define mm256_intrlv_blend_128( hi, lo ) _mm256_blend_epi32( hi, lo, 0x0f )
_mm256_blend_epi32( hi, lo, 0x0f ) #define mm256_intrlv_blend_64( hi, lo ) _mm256_blend_epi32( hi, lo, 0x33 )
#define mm256_intrlv_blend_32( hi, lo ) _mm256_blend_epi32( hi, lo, 0x55 )
#define mm256_intrlv_blend_64( hi, lo ) \ // Select lanes of 32 byte hash from 2 sources according to control mask.
_mm256_blend_epi32( hi, lo, 0x33 )
#define mm256_intrlv_blend_32( hi, lo ) \
_mm256_blend_epi32( hi, lo, 0x55 )
// Blend 32 byte lanes of hash from 2 sources according to control mask.
// macro due to 256 bit value arg. // macro due to 256 bit value arg.
#define mm256_blend_hash_4x64( dst, a, b, mask ) \ #define mm256_blend_hash_4x64( dst, a, b, mask ) \
do { \ do { \

View File

@@ -358,17 +358,17 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, int n )
// no SSE2 implementation, no current users // no SSE2 implementation, no current users
#define mm128_ror_1x16( v ) \ #define mm128_ror_1x16( v ) \
_mm_shuffle_epi8( v, _mm_set_epi8( 1, 0,15,14,13,12,11,10 \ _mm_shuffle_epi8( v, m128_const_64( 0x01000f0e0d0c0b0a, \
9, 8, 7, 6, 5, 4, 3, 2 ) ) 0x0908070605040302 ) )
#define mm128_rol_1x16( v ) \ #define mm128_rol_1x16( v ) \
_mm_shuffle_epi8( v, _mm_set_epi8( 13,12,11,10, 9, 8, 7, 6, \ _mm_shuffle_epi8( v, m128_const_64( 0x0d0c0b0a09080706, \
5, 4, 3, 2, 1, 0,15,14 ) ) 0x0504030201000f0e ) )
#define mm128_ror_1x8( v ) \ #define mm128_ror_1x8( v ) \
_mm_shuffle_epi8( v, _mm_set_epi8( 0,15,14,13,12,11,10, 9, \ _mm_shuffle_epi8( v, m128_const_64( 0x000f0e0d0c0b0a09, \
8, 7, 6, 5, 4, 3, 2, 1 ) ) 0x0807060504030201 ) )
#define mm128_rol_1x8( v ) \ #define mm128_rol_1x8( v ) \
_mm_shuffle_epi8( v, _mm_set_epi8( 14,13,12,11,10, 9, 8, 7, \ _mm_shuffle_epi8( v, m128_const_64( 0x0e0d0c0b0a090807, \
6, 5, 4, 3, 2, 1, 0,15 ) ) 0x060504030201000f ) )
#endif // SSE3 #endif // SSE3
// Rotate 16 byte (128 bit) vector by c bytes. // Rotate 16 byte (128 bit) vector by c bytes.
@@ -386,12 +386,12 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, int n )
#define mm128_swap32_64( v ) _mm_shuffle_epi32( v, 0xb1 ) #define mm128_swap32_64( v ) _mm_shuffle_epi32( v, 0xb1 )
// Byte-shuffle helpers built on _mm_shuffle_epi8. Each control constant is
// given as m128_const_64( hi, lo ); control byte i selects the source byte
// that lands in result byte i.

// Rotate each 64 bit lane right by 16 bits.
#define mm128_ror16_64( v ) \
   _mm_shuffle_epi8( v, m128_const_64( 0x09080f0e0d0c0b0a, \
                                       0x0100070605040302 ) )

// Rotate each 64 bit lane left by 16 bits.
// The high control qword must be a full 16 hex digits (0x0d0c...); a dropped
// digit shifts every index and silently corrupts the shuffle.
#define mm128_rol16_64( v ) \
   _mm_shuffle_epi8( v, m128_const_64( 0x0d0c0b0a09080f0e, \
                                       0x0504030201000706 ) )

// Swap the two 16 bit halves of each 32 bit element.
#define mm128_swap16_32( v ) \
   _mm_shuffle_epi8( v, m128_const_64( 0x0d0c0f0e09080b0a, \
                                       0x0504070601000302 ) )
// //
// Endian byte swap. // Endian byte swap.
@@ -399,16 +399,15 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, int n )
#if defined(__SSSE3__) #if defined(__SSSE3__)
#define mm128_bswap_64( v ) \ #define mm128_bswap_64( v ) \
_mm_shuffle_epi8( v, m128_const64( 0x08090a0b0c0d0e0f, \ _mm_shuffle_epi8( v, m128_const_64( 0x08090a0b0c0d0e0f, \
0x0001020304050607 ) ) 0x0001020304050607 ) )
#define mm128_bswap_32( v ) \ #define mm128_bswap_32( v ) \
_mm_shuffle_epi8( v, m128_const_64( 0x0c0d0e0f08090a0b, \ _mm_shuffle_epi8( v, m128_const_64( 0x0c0d0e0f08090a0b, \
0x0405060700010203 ) ) 0x0405060700010203 ) )
// Endian byte swap of every 16 bit element in a 128 bit vector.
// The vector operand v must be passed to the shuffle together with the
// control constant; the control swaps each adjacent byte pair.
#define mm128_bswap_16( v ) \
   _mm_shuffle_epi8( v, m128_const_64( 0x0e0f0c0d0a0b0809, \
                                       0x0607040502030001 ) )
// 8 byte qword * 8 qwords * 2 lanes = 128 bytes // 8 byte qword * 8 qwords * 2 lanes = 128 bytes
#define mm128_block_bswap_64( d, s ) do \ #define mm128_block_bswap_64( d, s ) do \
@@ -462,14 +461,14 @@ static inline __m128i mm128_bswap_16( __m128i v )
static inline void mm128_block_bswap_64( __m128i *d, __m128i *s ) static inline void mm128_block_bswap_64( __m128i *d, __m128i *s )
{ {
d[0] = mm128_bswap_32( s[0] ); d[0] = mm128_bswap_64( s[0] );
d[1] = mm128_bswap_32( s[1] ); d[1] = mm128_bswap_64( s[1] );
d[2] = mm128_bswap_32( s[2] ); d[2] = mm128_bswap_64( s[2] );
d[3] = mm128_bswap_32( s[3] ); d[3] = mm128_bswap_64( s[3] );
d[4] = mm128_bswap_32( s[4] ); d[4] = mm128_bswap_64( s[4] );
d[5] = mm128_bswap_32( s[5] ); d[5] = mm128_bswap_64( s[5] );
d[6] = mm128_bswap_32( s[6] ); d[6] = mm128_bswap_64( s[6] );
d[7] = mm128_bswap_32( s[7] ); d[7] = mm128_bswap_64( s[7] );
} }
static inline void mm128_block_bswap_32( __m128i *d, __m128i *s ) static inline void mm128_block_bswap_32( __m128i *d, __m128i *s )

View File

@@ -32,6 +32,7 @@
// set instructions load memory resident constants, this avoids mem. // set instructions load memory resident constants, this avoids mem.
// cost 4 pinsert + 1 vinsert, estimate 7 clocks. // cost 4 pinsert + 1 vinsert, estimate 7 clocks.
// Avoid using, mm128_const_64 twice is still faster.
#define m256_const_64( i3, i2, i1, i0 ) \ #define m256_const_64( i3, i2, i1, i0 ) \
_mm256_insertf128_si256( _mm256_castsi128_si256( m128_const_64( i1, i0 ) ), \ _mm256_insertf128_si256( _mm256_castsi128_si256( m128_const_64( i1, i0 ) ), \
m128_const_64( i3, i2 ), 1 ) m128_const_64( i3, i2 ), 1 )
@@ -50,7 +51,7 @@ static inline __m256i m256_one_64_fn()
asm( "vpxor %0, %0, %0\n\t" asm( "vpxor %0, %0, %0\n\t"
"vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t" "vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t"
"vpsubq %%ymm1, %0, %0\n\t" "vpsubq %%ymm1, %0, %0\n\t"
:"=x"(a) : "=x"(a)
: :
: "ymm1" ); : "ymm1" );
return a; return a;
@@ -63,7 +64,7 @@ static inline __m256i m256_one_32_fn()
asm( "vpxor %0, %0, %0\n\t" asm( "vpxor %0, %0, %0\n\t"
"vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t" "vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t"
"vpsubd %%ymm1, %0, %0\n\t" "vpsubd %%ymm1, %0, %0\n\t"
:"=x"(a) : "=x"(a)
: :
: "ymm1" ); : "ymm1" );
return a; return a;
@@ -76,7 +77,7 @@ static inline __m256i m256_one_16_fn()
asm( "vpxor %0, %0, %0\n\t" asm( "vpxor %0, %0, %0\n\t"
"vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t" "vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t"
"vpsubw %%ymm1, %0, %0\n\t" "vpsubw %%ymm1, %0, %0\n\t"
:"=x"(a) : "=x"(a)
: :
: "ymm1" ); : "ymm1" );
return a; return a;
@@ -89,7 +90,7 @@ static inline __m256i m256_one_8_fn()
asm( "vpxor %0, %0, %0\n\t" asm( "vpxor %0, %0, %0\n\t"
"vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t" "vpcmpeqd %%ymm1, %%ymm1, %%ymm1\n\t"
"vpsubb %%ymm1, %0, %0\n\t" "vpsubb %%ymm1, %0, %0\n\t"
:"=x"(a) : "=x"(a)
: :
: "ymm1" ); : "ymm1" );
return a; return a;
@@ -100,7 +101,7 @@ static inline __m256i m256_neg1_fn()
{ {
__m256i a; __m256i a;
asm( "vpcmpeqq %0, %0, %0\n\t" asm( "vpcmpeqq %0, %0, %0\n\t"
:"=x"(a) ); : "=x"(a) );
return a; return a;
} }
#define m256_neg1 m256_neg1_fn() #define m256_neg1 m256_neg1_fn()
@@ -423,23 +424,23 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
// Rotate 256 bit vector by one 16 bit element. // Rotate 256 bit vector by one 16 bit element.
#define mm256_ror_1x16( v ) \ #define mm256_ror_1x16( v ) \
_mm256_permutexvar_epi16( _mm256_set_epi16( \ _mm256_permutexvar_epi16( m256_const_64( \
0,15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ), v ) 0x0000000f000e000d, 0x000c000b000a0009, \
0x0008000700060005, 0x0004000300020001 ), v )
#define mm256_rol_1x16( v ) \ #define mm256_rol_1x16( v ) \
_mm256_permutexvar_epi16( _mm256_set_epi16( \ _mm256_permutexvar_epi16( m256_const_64( \
14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,15 ), v ) 0x000e000d000c000b, 0x000a000900080007, \
0x0006000500040003, 0x000200010000000f ), v )
// Rotate 256 bit vector by one byte. // Rotate 256 bit vector by one byte.
// Rotate right: result byte i takes source byte (i + 1) mod 32.
// The index constant only describes the permutation; it still has to be
// applied to v with _mm256_permutexvar_epi8.
#define mm256_ror_1x8( v ) \
   _mm256_permutexvar_epi8( m256_const_64( \
                 0x001f1e1d1c1b1a19, 0x1817161514131211, \
                 0x100f0e0d0c0b0a09, 0x0807060504030201 ), v )

// Rotate left: result byte i takes source byte (i - 1) mod 32.
#define mm256_rol_1x8( v ) \
   _mm256_permutexvar_epi8( m256_const_64( \
                 0x1e1d1c1b1a191817, 0x161514131211100f, \
                 0x0e0d0c0b0a090807, 0x060504030201001f ), v )
#endif // AVX512 #endif // AVX512

View File

@@ -503,7 +503,7 @@ static inline __m512i m512_neg1_fn()
0x08090A0B, 0x0C0D0E0F, 0x00010203, 0x04050607 ) ) 0x08090A0B, 0x0C0D0E0F, 0x00010203, 0x04050607 ) )
#define mm512_bswap_32( v ) \ #define mm512_bswap_32( v ) \
_mm512_permutexvar_epi8( v, _mm512_set_epi832( \ _mm512_permutexvar_epi8( v, _mm512_set_epi32( \
0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \ 0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \
0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \ 0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \
0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \ 0x3C3D3E3F, 0x38393A3B, 0x34353637, 0x30313233, \

94
util.c
View File

@@ -668,6 +668,15 @@ err_out:
return cfg; return cfg;
} }
/*
 * Encode `len` bytes from `in` as lowercase hexadecimal text in `out`.
 *
 * out  Destination buffer; must hold at least 2 * len + 1 chars.
 *      A NULL `out` makes the call a no-op.
 * in   Source bytes; each is read as an unsigned 8 bit value.
 * len  Number of source bytes to encode.
 *
 * The output is always NUL-terminated, including when len is 0.
 */
void cbin2hex(char *out, const char *in, size_t len)
{
    static const char hex_digits[] = "0123456789abcdef";

    if (!out)
        return;

    // Index with size_t so the counter has the same width as len.
    for (size_t i = 0; i < len; i++) {
        const uint8_t byte = (uint8_t)in[i];
        out[2 * i]     = hex_digits[byte >> 4];
        out[2 * i + 1] = hex_digits[byte & 0x0f];
    }
    out[2 * len] = '\0';
}
void bin2hex(char *s, const unsigned char *p, size_t len) void bin2hex(char *s, const unsigned char *p, size_t len)
{ {
for (size_t i = 0; i < len; i++) for (size_t i = 0; i < len; i++)
@@ -1693,35 +1702,47 @@ static uint32_t getblocheight(struct stratum_ctx *sctx)
static bool stratum_notify(struct stratum_ctx *sctx, json_t *params) static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
{ {
const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime; const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime;
const char *extradata = NULL; const char *denom10 = NULL, *denom100 = NULL, *denom1000 = NULL,
*denom10000 = NULL, *prooffullnode = NULL;
const char *extradata = NULL;
size_t coinb1_size, coinb2_size; size_t coinb1_size, coinb2_size;
bool clean, ret = false; bool clean, ret = false;
int merkle_count, i, p = 0; int merkle_count, i, p = 0;
json_t *merkle_arr; json_t *merkle_arr;
uchar **merkle = NULL; uchar **merkle = NULL;
int jsize = json_array_size(params); int jsize = json_array_size(params);
bool has_claim = ( opt_algo == ALGO_LBRY ) && ( jsize == 10 ); bool has_claim = ( opt_algo == ALGO_LBRY ) && ( jsize == 10 );
bool has_roots = ( opt_algo == ALGO_PHI2 ) && ( jsize == 10 ); bool has_roots = ( opt_algo == ALGO_PHI2 ) && ( jsize == 10 );
job_id = json_string_value(json_array_get(params, p++)); bool is_veil = ( opt_algo == ALGO_X16RT_VEIL );
job_id = json_string_value(json_array_get(params, p++));
prevhash = json_string_value(json_array_get(params, p++)); prevhash = json_string_value(json_array_get(params, p++));
if ( has_claim ) if ( has_claim )
{ {
extradata = json_string_value(json_array_get(params, p++)); extradata = json_string_value(json_array_get(params, p++));
if ( !extradata || strlen( extradata ) != 64 ) if ( !extradata || strlen( extradata ) != 64 )
{ {
applog(LOG_ERR, "Stratum notify: invalid claim parameter"); applog(LOG_ERR, "Stratum notify: invalid claim parameter");
goto out; goto out;
} }
} }
else if ( has_roots ) else if ( has_roots )
{ {
extradata = json_string_value(json_array_get(params, p++)); extradata = json_string_value(json_array_get(params, p++));
if ( !extradata || strlen( extradata ) != 128 ) if ( !extradata || strlen( extradata ) != 128 )
{ {
applog(LOG_ERR, "Stratum notify: invalid UTXO root parameter"); applog(LOG_ERR, "Stratum notify: invalid UTXO root parameter");
goto out; goto out;
} }
} }
if ( is_veil )
{
denom10 = json_string_value(json_array_get(params, p++));
denom100 = json_string_value(json_array_get(params, p++));
denom1000 = json_string_value(json_array_get(params, p++));
denom10000 = json_string_value(json_array_get(params, p++));
prooffullnode = json_string_value(json_array_get(params, p++));
}
coinb1 = json_string_value(json_array_get(params, p++)); coinb1 = json_string_value(json_array_get(params, p++));
coinb2 = json_string_value(json_array_get(params, p++)); coinb2 = json_string_value(json_array_get(params, p++));
@@ -1733,7 +1754,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
nbits = json_string_value(json_array_get(params, p++)); nbits = json_string_value(json_array_get(params, p++));
stime = json_string_value(json_array_get(params, p++)); stime = json_string_value(json_array_get(params, p++));
clean = json_is_true(json_array_get(params, p)); p++; clean = json_is_true(json_array_get(params, p)); p++;
if (!job_id || !prevhash || !coinb1 || !coinb2 || !version || !nbits || !stime || if (!job_id || !prevhash || !coinb1 || !coinb2 || !version || !nbits || !stime ||
strlen(prevhash) != 64 || strlen(version) != 8 || strlen(prevhash) != 64 || strlen(version) != 8 ||
strlen(nbits) != 8 || strlen(stime) != 8) { strlen(nbits) != 8 || strlen(stime) != 8) {
@@ -1741,8 +1762,22 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
goto out; goto out;
} }
merkle = (uchar**) malloc(merkle_count * sizeof(char *)); if ( is_veil )
for (i = 0; i < merkle_count; i++) { {
if ( !denom10 || !denom100 || !denom1000 || !denom10000
|| !prooffullnode || strlen(denom10) != 64 || strlen(denom100) != 64
|| strlen(denom1000) != 64 || strlen(denom10000) != 64
|| strlen(prooffullnode) != 64 )
{
applog(LOG_ERR, "Stratum notify: invalid veil parameters");
goto out;
}
}
if ( merkle_count )
merkle = (uchar**) malloc(merkle_count * sizeof(char *));
for ( i = 0; i < merkle_count; i++ )
{
const char *s = json_string_value(json_array_get(merkle_arr, i)); const char *s = json_string_value(json_array_get(merkle_arr, i));
if (!s || strlen(s) != 64) { if (!s || strlen(s) != 64) {
while (i--) while (i--)
@@ -1774,6 +1809,15 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
if (has_claim) hex2bin(sctx->job.extra, extradata, 32); if (has_claim) hex2bin(sctx->job.extra, extradata, 32);
if (has_roots) hex2bin(sctx->job.extra, extradata, 64); if (has_roots) hex2bin(sctx->job.extra, extradata, 64);
if ( is_veil )
{
hex2bin(sctx->job.denom10, denom10, 32);
hex2bin(sctx->job.denom100, denom100, 32);
hex2bin(sctx->job.denom1000, denom1000, 32);
hex2bin(sctx->job.denom10000, denom10000, 32);
hex2bin(sctx->job.proofoffullnode, prooffullnode, 32);
}
sctx->bloc_height = getblocheight(sctx); sctx->bloc_height = getblocheight(sctx);
for (i = 0; i < sctx->job.merkle_count; i++) for (i = 0; i < sctx->job.merkle_count; i++)