This commit is contained in:
Jay D Dee
2025-07-20 19:43:10 -04:00
parent aa47e880d5
commit 12480a3ea5
17 changed files with 507 additions and 504 deletions

View File

@@ -54,9 +54,9 @@ Supported Algorithms
allium Garlicoin
anime Animecoin
argon2 Argon2 coin (AR2)
argon2d250
argon2d500
argon2d1000
argon2d4096
blake Blake-256
blake2b Blake2-512

View File

@@ -75,9 +75,14 @@ If not what makes it happen or not happen?
Change Log
----------
v25.6
Added argon2d1000, argon2d16000 algos.
Target specific AES optimizations improve shavite for ARM64 & x86_64.
v25.5
x86_64: Fixed and insidious bug in sha256 early rejection optimization for AVX2 & AVX512.
x86_64: Fixed an insidious bug in sha256 early rejection optimization for AVX2 & AVX512.
x86_64: Faster sha256d, sha256dt for AVX2 & AVX512.
Other small bug fixes.

View File

@@ -297,6 +297,8 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_ANIME: rc = register_anime_algo ( gate ); break;
case ALGO_ARGON2D250: rc = register_argon2d250_algo ( gate ); break;
case ALGO_ARGON2D500: rc = register_argon2d500_algo ( gate ); break;
case ALGO_ARGON2D1000: rc = register_argon2d1000_algo ( gate ); break;
case ALGO_ARGON2D16000: rc = register_argon2d16000_algo ( gate ); break;
case ALGO_ARGON2D4096: rc = register_argon2d4096_algo ( gate ); break;
case ALGO_AXIOM: rc = register_axiom_algo ( gate ); break;
case ALGO_BLAKE: rc = register_blake_algo ( gate ); break;

View File

@@ -172,8 +172,11 @@ void ( *set_work_data_endian ) ( struct work* );
json_t* ( *longpoll_rpc_call ) ( CURL*, int*, char* );
// Deprecated
set_t optimizations;
int ( *get_work_data_size ) ();
int ntime_index;
int nbits_index;
int nonce_index; // use with caution, see warning below
@@ -274,8 +277,6 @@ void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
void sha256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
// OpenSSL sha256 deprecated
//void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
bool std_le_work_decode( struct work *work );
bool std_be_work_decode( struct work *work );

View File

@@ -6,6 +6,38 @@ static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Inpu
static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash
static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS
// generic, works with most variations of argon2d
int scanhash_argon2d( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) edata[20];
uint32_t _ALIGN(64) hash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const int thr_id = mythr->id;
const uint32_t first_nonce = (const uint32_t)pdata[19];
const uint32_t last_nonce = (const uint32_t)max_nonce;
uint32_t nonce = first_nonce;
const bool bench = opt_benchmark;
v128_bswap32_80( edata, pdata );
do
{
edata[19] = nonce;
algo_gate.hash( hash, edata, thr_id );
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( nonce );
submit_solution( work, hash, mythr );
}
nonce++;
} while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 0;
}
void argon2d250_hash( void *output, const void *input )
{
argon2_context context;
@@ -32,41 +64,10 @@ void argon2d250_hash( void *output, const void *input )
argon2_ctx( &context, Argon2_d );
}
int scanhash_argon2d250( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) edata[20];
uint32_t _ALIGN(64) hash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t nonce = first_nonce;
swab32_array( edata, pdata, 20 );
do {
be32enc(&edata[19], nonce);
argon2d250_hash( hash, edata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
bool register_argon2d250_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d250;
gate->scanhash = (void*)&scanhash_argon2d;
gate->hash = (void*)&argon2d250_hash;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
opt_target_factor = 65536.0;
return true;
}
@@ -97,43 +98,78 @@ void argon2d500_hash( void *output, const void *input )
argon2_ctx( &context, Argon2_d );
}
int scanhash_argon2d500( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) edata[20];
uint32_t _ALIGN(64) hash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const int thr_id = mythr->id;
const uint32_t first_nonce = (const uint32_t)pdata[19];
const uint32_t last_nonce = (const uint32_t)max_nonce;
uint32_t nonce = first_nonce;
const bool bench = opt_benchmark;
v128_bswap32_80( edata, pdata );
do
{
edata[19] = nonce;
argon2d500_hash( hash, edata );
if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget )
&& !bench ) )
{
pdata[19] = bswap_32( nonce );;
submit_solution( work, hash, mythr );
}
nonce++;
} while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 0;
}
bool register_argon2d500_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d500;
gate->scanhash = (void*)&scanhash_argon2d;
gate->hash = (void*)&argon2d500_hash;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
opt_target_factor = 65536.0;
return true;
}
void argon2d1000_hash( void *output, const void *input )
{
argon2_context context;
context.out = (uint8_t *)output;
context.outlen = (uint32_t)OUTPUT_BYTES;
context.pwd = (uint8_t *)input;
context.pwdlen = (uint32_t)INPUT_BYTES;
context.salt = (uint8_t *)input; //salt = input
context.saltlen = (uint32_t)INPUT_BYTES;
context.secret = NULL;
context.secretlen = 0;
context.ad = NULL;
context.adlen = 0;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
// main configurable Argon2 hash parameters
context.m_cost = 1000; // Memory in KiB (1MB)
context.lanes = 8; // Degree of Parallelism
context.threads = 1; // Threads
context.t_cost = 2; // Iterations
context.version = ARGON2_VERSION_10;
argon2_ctx( &context, Argon2_d );
}
bool register_argon2d1000_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d;
gate->hash = (void*)&argon2d1000_hash;
opt_target_factor = 65536.0;
return true;
}
void argon2d16000_hash( void *output, const void *input )
{
argon2_context context;
context.out = (uint8_t *)output;
context.outlen = (uint32_t)OUTPUT_BYTES;
context.pwd = (uint8_t *)input;
context.pwdlen = (uint32_t)INPUT_BYTES;
context.salt = (uint8_t *)input; //salt = input
context.saltlen = (uint32_t)INPUT_BYTES;
context.secret = NULL;
context.secretlen = 0;
context.ad = NULL;
context.adlen = 0;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
// main configurable Argon2 hash parameters
context.m_cost = 16000; // Memory in KiB (~16384KB)
context.lanes = 1; // Degree of Parallelism
context.threads = 1; // Threads
context.t_cost = 1; // Iterations
context.version = ARGON2_VERSION_10;
argon2_ctx( &context, Argon2_d );
}
bool register_argon2d16000_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d;
gate->hash = (void*)&argon2d16000_hash;
opt_target_factor = 65536.0;
return true;
}
@@ -148,7 +184,7 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = (const uint32_t)max_nonce;
uint32_t n = first_nonce;
const int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
uint32_t t_cost = 1; // 1 iteration
uint32_t m_cost = 4096; // use 4MB
uint32_t parallelism = 1; // 1 thread, 2 lanes
@@ -176,7 +212,6 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
bool register_argon2d4096_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_argon2d4096;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT |NEON_OPT;
opt_target_factor = 65536.0;
return true;
}

View File

@@ -4,22 +4,27 @@
#include "algo-gate-api.h"
#include <stdint.h>
int scanhash_argon2d( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
// Credits: version = 0x10, m_cost = 250.
bool register_argon2d250_algo( algo_gate_t* gate );
void argon2d250_hash( void *state, const void *input );
int scanhash_argon2d250( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
// Dynamic: version = 0x10, m_cost = 500.
bool register_argon2d500_algo( algo_gate_t* gate );
void argon2d500_hash( void *state, const void *input );
int scanhash_argon2d500( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
// Zero Dynamics Cash: version = 0x10, m_cost = 1000.
bool register_argon2d1000_algo( algo_gate_t* gate );
void argon2d1000_hash( void *state, const void *input );
bool register_argon2d16000_algo( algo_gate_t* gate );
void argon2d16000_hash( void *state, const void *input );
// Unitus: version = 0x13, m_cost = 4096.
bool register_argon2d4096_algo( algo_gate_t* gate );

View File

@@ -21,7 +21,7 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
__m512i *H = (__m512i*)ctx->h;
const __m512i count = _mm512_set4_epi32( ctx->count3, ctx->count2,
ctx->count1, ctx->count0 );
int r;
const __m512i zero = _mm512_setzero_si512();
P0 = H[0];
P1 = H[1];
@@ -37,182 +37,160 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
K6 = M[6];
K7 = M[7];
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
// round 0
P0 = _mm512_xor_si512( P0, X );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
P0 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P0 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
P2 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P2 );
P2 = _mm512_xor_si512( P2, X );
// round
for ( r = 0; r < 3; r ++ )
for ( int r = 0; r < 3; r ++ )
{
// round 1, 5, 9
K0 = _mm512_xor_si512( K7, mm512_shuflr128_32(
_mm512_aesenc_epi128( K0, m512_zero ) ) );
_mm512_aesenc_epi128( K0, zero ) ) );
if ( r == 0 )
K0 = _mm512_xor_si512( K0,
_mm512_mask_xor_epi32( count, 0x8888, count, m512_neg1 ) );
_mm512_mask_ternarylogic_epi32( count, 0x8888, count, count, 1 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), zero );
K1 = _mm512_xor_si512( K0,
mm512_shuflr128_32( _mm512_aesenc_epi128( K1, m512_zero ) ) );
mm512_shuflr128_32( _mm512_aesenc_epi128( K1, zero ) ) );
if ( r == 1 )
K1 = _mm512_xor_si512( K1, mm512_shuflr128_32(
_mm512_mask_xor_epi32( count, 0x1111, count, m512_neg1 ) ) );
_mm512_mask_ternarylogic_epi32( count, 0x1111, count, count, 1 ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
K2 = _mm512_xor_si512( K1,
mm512_shuflr128_32( _mm512_aesenc_epi128( K2, m512_zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
mm512_shuflr128_32( _mm512_aesenc_epi128( K2, zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
K3 = _mm512_xor_si512( K2,
mm512_shuflr128_32( _mm512_aesenc_epi128( K3, m512_zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
P3 = _mm512_xor_si512( P3, X );
mm512_shuflr128_32( _mm512_aesenc_epi128( K3, zero ) ) );
P3 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P3 );
K4 = _mm512_xor_si512( K3,
mm512_shuflr128_32( _mm512_aesenc_epi128( K4, m512_zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), m512_zero );
mm512_shuflr128_32( _mm512_aesenc_epi128( K4, zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), zero );
K5 = _mm512_xor_si512( K4,
mm512_shuflr128_32( _mm512_aesenc_epi128( K5, m512_zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
mm512_shuflr128_32( _mm512_aesenc_epi128( K5, zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
K6 = _mm512_xor_si512( K5,
mm512_shuflr128_32( _mm512_aesenc_epi128( K6, m512_zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
mm512_shuflr128_32( _mm512_aesenc_epi128( K6, zero ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
K7 = _mm512_xor_si512( K6,
mm512_shuflr128_32( _mm512_aesenc_epi128( K7, m512_zero ) ) );
mm512_shuflr128_32( _mm512_aesenc_epi128( K7, zero ) ) );
if ( r == 2 )
K7 = _mm512_xor_si512( K7, mm512_swap128_64(
_mm512_mask_xor_epi32( count, 0x2222, count, m512_neg1 ) ) );
_mm512_mask_ternarylogic_epi32( count, 0x2222, count, count, 1 ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
P1 = _mm512_xor_si512( P1, X );
P1 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P1 );
// round 2, 6, 10
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K0 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K0 ), zero );
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
P2 = _mm512_xor_si512( P2, X );
P2 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P2 );
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K4 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K4 ), zero );
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
P0 = _mm512_xor_si512( P0, X );
P0 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P0 );
// round 3, 7, 11
K0 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K0, m512_zero ) ), K7 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K0 ), m512_zero );
_mm512_aesenc_epi128( K0, zero ) ), K7 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K0 ), zero );
K1 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K1, m512_zero ) ), K0 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
_mm512_aesenc_epi128( K1, zero ) ), K0 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
K2 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K2, m512_zero ) ), K1 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
_mm512_aesenc_epi128( K2, zero ) ), K1 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
K3 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K3, m512_zero ) ), K2 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
P1 = _mm512_xor_si512( P1, X );
_mm512_aesenc_epi128( K3, zero ) ), K2 );
P1 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P1 );
K4 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K4, m512_zero ) ), K3 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K4 ), m512_zero );
_mm512_aesenc_epi128( K4, zero ) ), K3 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K4 ), zero );
K5 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K5, m512_zero ) ), K4 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
_mm512_aesenc_epi128( K5, zero ) ), K4 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
K6 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K6, m512_zero ) ), K5 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
_mm512_aesenc_epi128( K6, zero ) ), K5 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
K7 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K7, m512_zero ) ), K6 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
P3 = _mm512_xor_si512( P3, X );
_mm512_aesenc_epi128( K7, zero ) ), K6 );
P3 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P3 );
// round 4, 8, 12
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), zero );
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
P0 = _mm512_xor_si512( P0, X );
P0 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P0 );
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), zero );
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
P2 = _mm512_xor_si512( P2, X );
P2 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P2 );
}
// round 13
K0 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K0, m512_zero ) ), K7 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), m512_zero );
_mm512_aesenc_epi128( K0, zero ) ), K7 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), zero );
K1 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K1, m512_zero ) ), K0 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
_mm512_aesenc_epi128( K1, zero ) ), K0 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
K2 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K2, m512_zero ) ), K1 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
_mm512_aesenc_epi128( K2, zero ) ), K1 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
K3 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K3, m512_zero ) ), K2 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
P3 = _mm512_xor_si512( P3, X );
_mm512_aesenc_epi128( K3, zero ) ), K2 );
P3 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P3 );
K4 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K4, m512_zero ) ), K3 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), m512_zero );
_mm512_aesenc_epi128( K4, zero ) ), K3 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), zero );
K5 = _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K5, m512_zero ) ), K4 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
K6 = mm512_shuflr128_32( _mm512_aesenc_epi128( K6, m512_zero ) );
K6 = _mm512_xor_si512( K6, _mm512_xor_si512( K5, mm512_swap64_32(
_mm512_mask_xor_epi32( count, 0x4444, count, m512_neg1 ) ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
_mm512_aesenc_epi128( K5, zero ) ), K4 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
K6 = mm512_shuflr128_32( _mm512_aesenc_epi128( K6, zero ) );
K6 = mm512_xor3( K6, K5, mm512_swap64_32(
_mm512_mask_ternarylogic_epi32( count, 0x4444, count, count, 1 ) ) );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
K7= _mm512_xor_si512( mm512_shuflr128_32(
_mm512_aesenc_epi128( K7, m512_zero ) ), K6 );
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
P1 = _mm512_xor_si512( P1, X );
_mm512_aesenc_epi128( K7, zero ) ), K6 );
P1 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P1 );
H[0] = _mm512_xor_si512( H[0], P2 );
H[1] = _mm512_xor_si512( H[1], P3 );

View File

@@ -1,159 +0,0 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
#include "sph_shavite.h"
extern void inkhash(void *state, const void *input)
{
sph_shavite512_context ctx_shavite;
uint32_t hash[16];
sph_shavite512_init(&ctx_shavite);
sph_shavite512 (&ctx_shavite, (const void*) input, 80);
sph_shavite512_close(&ctx_shavite, (void*) hash);
sph_shavite512_init(&ctx_shavite);
sph_shavite512(&ctx_shavite, (const void*) hash, 64);
sph_shavite512_close(&ctx_shavite, (void*) hash);
memcpy(state, hash, 32);
/*
int ii;
printf("result: ");
for (ii=0; ii < 32; ii++)
{
printf ("%.2x",((uint8_t*)state)[ii]);
};
printf ("\n");
*/
}
int scanhash_ink( struct work *work,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id;
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
//const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(32) hash64[8];
uint32_t endiandata[32];
//char testdata[] = {"\x70\x00\x00\x00\x5d\x38\x5b\xa1\x14\xd0\x79\x97\x0b\x29\xa9\x41\x8f\xd0\x54\x9e\x7d\x68\xa9\x5c\x7f\x16\x86\x21\xa3\x14\x20\x10\x00\x00\x00\x00\x57\x85\x86\xd1\x49\xfd\x07\xb2\x2f\x3a\x8a\x34\x7c\x51\x6d\xe7\x05\x2f\x03\x4d\x2b\x76\xff\x68\xe0\xd6\xec\xff\x9b\x77\xa4\x54\x89\xe3\xfd\x51\x17\x32\x01\x1d\xf0\x73\x10\x00"};
//we need bigendian data...
//lessons learned: do NOT endianchange directly in pdata, this will all proof-of-works be considered as stale from minerd....
int kk=0;
for (; kk < 32; kk++)
{
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
};
// if (opt_debug)
// {
// applog(LOG_DEBUG, "Thr: %02d, firstN: %08x, maxN: %08x, ToDo: %d", thr_id, first_nonce, max_nonce, max_nonce-first_nonce);
// }
/* I'm to lazy to put the loop in an inline function... so dirty copy'n'paste.... */
/* i know that i could set a variable, but i don't know how the compiler will optimize it, not that then the cpu needs to load the value *everytime* in a register */
if (ptarget[7]==0) {
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
inkhash(hash64, endiandata);
if (((hash64[7]&0xFFFFFFFF)==0) &&
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xF)
{
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
inkhash(hash64, endiandata);
if (((hash64[7]&0xFFFFFFF0)==0) &&
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xFF)
{
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
inkhash(hash64, endiandata);
if (((hash64[7]&0xFFFFFF00)==0) &&
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xFFF)
{
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
inkhash(hash64, endiandata);
if (((hash64[7]&0xFFFFF000)==0) &&
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else if (ptarget[7]<=0xFFFF)
{
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
inkhash(hash64, endiandata);
if (((hash64[7]&0xFFFF0000)==0) &&
fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
else
{
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
inkhash(hash64, endiandata);
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (n < max_nonce && !work_restart[thr_id].restart);
}
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
bool register_shavite_algo( algo_gate_t* gate )
{
algo_not_implemented();
return false;
// gate->scanhash = (void*)&scanhash_ink;
// gate->hash = (void*)&inkhash;
// return true;
};

View File

@@ -50,7 +50,8 @@ extern "C"{
#pragma warning (disable: 4146)
#endif
static const sph_u32 IV512[] = {
static const sph_u32 IV512[] =
{
0x72FCCDD8, 0x79CA4727, 0x128A077B, 0x40D55AEC,
0xD1901A06, 0x430AE307, 0xB29F5CD1, 0xDF07FBFC,
0x8E45D73D, 0x681AB538, 0xBDE86578, 0xDD577E47,
@@ -71,38 +72,26 @@ c512( sph_shavite_big_context *sc, const void *msg )
p2 = h[2];
p3 = h[3];
// round
k00 = m[0];
x = v128_xor( p1, k00 );
x = v128_aesenc_nokey( x );
k01 = m[1];
x = v128_xor( x, k01 );
x = v128_aesenc_nokey( x );
k02 = m[2];
x = v128_xor( x, k02 );
x = v128_aesenc_nokey( x );
k03 = m[3];
x = v128_xor( x, k03 );
x = v128_aesenc_nokey( x );
p0 = v128_xor( p0, x );
k10 = m[4];
x = v128_xor( p3, k10 );
x = v128_aesenc_nokey( x );
k11 = m[5];
x = v128_xor( x, k11 );
x = v128_aesenc_nokey( x );
k12 = m[6];
x = v128_xor( x, k12 );
x = v128_aesenc_nokey( x );
k13 = m[7];
x = v128_xor( x, k13 );
x = v128_aesenc_nokey( x );
p2 = v128_xor( p2, x );
// round 0
x = v128_xoraesenc( p1, k00 );
x = v128_xoraesenc( x, k01 );
x = v128_xoraesenc( x, k02 );
p0 = v128_xoraesencxor( x, k03, p0 );
x = v128_xoraesenc( p3, k10 );
x = v128_xoraesenc( x, k11 );
x = v128_xoraesenc( x, k12 );
p2 = v128_xoraesencxor( x, k13, p2 );
for ( r = 0; r < 3; r ++ )
{
@@ -113,198 +102,165 @@ c512( sph_shavite_big_context *sc, const void *msg )
if ( r == 0 )
k00 = v128_xor( k00, v128_set32(
~sc->count3, sc->count2, sc->count1, sc->count0 ) );
x = v128_xoraesenc( p0, k00 );
x = v128_xor( p0, k00 );
x = v128_aesenc_nokey( x );
k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) );
k01 = v128_xor( k01, k00 );
if ( r == 1 )
k01 = v128_xor( k01, v128_set32(
~sc->count0, sc->count1, sc->count2, sc->count3 ) );
x = v128_xoraesenc( x, k01 );
x = v128_xor( x, k01 );
x = v128_aesenc_nokey( x );
k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) );
k02 = v128_xor( k02, k01 );
x = v128_xor( x, k02 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k02 );
k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) );
k03 = v128_xor( k03, k02 );
x = v128_xor( x, k03 );
x = v128_aesenc_nokey( x );
p3 = v128_xor( p3, x );
p3 = v128_xoraesencxor( x, k03, p3 );
k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) );
k10 = v128_xor( k10, k03 );
x = v128_xoraesenc( p2, k10 );
x = v128_xor( p2, k10 );
x = v128_aesenc_nokey( x );
k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) );
k11 = v128_xor( k11, k10 );
x = v128_xor( x, k11 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k11 );
k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) );
k12 = v128_xor( k12, k11 );
x = v128_xor( x, k12 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k12 );
k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) );
k13 = v128_xor( k13, k12 );
if ( r == 2 )
k13 = v128_xor( k13, v128_set32(
~sc->count1, sc->count0, sc->count3, sc->count2 ) );
x = v128_xor( x, k13 );
x = v128_aesenc_nokey( x );
p1 = v128_xor( p1, x );
p1 = v128_xoraesencxor( x, k13, p1 );
// round 2, 6, 10
k00 = v128_xor( k00, v128_alignr8( k13, k12, 4 ) );
x = v128_xor( p3, k00 );
x = v128_aesenc_nokey( x );
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
x = v128_xor( x, k01 );
x = v128_aesenc_nokey( x );
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
x = v128_xor( x, k02 );
x = v128_aesenc_nokey( x );
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
x = v128_xor( x, k03 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p3, k00 );
p2 = v128_xor( p2, x );
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
x = v128_xoraesenc( x, k01 );
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
x = v128_xoraesenc( x, k02 );
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
p2 = v128_xoraesencxor( x, k03, p2 );
k10 = v128_xor( k10, v128_alignr8( k03, k02, 4 ) );
x = v128_xor( p1, k10 );
x = v128_aesenc_nokey( x );
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
x = v128_xor( x, k11 );
x = v128_aesenc_nokey( x );
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
x = v128_xor( x, k12 );
x = v128_aesenc_nokey( x );
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
x = v128_xor( x, k13 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p1, k10 );
p0 = v128_xor( p0, x );
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
x = v128_xoraesenc( x, k11 );
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
x = v128_xoraesenc( x, k12 );
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
p0 = v128_xoraesencxor( x, k13, p0 );
// round 3, 7, 11
k00 = v128_shuflr32( v128_aesenc_nokey( k00 ) );
k00 = v128_xor( k00, k13 );
x = v128_xor( p2, k00 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p2, k00 );
k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) );
k01 = v128_xor( k01, k00 );
x = v128_xor( x, k01 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k01 );
k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) );
k02 = v128_xor( k02, k01 );
x = v128_xor( x, k02 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k02 );
k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) );
k03 = v128_xor( k03, k02 );
x = v128_xor( x, k03 );
x = v128_aesenc_nokey( x );
p1 = v128_xor( p1, x );
p1 = v128_xoraesencxor( x, k03, p1 );
k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) );
k10 = v128_xor( k10, k03 );
x = v128_xor( p0, k10 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p0, k10 );
k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) );
k11 = v128_xor( k11, k10 );
x = v128_xor( x, k11 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k11 );
k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) );
k12 = v128_xor( k12, k11 );
x = v128_xor( x, k12 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k12 );
k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) );
k13 = v128_xor( k13, k12 );
x = v128_xor( x, k13 );
x = v128_aesenc_nokey( x );
p3 = v128_xor( p3, x );
p3 = v128_xoraesencxor( x, k13, p3 );
// round 4, 8, 12
k00 = v128_xor( k00, v128_alignr8( k13, k12, 4 ) );
x = v128_xor( p1, k00 );
x = v128_aesenc_nokey( x );
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
x = v128_xor( x, k01 );
x = v128_aesenc_nokey( x );
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
x = v128_xor( x, k02 );
x = v128_aesenc_nokey( x );
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
x = v128_xor( x, k03 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p1, k00 );
p0 = v128_xor( p0, x );
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
x = v128_xoraesenc( x, k01 );
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
x = v128_xoraesenc( x, k02 );
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
p0 = v128_xoraesencxor( x, k03, p0 );
k10 = v128_xor( k10, v128_alignr8( k03, k02, 4 ) );
x = v128_xor( p3, k10 );
x = v128_aesenc_nokey( x );
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
x = v128_xor( x, k11 );
x = v128_aesenc_nokey( x );
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
x = v128_xor( x, k12 );
x = v128_aesenc_nokey( x );
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
x = v128_xor( x, k13 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p3, k10 );
p2 = v128_xor( p2, x );
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
x = v128_xoraesenc( x, k11 );
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
x = v128_xoraesenc( x, k12 );
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
p2 = v128_xoraesencxor( x, k13, p2 );
}
// round 13
k00 = v128_shuflr32( v128_aesenc_nokey( k00 ) );
k00 = v128_xor( k00, k13 );
x = v128_xor( p0, k00 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p0, k00 );
k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) );
k01 = v128_xor( k01, k00 );
x = v128_xor( x, k01 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k01 );
k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) );
k02 = v128_xor( k02, k01 );
x = v128_xor( x, k02 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k02 );
k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) );
k03 = v128_xor( k03, k02 );
x = v128_xor( x, k03 );
x = v128_aesenc_nokey( x );
p3 = v128_xor( p3, x );
p3 = v128_xoraesencxor( x, k03, p3 );
k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) );
k10 = v128_xor( k10, k03 );
x = v128_xor( p2, k10 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( p2, k10 );
k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) );
k11 = v128_xor( k11, k10 );
x = v128_xor( x, k11 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k11 );
k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) );
k12 = v128_xor( k12, v128_xor( k11, v128_set32(
~sc->count2, sc->count3, sc->count0, sc->count1 ) ) );
x = v128_xor( x, k12 );
x = v128_aesenc_nokey( x );
x = v128_xoraesenc( x, k12 );
k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) );
k13 = v128_xor( k13, k12 );
x = v128_xor( x, k13 );
x = v128_aesenc_nokey( x );
p1 = v128_xor( p1, x );
p1 = v128_xoraesencxor( x, k13, p1 );
h[0] = v128_xor( h[0], p2 );
h[1] = v128_xor( h[1], p3 );

View File

@@ -108,7 +108,24 @@ extern "C"{
} while (0)
#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
{ \
(Y0) = AES0[(X0) & 0xFF] \
^ AES1[((X1) >> 8) & 0xFF] \
^ AES2[((X2) >> 16) & 0xFF] \
^ AES3[((X3) >> 24) & 0xFF]; \
(Y1) = AES0[(X1) & 0xFF] \
^ AES1[((X2) >> 8) & 0xFF] \
^ AES2[((X3) >> 16) & 0xFF] \
^ AES3[((X0) >> 24) & 0xFF]; \
(Y2) = AES0[(X2) & 0xFF] \
^ AES1[((X3) >> 8) & 0xFF] \
^ AES2[((X0) >> 16) & 0xFF] \
^ AES3[((X1) >> 24) & 0xFF]; \
(Y3) = AES0[(X3) & 0xFF] \
^ AES1[((X0) >> 8) & 0xFF] \
^ AES2[((X1) >> 16) & 0xFF] \
^ AES3[((X2) >> 24) & 0xFF]; \
}
#endif

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 25.5.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 25.6.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='25.5'
PACKAGE_STRING='cpuminer-opt 25.5'
PACKAGE_VERSION='25.6'
PACKAGE_STRING='cpuminer-opt 25.6'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1359,7 +1359,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 25.5 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 25.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1431,7 +1431,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 25.5:";;
short | recursive ) echo "Configuration of cpuminer-opt 25.6:";;
esac
cat <<\_ACEOF
@@ -1536,7 +1536,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 25.5
cpuminer-opt configure 25.6
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1983,7 +1983,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 25.5, which was
It was created by cpuminer-opt $as_me 25.6, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3591,7 +3591,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='25.5'
VERSION='25.6'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7435,7 +7435,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 25.5, which was
This file was extended by cpuminer-opt $as_me 25.6, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7503,7 +7503,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 25.5
cpuminer-opt config.status 25.6
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [25.5])
AC_INIT([cpuminer-opt], [25.6])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.72 for cpuminer-opt 25.5.
# Generated by GNU Autoconf 2.72 for cpuminer-opt 25.6.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation,
@@ -601,8 +601,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='25.5'
PACKAGE_STRING='cpuminer-opt 25.5'
PACKAGE_VERSION='25.6'
PACKAGE_STRING='cpuminer-opt 25.6'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1352,7 +1352,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
'configure' configures cpuminer-opt 25.5 to adapt to many kinds of systems.
'configure' configures cpuminer-opt 25.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1424,7 +1424,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 25.5:";;
short | recursive ) echo "Configuration of cpuminer-opt 25.6:";;
esac
cat <<\_ACEOF
@@ -1528,7 +1528,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 25.5
cpuminer-opt configure 25.6
generated by GNU Autoconf 2.72
Copyright (C) 2023 Free Software Foundation, Inc.
@@ -1949,7 +1949,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 25.5, which was
It was created by cpuminer-opt $as_me 25.6, which was
generated by GNU Autoconf 2.72. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3065,7 +3065,7 @@ ac_config_headers="$ac_config_headers cpuminer-config.h"
am__api_version='1.17'
am__api_version='1.18'
# Find a good install program. We prefer a C program (faster),
@@ -3334,10 +3334,14 @@ am_lf='
'
case `pwd` in
*[\\\"\#\$\&\'\`$am_lf]*)
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
esac
case $srcdir in
*[\\\"\#\$\&\'\`$am_lf\ \ ]*)
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;;
esac
@@ -3764,7 +3768,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='25.5'
VERSION='25.6'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -3802,9 +3806,133 @@ AMTAR='$${TAR-tar}'
# We'll loop over all known methods to create a tar archive until one works.
_am_tools='gnutar pax cpio none'
_am_tools='gnutar plaintar pax cpio none'
am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
# The POSIX 1988 'ustar' format is defined with fixed-size fields.
# There is notably a 21 bits limit for the UID and the GID. In fact,
# the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
# and bug#13588).
am_max_uid=2097151 # 2^21 - 1
am_max_gid=$am_max_uid
# The $UID and $GID variables are not portable, so we need to resort
# to the POSIX-mandated id(1) utility. Errors in the 'id' calls
# below are definitely unexpected, so allow the users to see them
# (that is, avoid stderr redirection).
am_uid=`id -u || echo unknown`
am_gid=`id -g || echo unknown`
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether UID '$am_uid' is supported by ustar format" >&5
printf %s "checking whether UID '$am_uid' is supported by ustar format... " >&6; }
if test x$am_uid = xunknown; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: ancient id detected; assuming current UID is ok, but dist-ustar might not work" >&5
printf "%s\n" "$as_me: WARNING: ancient id detected; assuming current UID is ok, but dist-ustar might not work" >&2;}
elif test $am_uid -le $am_max_uid; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
_am_tools=none
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GID '$am_gid' is supported by ustar format" >&5
printf %s "checking whether GID '$am_gid' is supported by ustar format... " >&6; }
if test x$gm_gid = xunknown; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: ancient id detected; assuming current GID is ok, but dist-ustar might not work" >&5
printf "%s\n" "$as_me: WARNING: ancient id detected; assuming current GID is ok, but dist-ustar might not work" >&2;}
elif test $am_gid -le $am_max_gid; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
_am_tools=none
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to create a ustar tar archive" >&5
printf %s "checking how to create a ustar tar archive... " >&6; }
# Go ahead even if we have the value already cached. We do so because we
# need to set the values for the 'am__tar' and 'am__untar' variables.
_am_tools=${am_cv_prog_tar_ustar-$_am_tools}
for _am_tool in $_am_tools; do
case $_am_tool in
gnutar)
for _am_tar in tar gnutar gtar; do
{ echo "$as_me:$LINENO: $_am_tar --version" >&5
($_am_tar --version) >&5 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && break
done
am__tar="$_am_tar --format=ustar -chf - "'"$$tardir"'
am__tar_="$_am_tar --format=ustar -chf - "'"$tardir"'
am__untar="$_am_tar -xf -"
;;
plaintar)
# Must skip GNU tar: if it does not support --format= it doesn't create
# ustar tarball either.
(tar --version) >/dev/null 2>&1 && continue
am__tar='tar chf - "$$tardir"'
am__tar_='tar chf - "$tardir"'
am__untar='tar xf -'
;;
pax)
am__tar='pax -L -x ustar -w "$$tardir"'
am__tar_='pax -L -x ustar -w "$tardir"'
am__untar='pax -r'
;;
cpio)
am__tar='find "$$tardir" -print | cpio -o -H ustar -L'
am__tar_='find "$tardir" -print | cpio -o -H ustar -L'
am__untar='cpio -i -H ustar -d'
;;
none)
am__tar=false
am__tar_=false
am__untar=false
;;
esac
# If the value was cached, stop now. We just wanted to have am__tar
# and am__untar set.
test -n "${am_cv_prog_tar_ustar}" && break
# tar/untar a dummy directory, and stop if the command works.
rm -rf conftest.dir
mkdir conftest.dir
echo GrepMe > conftest.dir/file
{ echo "$as_me:$LINENO: tardir=conftest.dir && eval $am__tar_ >conftest.tar" >&5
(tardir=conftest.dir && eval $am__tar_ >conftest.tar) >&5 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }
rm -rf conftest.dir
if test -s conftest.tar; then
{ echo "$as_me:$LINENO: $am__untar <conftest.tar" >&5
($am__untar <conftest.tar) >&5 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }
{ echo "$as_me:$LINENO: cat conftest.dir/file" >&5
(cat conftest.dir/file) >&5 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
fi
done
rm -rf conftest.dir
if test ${am_cv_prog_tar_ustar+y}
then :
printf %s "(cached) " >&6
else case e in #(
e) am_cv_prog_tar_ustar=$_am_tool ;;
esac
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_tar_ustar" >&5
printf "%s\n" "$am_cv_prog_tar_ustar" >&6; }
@@ -4986,7 +5114,10 @@ _ACEOF
break
fi
done
rm -f core conftest*
# aligned with autoconf, so not including core; see bug#72225.
rm -f -r a.out a.exe b.out conftest.$ac_ext conftest.$ac_objext \
conftest.dSYM conftest1.$ac_ext conftest1.$ac_objext conftest1.dSYM \
conftest2.$ac_ext conftest2.$ac_objext conftest2.dSYM
unset am_i ;;
esac
fi
@@ -7450,7 +7581,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 25.5, which was
This file was extended by cpuminer-opt $as_me 25.6, which was
generated by GNU Autoconf 2.72. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7518,7 +7649,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 25.5
cpuminer-opt config.status 25.6
configured by $0, generated by GNU Autoconf 2.72,
with options \\"\$ac_cs_config\\"

View File

@@ -582,6 +582,8 @@ enum algos {
ALGO_ANIME,
ALGO_ARGON2D250,
ALGO_ARGON2D500,
ALGO_ARGON2D1000,
ALGO_ARGON2D16000,
ALGO_ARGON2D4096,
ALGO_AXIOM,
ALGO_BLAKE,
@@ -677,6 +679,8 @@ static const char* const algo_names[] = {
"anime",
"argon2d250",
"argon2d500",
"argon2d1000",
"argon2d16000",
"argon2d4096",
"axiom",
"blake",
@@ -837,6 +841,8 @@ Options:\n\
anime Animecoin (ANI)\n\
argon2d250\n\
argon2d500\n\
argon2d1000\n\
argon2d16000\n\
argon2d4096\n\
axiom Shabal-256 MemoHash\n\
blake blake256r14 (SFR)\n\

View File

@@ -137,10 +137,24 @@
#define v128_unpackhi8 _mm_unpackhi_epi8
// AES
// Nokey means nothing on x86_64 but it saves an instruction and a register
// on ARM.
#define v128_aesenc _mm_aesenc_si128
// xor key with result after encryption, x86_64 format.
#define v128_aesencxor _mm_aesenc_si128
// default is x86_64 format.
#define v128_aesenc v128_aesencxor
// xor key with v before encryption, arm64 format.
#define v128_xoraesenc( v, k ) \
_mm_aesenc_si128( v128_xor( v, k ), v128_zero )
// xor v with k_in before encryption then xor the result with k_out afterward.
// Uses the applicable optimization based on the target.
#define v128_xoraesencxor( v, k_in, k_out ) \
_mm_aesenc_si128( v128_xor( v, k_in ), k_out )
// arm64 optimized
#define v128_aesenc_nokey(v) _mm_aesenc_si128( v, v128_zero )
#define v128_aesenclast _mm_aesenclast_si128
#define v128_aesenclast_nokey(v) _mm_aesenclast_si128( v, v128_zero )
#define v128_aesdec _mm_aesdec_si128

View File

@@ -187,9 +187,21 @@
// vzipq_u32 can do hi & lo and return uint32x4x2, no 64 bit version.
// AES
// consistent with Intel AES intrinsics, break up for optimizing
#define v128_aesenc( v, k ) \
v128_xor( k, vaesmcq_u8( vaeseq_u8( v, v128_zero ) ) )
// xor key with result after encryption, x86_64 format.
#define v128_aesencxor( v, k ) \
v128_xor( vaesmcq_u8( vaeseq_u8( v, v128_zero ) ), k )
// default is x86_64 format.
#define v128_aesenc v128_aesencxor
// xor key with v before encryption, arm64 format.
#define v128_xoraesenc( v, k ) \
vaesmcq_u8( vaeseq_u8( v, k ) )
// xor v with k_in before encryption then xor the result with k_out afterward.
// Uses the applicable optimization based on the target.
#define v128_xoraesencxor( v, k_in, k_out ) \
v128_xor( v128_xoraesenc( v, k_in ), k_out )
#define v128_aesenc_nokey( v ) \
vaesmcq_u8( vaeseq_u8( v, v128_zero ) )