mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v25.6
This commit is contained in:
@@ -6,6 +6,38 @@ static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Inpu
|
||||
static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash
|
||||
static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS
|
||||
|
||||
// generic, works with most variations of argon2d
|
||||
int scanhash_argon2d( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t first_nonce = (const uint32_t)pdata[19];
|
||||
const uint32_t last_nonce = (const uint32_t)max_nonce;
|
||||
uint32_t nonce = first_nonce;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
v128_bswap32_80( edata, pdata );
|
||||
do
|
||||
{
|
||||
edata[19] = nonce;
|
||||
algo_gate.hash( hash, edata, thr_id );
|
||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void argon2d250_hash( void *output, const void *input )
|
||||
{
|
||||
argon2_context context;
|
||||
@@ -32,41 +64,10 @@ void argon2d250_hash( void *output, const void *input )
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d250( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&edata[19], nonce);
|
||||
argon2d250_hash( hash, edata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d250_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d250;
|
||||
gate->scanhash = (void*)&scanhash_argon2d;
|
||||
gate->hash = (void*)&argon2d250_hash;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
@@ -97,43 +98,78 @@ void argon2d500_hash( void *output, const void *input )
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d500( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t first_nonce = (const uint32_t)pdata[19];
|
||||
const uint32_t last_nonce = (const uint32_t)max_nonce;
|
||||
uint32_t nonce = first_nonce;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
v128_bswap32_80( edata, pdata );
|
||||
do
|
||||
{
|
||||
edata[19] = nonce;
|
||||
argon2d500_hash( hash, edata );
|
||||
if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget )
|
||||
&& !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( nonce );;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d500_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d500;
|
||||
gate->scanhash = (void*)&scanhash_argon2d;
|
||||
gate->hash = (void*)&argon2d500_hash;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void argon2d1000_hash( void *output, const void *input )
|
||||
{
|
||||
argon2_context context;
|
||||
context.out = (uint8_t *)output;
|
||||
context.outlen = (uint32_t)OUTPUT_BYTES;
|
||||
context.pwd = (uint8_t *)input;
|
||||
context.pwdlen = (uint32_t)INPUT_BYTES;
|
||||
context.salt = (uint8_t *)input; //salt = input
|
||||
context.saltlen = (uint32_t)INPUT_BYTES;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
|
||||
// main configurable Argon2 hash parameters
|
||||
context.m_cost = 1000; // Memory in KiB (1MB)
|
||||
context.lanes = 8; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 2; // Iterations
|
||||
context.version = ARGON2_VERSION_10;
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
bool register_argon2d1000_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d;
|
||||
gate->hash = (void*)&argon2d1000_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void argon2d16000_hash( void *output, const void *input )
|
||||
{
|
||||
argon2_context context;
|
||||
context.out = (uint8_t *)output;
|
||||
context.outlen = (uint32_t)OUTPUT_BYTES;
|
||||
context.pwd = (uint8_t *)input;
|
||||
context.pwdlen = (uint32_t)INPUT_BYTES;
|
||||
context.salt = (uint8_t *)input; //salt = input
|
||||
context.saltlen = (uint32_t)INPUT_BYTES;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
|
||||
// main configurable Argon2 hash parameters
|
||||
context.m_cost = 16000; // Memory in KiB (~16384KB)
|
||||
context.lanes = 1; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 1; // Iterations
|
||||
context.version = ARGON2_VERSION_10;
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
bool register_argon2d16000_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d;
|
||||
gate->hash = (void*)&argon2d16000_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
@@ -148,7 +184,7 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = (const uint32_t)max_nonce;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t t_cost = 1; // 1 iteration
|
||||
uint32_t m_cost = 4096; // use 4MB
|
||||
uint32_t parallelism = 1; // 1 thread, 2 lanes
|
||||
@@ -176,7 +212,6 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d4096;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT |NEON_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -4,22 +4,27 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
int scanhash_argon2d( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
// Credits: version = 0x10, m_cost = 250.
|
||||
bool register_argon2d250_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d250_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d250( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
// Dynamic: version = 0x10, m_cost = 500.
|
||||
bool register_argon2d500_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d500_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d500( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
// Zero Dynamics Cash: version = 0x10, m_cost = 1000.
|
||||
bool register_argon2d1000_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d1000_hash( void *state, const void *input );
|
||||
|
||||
bool register_argon2d16000_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d16000_hash( void *state, const void *input );
|
||||
|
||||
// Unitus: version = 0x13, m_cost = 4096.
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate );
|
||||
|
||||
@@ -109,7 +109,7 @@ c512_2way( shavite512_2way_context *ctx, const void *msg )
|
||||
|
||||
for ( r = 0; r < 3; r ++ )
|
||||
{
|
||||
// round 1, 5, 9
|
||||
// round 1, 5, 9
|
||||
|
||||
k00 = _mm256_xor_si256( k13, mm256_shuflr128_32(
|
||||
mm256_aesenc_2x128( k00, zero ) ) );
|
||||
|
||||
@@ -21,7 +21,7 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
|
||||
__m512i *H = (__m512i*)ctx->h;
|
||||
const __m512i count = _mm512_set4_epi32( ctx->count3, ctx->count2,
|
||||
ctx->count1, ctx->count0 );
|
||||
int r;
|
||||
const __m512i zero = _mm512_setzero_si512();
|
||||
|
||||
P0 = H[0];
|
||||
P1 = H[1];
|
||||
@@ -37,182 +37,160 @@ c512_4way( shavite512_4way_context *ctx, const void *msg )
|
||||
K6 = M[6];
|
||||
K7 = M[7];
|
||||
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||
// round 0
|
||||
|
||||
P0 = _mm512_xor_si512( P0, X );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
|
||||
P0 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P0 );
|
||||
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
|
||||
P2 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P2 );
|
||||
|
||||
P2 = _mm512_xor_si512( P2, X );
|
||||
|
||||
// round
|
||||
for ( r = 0; r < 3; r ++ )
|
||||
for ( int r = 0; r < 3; r ++ )
|
||||
{
|
||||
// round 1, 5, 9
|
||||
// round 1, 5, 9
|
||||
|
||||
K0 = _mm512_xor_si512( K7, mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K0, m512_zero ) ) );
|
||||
_mm512_aesenc_epi128( K0, zero ) ) );
|
||||
|
||||
if ( r == 0 )
|
||||
K0 = _mm512_xor_si512( K0,
|
||||
_mm512_mask_xor_epi32( count, 0x8888, count, m512_neg1 ) );
|
||||
_mm512_mask_ternarylogic_epi32( count, 0x8888, count, count, 1 ) );
|
||||
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), zero );
|
||||
K1 = _mm512_xor_si512( K0,
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K1, m512_zero ) ) );
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K1, zero ) ) );
|
||||
|
||||
if ( r == 1 )
|
||||
K1 = _mm512_xor_si512( K1, mm512_shuflr128_32(
|
||||
_mm512_mask_xor_epi32( count, 0x1111, count, m512_neg1 ) ) );
|
||||
_mm512_mask_ternarylogic_epi32( count, 0x1111, count, count, 1 ) ) );
|
||||
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
|
||||
K2 = _mm512_xor_si512( K1,
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K2, m512_zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K2, zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
|
||||
K3 = _mm512_xor_si512( K2,
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K3, m512_zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||
|
||||
P3 = _mm512_xor_si512( P3, X );
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K3, zero ) ) );
|
||||
P3 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P3 );
|
||||
|
||||
K4 = _mm512_xor_si512( K3,
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K4, m512_zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), m512_zero );
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K4, zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), zero );
|
||||
K5 = _mm512_xor_si512( K4,
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K5, m512_zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K5, zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
|
||||
K6 = _mm512_xor_si512( K5,
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K6, m512_zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K6, zero ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
|
||||
K7 = _mm512_xor_si512( K6,
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K7, m512_zero ) ) );
|
||||
mm512_shuflr128_32( _mm512_aesenc_epi128( K7, zero ) ) );
|
||||
|
||||
if ( r == 2 )
|
||||
K7 = _mm512_xor_si512( K7, mm512_swap128_64(
|
||||
_mm512_mask_xor_epi32( count, 0x2222, count, m512_neg1 ) ) );
|
||||
_mm512_mask_ternarylogic_epi32( count, 0x2222, count, count, 1 ) ) );
|
||||
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||
P1 = _mm512_xor_si512( P1, X );
|
||||
P1 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P1 );
|
||||
|
||||
// round 2, 6, 10
|
||||
|
||||
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K0 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K0 ), zero );
|
||||
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
|
||||
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
|
||||
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||
|
||||
P2 = _mm512_xor_si512( P2, X );
|
||||
P2 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P2 );
|
||||
|
||||
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K4 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K4 ), zero );
|
||||
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
|
||||
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
|
||||
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||
|
||||
P0 = _mm512_xor_si512( P0, X );
|
||||
P0 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P0 );
|
||||
|
||||
// round 3, 7, 11
|
||||
|
||||
K0 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K0, m512_zero ) ), K7 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K0 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K0, zero ) ), K7 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K0 ), zero );
|
||||
K1 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K1, m512_zero ) ), K0 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K1, zero ) ), K0 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
|
||||
K2 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K2, m512_zero ) ), K1 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K2, zero ) ), K1 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
|
||||
K3 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K3, m512_zero ) ), K2 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||
|
||||
P1 = _mm512_xor_si512( P1, X );
|
||||
_mm512_aesenc_epi128( K3, zero ) ), K2 );
|
||||
P1 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P1 );
|
||||
|
||||
K4 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K4, m512_zero ) ), K3 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K4 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K4, zero ) ), K3 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K4 ), zero );
|
||||
K5 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K5, m512_zero ) ), K4 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K5, zero ) ), K4 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
|
||||
K6 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K6, m512_zero ) ), K5 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K6, zero ) ), K5 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
|
||||
K7 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K7, m512_zero ) ), K6 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||
|
||||
P3 = _mm512_xor_si512( P3, X );
|
||||
_mm512_aesenc_epi128( K7, zero ) ), K6 );
|
||||
P3 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P3 );
|
||||
|
||||
// round 4, 8, 12
|
||||
|
||||
K0 = _mm512_xor_si512( K0, _mm512_alignr_epi8( K7, K6, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P1, K0 ), zero );
|
||||
K1 = _mm512_xor_si512( K1, _mm512_alignr_epi8( K0, K7, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
|
||||
K2 = _mm512_xor_si512( K2, _mm512_alignr_epi8( K1, K0, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
|
||||
K3 = _mm512_xor_si512( K3, _mm512_alignr_epi8( K2, K1, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||
|
||||
P0 = _mm512_xor_si512( P0, X );
|
||||
P0 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P0 );
|
||||
|
||||
K4 = _mm512_xor_si512( K4, _mm512_alignr_epi8( K3, K2, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P3, K4 ), zero );
|
||||
K5 = _mm512_xor_si512( K5, _mm512_alignr_epi8( K4, K3, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
|
||||
K6 = _mm512_xor_si512( K6, _mm512_alignr_epi8( K5, K4, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
|
||||
K7 = _mm512_xor_si512( K7, _mm512_alignr_epi8( K6, K5, 4 ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||
|
||||
P2 = _mm512_xor_si512( P2, X );
|
||||
P2 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P2 );
|
||||
}
|
||||
|
||||
// round 13
|
||||
|
||||
K0 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K0, m512_zero ) ), K7 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K0, zero ) ), K7 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P0, K0 ), zero );
|
||||
K1 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K1, m512_zero ) ), K0 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K1, zero ) ), K0 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K1 ), zero );
|
||||
K2 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K2, m512_zero ) ), K1 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K2, zero ) ), K1 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K2 ), zero );
|
||||
K3 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K3, m512_zero ) ), K2 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), m512_zero );
|
||||
|
||||
P3 = _mm512_xor_si512( P3, X );
|
||||
_mm512_aesenc_epi128( K3, zero ) ), K2 );
|
||||
P3 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K3 ), P3 );
|
||||
|
||||
K4 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K4, m512_zero ) ), K3 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K4, zero ) ), K3 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( P2, K4 ), zero );
|
||||
K5 = _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K5, m512_zero ) ), K4 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), m512_zero );
|
||||
K6 = mm512_shuflr128_32( _mm512_aesenc_epi128( K6, m512_zero ) );
|
||||
K6 = _mm512_xor_si512( K6, _mm512_xor_si512( K5, mm512_swap64_32(
|
||||
_mm512_mask_xor_epi32( count, 0x4444, count, m512_neg1 ) ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), m512_zero );
|
||||
_mm512_aesenc_epi128( K5, zero ) ), K4 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K5 ), zero );
|
||||
K6 = mm512_shuflr128_32( _mm512_aesenc_epi128( K6, zero ) );
|
||||
K6 = mm512_xor3( K6, K5, mm512_swap64_32(
|
||||
_mm512_mask_ternarylogic_epi32( count, 0x4444, count, count, 1 ) ) );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K6 ), zero );
|
||||
K7= _mm512_xor_si512( mm512_shuflr128_32(
|
||||
_mm512_aesenc_epi128( K7, m512_zero ) ), K6 );
|
||||
X = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), m512_zero );
|
||||
|
||||
P1 = _mm512_xor_si512( P1, X );
|
||||
_mm512_aesenc_epi128( K7, zero ) ), K6 );
|
||||
P1 = _mm512_aesenc_epi128( _mm512_xor_si512( X, K7 ), P1 );
|
||||
|
||||
H[0] = _mm512_xor_si512( H[0], P2 );
|
||||
H[1] = _mm512_xor_si512( H[1], P3 );
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sph_shavite.h"
|
||||
|
||||
extern void inkhash(void *state, const void *input)
|
||||
{
|
||||
sph_shavite512_context ctx_shavite;
|
||||
uint32_t hash[16];
|
||||
|
||||
sph_shavite512_init(&ctx_shavite);
|
||||
sph_shavite512 (&ctx_shavite, (const void*) input, 80);
|
||||
sph_shavite512_close(&ctx_shavite, (void*) hash);
|
||||
|
||||
sph_shavite512_init(&ctx_shavite);
|
||||
sph_shavite512(&ctx_shavite, (const void*) hash, 64);
|
||||
sph_shavite512_close(&ctx_shavite, (void*) hash);
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
|
||||
/*
|
||||
int ii;
|
||||
printf("result: ");
|
||||
for (ii=0; ii < 32; ii++)
|
||||
{
|
||||
printf ("%.2x",((uint8_t*)state)[ii]);
|
||||
};
|
||||
printf ("\n");
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_ink( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t endiandata[32];
|
||||
|
||||
//char testdata[] = {"\x70\x00\x00\x00\x5d\x38\x5b\xa1\x14\xd0\x79\x97\x0b\x29\xa9\x41\x8f\xd0\x54\x9e\x7d\x68\xa9\x5c\x7f\x16\x86\x21\xa3\x14\x20\x10\x00\x00\x00\x00\x57\x85\x86\xd1\x49\xfd\x07\xb2\x2f\x3a\x8a\x34\x7c\x51\x6d\xe7\x05\x2f\x03\x4d\x2b\x76\xff\x68\xe0\xd6\xec\xff\x9b\x77\xa4\x54\x89\xe3\xfd\x51\x17\x32\x01\x1d\xf0\x73\x10\x00"};
|
||||
|
||||
//we need bigendian data...
|
||||
//lessons learned: do NOT endianchange directly in pdata, this will all proof-of-works be considered as stale from minerd....
|
||||
int kk=0;
|
||||
for (; kk < 32; kk++)
|
||||
{
|
||||
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
|
||||
};
|
||||
|
||||
// if (opt_debug)
|
||||
// {
|
||||
// applog(LOG_DEBUG, "Thr: %02d, firstN: %08x, maxN: %08x, ToDo: %d", thr_id, first_nonce, max_nonce, max_nonce-first_nonce);
|
||||
// }
|
||||
|
||||
/* I'm to lazy to put the loop in an inline function... so dirty copy'n'paste.... */
|
||||
/* i know that i could set a variable, but i don't know how the compiler will optimize it, not that then the cpu needs to load the value *everytime* in a register */
|
||||
if (ptarget[7]==0) {
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
inkhash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFFFF)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
else if (ptarget[7]<=0xF)
|
||||
{
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
inkhash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFFF0)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
else if (ptarget[7]<=0xFF)
|
||||
{
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
inkhash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFF00)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
else if (ptarget[7]<=0xFFF)
|
||||
{
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
inkhash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFF000)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
}
|
||||
else if (ptarget[7]<=0xFFFF)
|
||||
{
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
inkhash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFF0000)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
inkhash(hash64, endiandata);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
}
|
||||
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_shavite_algo( algo_gate_t* gate )
|
||||
{
|
||||
algo_not_implemented();
|
||||
return false;
|
||||
|
||||
// gate->scanhash = (void*)&scanhash_ink;
|
||||
// gate->hash = (void*)&inkhash;
|
||||
// return true;
|
||||
};
|
||||
|
||||
@@ -50,7 +50,8 @@ extern "C"{
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u32 IV512[] = {
|
||||
static const sph_u32 IV512[] =
|
||||
{
|
||||
0x72FCCDD8, 0x79CA4727, 0x128A077B, 0x40D55AEC,
|
||||
0xD1901A06, 0x430AE307, 0xB29F5CD1, 0xDF07FBFC,
|
||||
0x8E45D73D, 0x681AB538, 0xBDE86578, 0xDD577E47,
|
||||
@@ -71,38 +72,26 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
p2 = h[2];
|
||||
p3 = h[3];
|
||||
|
||||
// round
|
||||
|
||||
k00 = m[0];
|
||||
x = v128_xor( p1, k00 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
k01 = m[1];
|
||||
x = v128_xor( x, k01 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k02 = m[2];
|
||||
x = v128_xor( x, k02 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k03 = m[3];
|
||||
x = v128_xor( x, k03 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
p0 = v128_xor( p0, x );
|
||||
|
||||
k10 = m[4];
|
||||
x = v128_xor( p3, k10 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k11 = m[5];
|
||||
x = v128_xor( x, k11 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k12 = m[6];
|
||||
x = v128_xor( x, k12 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k13 = m[7];
|
||||
x = v128_xor( x, k13 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
p2 = v128_xor( p2, x );
|
||||
// round 0
|
||||
|
||||
x = v128_xoraesenc( p1, k00 );
|
||||
x = v128_xoraesenc( x, k01 );
|
||||
x = v128_xoraesenc( x, k02 );
|
||||
p0 = v128_xoraesencxor( x, k03, p0 );
|
||||
|
||||
x = v128_xoraesenc( p3, k10 );
|
||||
x = v128_xoraesenc( x, k11 );
|
||||
x = v128_xoraesenc( x, k12 );
|
||||
p2 = v128_xoraesencxor( x, k13, p2 );
|
||||
|
||||
for ( r = 0; r < 3; r ++ )
|
||||
{
|
||||
@@ -113,198 +102,165 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
if ( r == 0 )
|
||||
k00 = v128_xor( k00, v128_set32(
|
||||
~sc->count3, sc->count2, sc->count1, sc->count0 ) );
|
||||
x = v128_xoraesenc( p0, k00 );
|
||||
|
||||
x = v128_xor( p0, k00 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) );
|
||||
k01 = v128_xor( k01, k00 );
|
||||
|
||||
if ( r == 1 )
|
||||
k01 = v128_xor( k01, v128_set32(
|
||||
~sc->count0, sc->count1, sc->count2, sc->count3 ) );
|
||||
x = v128_xoraesenc( x, k01 );
|
||||
|
||||
x = v128_xor( x, k01 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) );
|
||||
k02 = v128_xor( k02, k01 );
|
||||
x = v128_xor( x, k02 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k02 );
|
||||
|
||||
k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) );
|
||||
k03 = v128_xor( k03, k02 );
|
||||
x = v128_xor( x, k03 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
p3 = v128_xor( p3, x );
|
||||
p3 = v128_xoraesencxor( x, k03, p3 );
|
||||
|
||||
k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) );
|
||||
k10 = v128_xor( k10, k03 );
|
||||
x = v128_xoraesenc( p2, k10 );
|
||||
|
||||
x = v128_xor( p2, k10 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) );
|
||||
k11 = v128_xor( k11, k10 );
|
||||
x = v128_xor( x, k11 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k11 );
|
||||
|
||||
k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) );
|
||||
k12 = v128_xor( k12, k11 );
|
||||
x = v128_xor( x, k12 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k12 );
|
||||
|
||||
k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) );
|
||||
k13 = v128_xor( k13, k12 );
|
||||
|
||||
if ( r == 2 )
|
||||
k13 = v128_xor( k13, v128_set32(
|
||||
~sc->count1, sc->count0, sc->count3, sc->count2 ) );
|
||||
|
||||
x = v128_xor( x, k13 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
p1 = v128_xor( p1, x );
|
||||
p1 = v128_xoraesencxor( x, k13, p1 );
|
||||
|
||||
// round 2, 6, 10
|
||||
|
||||
k00 = v128_xor( k00, v128_alignr8( k13, k12, 4 ) );
|
||||
x = v128_xor( p3, k00 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
|
||||
x = v128_xor( x, k01 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
|
||||
x = v128_xor( x, k02 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
|
||||
x = v128_xor( x, k03 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p3, k00 );
|
||||
|
||||
p2 = v128_xor( p2, x );
|
||||
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
|
||||
x = v128_xoraesenc( x, k01 );
|
||||
|
||||
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
|
||||
x = v128_xoraesenc( x, k02 );
|
||||
|
||||
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
|
||||
p2 = v128_xoraesencxor( x, k03, p2 );
|
||||
|
||||
k10 = v128_xor( k10, v128_alignr8( k03, k02, 4 ) );
|
||||
x = v128_xor( p1, k10 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
|
||||
x = v128_xor( x, k11 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
|
||||
x = v128_xor( x, k12 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
|
||||
x = v128_xor( x, k13 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p1, k10 );
|
||||
|
||||
p0 = v128_xor( p0, x );
|
||||
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
|
||||
x = v128_xoraesenc( x, k11 );
|
||||
|
||||
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
|
||||
x = v128_xoraesenc( x, k12 );
|
||||
|
||||
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
|
||||
p0 = v128_xoraesencxor( x, k13, p0 );
|
||||
|
||||
// round 3, 7, 11
|
||||
|
||||
k00 = v128_shuflr32( v128_aesenc_nokey( k00 ) );
|
||||
k00 = v128_xor( k00, k13 );
|
||||
x = v128_xor( p2, k00 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p2, k00 );
|
||||
|
||||
k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) );
|
||||
k01 = v128_xor( k01, k00 );
|
||||
x = v128_xor( x, k01 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k01 );
|
||||
|
||||
k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) );
|
||||
k02 = v128_xor( k02, k01 );
|
||||
x = v128_xor( x, k02 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k02 );
|
||||
|
||||
k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) );
|
||||
k03 = v128_xor( k03, k02 );
|
||||
x = v128_xor( x, k03 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
p1 = v128_xor( p1, x );
|
||||
p1 = v128_xoraesencxor( x, k03, p1 );
|
||||
|
||||
k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) );
|
||||
k10 = v128_xor( k10, k03 );
|
||||
x = v128_xor( p0, k10 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p0, k10 );
|
||||
|
||||
k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) );
|
||||
k11 = v128_xor( k11, k10 );
|
||||
x = v128_xor( x, k11 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k11 );
|
||||
|
||||
k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) );
|
||||
k12 = v128_xor( k12, k11 );
|
||||
x = v128_xor( x, k12 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k12 );
|
||||
|
||||
k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) );
|
||||
k13 = v128_xor( k13, k12 );
|
||||
x = v128_xor( x, k13 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
p3 = v128_xor( p3, x );
|
||||
p3 = v128_xoraesencxor( x, k13, p3 );
|
||||
|
||||
// round 4, 8, 12
|
||||
|
||||
k00 = v128_xor( k00, v128_alignr8( k13, k12, 4 ) );
|
||||
x = v128_xor( p1, k00 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
|
||||
x = v128_xor( x, k01 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
|
||||
x = v128_xor( x, k02 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
|
||||
x = v128_xor( x, k03 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p1, k00 );
|
||||
|
||||
p0 = v128_xor( p0, x );
|
||||
k01 = v128_xor( k01, v128_alignr8( k00, k13, 4 ) );
|
||||
x = v128_xoraesenc( x, k01 );
|
||||
|
||||
k02 = v128_xor( k02, v128_alignr8( k01, k00, 4 ) );
|
||||
x = v128_xoraesenc( x, k02 );
|
||||
|
||||
k03 = v128_xor( k03, v128_alignr8( k02, k01, 4 ) );
|
||||
p0 = v128_xoraesencxor( x, k03, p0 );
|
||||
|
||||
k10 = v128_xor( k10, v128_alignr8( k03, k02, 4 ) );
|
||||
x = v128_xor( p3, k10 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
|
||||
x = v128_xor( x, k11 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
|
||||
x = v128_xor( x, k12 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
|
||||
x = v128_xor( x, k13 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p3, k10 );
|
||||
|
||||
p2 = v128_xor( p2, x );
|
||||
k11 = v128_xor( k11, v128_alignr8( k10, k03, 4 ) );
|
||||
x = v128_xoraesenc( x, k11 );
|
||||
|
||||
k12 = v128_xor( k12, v128_alignr8( k11, k10, 4 ) );
|
||||
x = v128_xoraesenc( x, k12 );
|
||||
|
||||
k13 = v128_xor( k13, v128_alignr8( k12, k11, 4 ) );
|
||||
p2 = v128_xoraesencxor( x, k13, p2 );
|
||||
}
|
||||
|
||||
// round 13
|
||||
|
||||
k00 = v128_shuflr32( v128_aesenc_nokey( k00 ) );
|
||||
k00 = v128_xor( k00, k13 );
|
||||
x = v128_xor( p0, k00 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p0, k00 );
|
||||
|
||||
k01 = v128_shuflr32( v128_aesenc_nokey( k01 ) );
|
||||
k01 = v128_xor( k01, k00 );
|
||||
x = v128_xor( x, k01 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k01 );
|
||||
|
||||
k02 = v128_shuflr32( v128_aesenc_nokey( k02 ) );
|
||||
k02 = v128_xor( k02, k01 );
|
||||
x = v128_xor( x, k02 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k02 );
|
||||
|
||||
k03 = v128_shuflr32( v128_aesenc_nokey( k03 ) );
|
||||
k03 = v128_xor( k03, k02 );
|
||||
x = v128_xor( x, k03 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
p3 = v128_xor( p3, x );
|
||||
p3 = v128_xoraesencxor( x, k03, p3 );
|
||||
|
||||
k10 = v128_shuflr32( v128_aesenc_nokey( k10 ) );
|
||||
k10 = v128_xor( k10, k03 );
|
||||
x = v128_xor( p2, k10 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( p2, k10 );
|
||||
|
||||
k11 = v128_shuflr32( v128_aesenc_nokey( k11 ) );
|
||||
k11 = v128_xor( k11, k10 );
|
||||
x = v128_xor( x, k11 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k11 );
|
||||
|
||||
k12 = v128_shuflr32( v128_aesenc_nokey( k12 ) );
|
||||
k12 = v128_xor( k12, v128_xor( k11, v128_set32(
|
||||
~sc->count2, sc->count3, sc->count0, sc->count1 ) ) );
|
||||
x = v128_xor( x, k12 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
x = v128_xoraesenc( x, k12 );
|
||||
|
||||
k13 = v128_shuflr32( v128_aesenc_nokey( k13 ) );
|
||||
k13 = v128_xor( k13, k12 );
|
||||
x = v128_xor( x, k13 );
|
||||
x = v128_aesenc_nokey( x );
|
||||
|
||||
p1 = v128_xor( p1, x );
|
||||
p1 = v128_xoraesencxor( x, k13, p1 );
|
||||
|
||||
h[0] = v128_xor( h[0], p2 );
|
||||
h[1] = v128_xor( h[1], p3 );
|
||||
|
||||
Reference in New Issue
Block a user