mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
9d3a46c355 | ||
![]() |
4e3f1b926f | ||
![]() |
045b42babf | ||
![]() |
fc696dbbe5 |
@@ -250,6 +250,7 @@ cpuminer_SOURCES = \
|
|||||||
algo/x16/x16rt.c \
|
algo/x16/x16rt.c \
|
||||||
algo/x16/x16rt-4way.c \
|
algo/x16/x16rt-4way.c \
|
||||||
algo/x16/hex.c \
|
algo/x16/hex.c \
|
||||||
|
algo/x16/x20r.c \
|
||||||
algo/x16/x21s-4way.c \
|
algo/x16/x21s-4way.c \
|
||||||
algo/x16/x21s.c \
|
algo/x16/x21s.c \
|
||||||
algo/x16/minotaur.c \
|
algo/x16/minotaur.c \
|
||||||
|
@@ -87,7 +87,6 @@ Supported Algorithms
|
|||||||
groestl Groestl coin
|
groestl Groestl coin
|
||||||
hex x16r-hex
|
hex x16r-hex
|
||||||
hmq1725
|
hmq1725
|
||||||
hodl Hodlcoin
|
|
||||||
jha Jackpotcoin
|
jha Jackpotcoin
|
||||||
keccak Maxcoin
|
keccak Maxcoin
|
||||||
keccakc Creative coin
|
keccakc Creative coin
|
||||||
@@ -115,9 +114,11 @@ Supported Algorithms
|
|||||||
scrypt:N scrypt(N, 1, 1)
|
scrypt:N scrypt(N, 1, 1)
|
||||||
scryptn2 scrypt(1048576, 1, 1)
|
scryptn2 scrypt(1048576, 1, 1)
|
||||||
sha256d Double SHA-256
|
sha256d Double SHA-256
|
||||||
|
sha256dt
|
||||||
sha256q Quad SHA-256
|
sha256q Quad SHA-256
|
||||||
sha256t Triple SHA-256
|
sha256t Triple SHA-256
|
||||||
sha3d Double keccak256 (BSHA3)
|
sha3d Double keccak256 (BSHA3)
|
||||||
|
sha512256d
|
||||||
skein Skein+Sha (Skeincoin)
|
skein Skein+Sha (Skeincoin)
|
||||||
skein2 Double Skein (Woodcoin)
|
skein2 Double Skein (Woodcoin)
|
||||||
skunk Signatum (SIGT)
|
skunk Signatum (SIGT)
|
||||||
@@ -145,6 +146,7 @@ Supported Algorithms
|
|||||||
x16rt-veil veil
|
x16rt-veil veil
|
||||||
x16s
|
x16s
|
||||||
x17
|
x17
|
||||||
|
x20r
|
||||||
x21s
|
x21s
|
||||||
x22i
|
x22i
|
||||||
x25x
|
x25x
|
||||||
|
@@ -27,17 +27,19 @@ See INSTALL_LINUX or INSTALL_WINDOWS for compile instructions
|
|||||||
Requirements
|
Requirements
|
||||||
------------
|
------------
|
||||||
|
|
||||||
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
- A x86_64 architecture CPU with a minimum of SSE2 support. This includes Intel Core2 and newer and AMD equivalents.
|
||||||
supported.
|
- Arm CPU supporting AArch64 and NEON.
|
||||||
|
|
||||||
64 bit Linux or Windows operating system. Apple, Android and Raspberry Pi
|
32 bit CPUs are not supported.
|
||||||
are not supported. FreeBSD YMMV.
|
|
||||||
|
|
||||||
ARM requirements (Beta):
|
Older CPUs are supported by open source cpuminer-multi by TPruvot but at reduced performance.
|
||||||
|
|
||||||
CPU: Armv8 and NEON, SHA2 & AES are optional
|
Mining on mobile devices that meet the requirements is not recommended due to the risk of
|
||||||
OS: Linux distribution built for AArch64.
|
overheating and damaging the battery. Mining has unlimited demand, it will push any device
|
||||||
Packages: source code only.
|
to or beyond its limits. There is also a fire risk with overheated lithium batteries.
|
||||||
|
|
||||||
|
Beware of apps claiming "mobile only mining". There is no such thing, they aren't miners.
|
||||||
|
If a mobile CPU can mine it any CPU can.
|
||||||
|
|
||||||
See wiki for details.
|
See wiki for details.
|
||||||
|
|
||||||
@@ -73,12 +75,40 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v23.15
|
||||||
|
|
||||||
|
Fixed x11gost (sib) algo for all architectures, broken in v3.23.4.
|
||||||
|
ARM: Fugue AES optimizations enabled.
|
||||||
|
ARM: quark, qubit, x11gost algos optimized with NEON & AES.
|
||||||
|
|
||||||
|
v23.14
|
||||||
|
|
||||||
|
ARM: Groestl AES optimizations enabled.
|
||||||
|
All: Small optimization to Shabal 4way.
|
||||||
|
x86_64: Extend Shabal 4way support to SSE2 from SSE4.1.
|
||||||
|
All: deleted some unused files.
|
||||||
|
|
||||||
|
v23.13
|
||||||
|
|
||||||
|
Added x20r algo.
|
||||||
|
Eliminated redundant hash order calculations for x16r family.
|
||||||
|
|
||||||
|
v23.12
|
||||||
|
|
||||||
|
Several bugs fixes and speed improvements for x16r family for all CPU architectures.
|
||||||
|
|
||||||
|
v23.11
|
||||||
|
|
||||||
|
This is a release candidate for full AArch64 support, marking the end of the Beta phase.
|
||||||
|
Fixed hmq1725 & x25x algos, SSE2 & NEON, broken in v3.23.4.
|
||||||
|
Most CPU-mineable SHA3 algos (X*) upgraded to 2-way SSE2 & NEON.
|
||||||
|
|
||||||
v23.10
|
v23.10
|
||||||
|
|
||||||
x86_64: Fixed scrypt, scryptn2 algos SSE2.
|
x86_64: Fixed scrypt, scryptn2 algos SSE2.
|
||||||
Fixed sha512d256d algo AVX2, SSE2, NEON.
|
Fixed sha512256d algo AVX2, SSE2, NEON.
|
||||||
Fixed a bug in Skein N-way that reduced performance.
|
Fixed a bug in Skein N-way that reduced performance.
|
||||||
ARM: Skein algo optimized for NEON & SHA2.
|
ARM: Skein optimized for NEON, SHA2 & SSE2.
|
||||||
Skein2 algo 2-way optimized for NEON & SSE2.
|
Skein2 algo 2-way optimized for NEON & SSE2.
|
||||||
|
|
||||||
v23.9
|
v23.9
|
||||||
|
@@ -368,6 +368,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
|||||||
case ALGO_X16RT_VEIL: rc = register_x16rt_veil_algo ( gate ); break;
|
case ALGO_X16RT_VEIL: rc = register_x16rt_veil_algo ( gate ); break;
|
||||||
case ALGO_X16S: rc = register_x16s_algo ( gate ); break;
|
case ALGO_X16S: rc = register_x16s_algo ( gate ); break;
|
||||||
case ALGO_X17: rc = register_x17_algo ( gate ); break;
|
case ALGO_X17: rc = register_x17_algo ( gate ); break;
|
||||||
|
case ALGO_X20R: rc = register_x20r_algo ( gate ); break;
|
||||||
case ALGO_X21S: rc = register_x21s_algo ( gate ); break;
|
case ALGO_X21S: rc = register_x21s_algo ( gate ); break;
|
||||||
case ALGO_X22I: rc = register_x22i_algo ( gate ); break;
|
case ALGO_X22I: rc = register_x22i_algo ( gate ); break;
|
||||||
case ALGO_X25X: rc = register_x25x_algo ( gate ); break;
|
case ALGO_X25X: rc = register_x25x_algo ( gate ); break;
|
||||||
|
@@ -99,7 +99,7 @@ typedef uint32_t set_t;
|
|||||||
#define AES_OPT 1 << 7 // Intel Westmere, AArch64
|
#define AES_OPT 1 << 7 // Intel Westmere, AArch64
|
||||||
#define VAES_OPT 1 << 8 // Icelake, Zen3
|
#define VAES_OPT 1 << 8 // Icelake, Zen3
|
||||||
#define SHA_OPT 1 << 9 // Zen1, Icelake, AArch64
|
#define SHA_OPT 1 << 9 // Zen1, Icelake, AArch64
|
||||||
#define SHA512_OPT 1 << 10 // AArch64
|
#define SHA512_OPT 1 << 10 // Intel Arrow Lake, AArch64
|
||||||
#define NEON_OPT 1 << 11 // AArch64
|
#define NEON_OPT 1 << 11 // AArch64
|
||||||
|
|
||||||
// AVX10 does not have explicit algo features:
|
// AVX10 does not have explicit algo features:
|
||||||
|
@@ -39,7 +39,7 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
|||||||
blake256r14_4way_update( &blake_4w_ctx, vdata, 64 );
|
blake256r14_4way_update( &blake_4w_ctx, vdata, 64 );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
|
|
||||||
blakehash_4way( hash, vdata );
|
blakehash_4way( hash, vdata );
|
||||||
|
|
||||||
|
@@ -182,7 +182,7 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
|||||||
blake256r8_4way_update( &blakecoin_4w_ctx, vdata, 64 );
|
blake256r8_4way_update( &blakecoin_4w_ctx, vdata, 64 );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
blakecoin_4way_hash( hash, vdata );
|
blakecoin_4way_hash( hash, vdata );
|
||||||
|
|
||||||
|
@@ -2,7 +2,6 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
//#include "sph_keccak.h"
|
|
||||||
#include "bmw-hash-4way.h"
|
#include "bmw-hash-4way.h"
|
||||||
|
|
||||||
#if defined(BMW512_8WAY)
|
#if defined(BMW512_8WAY)
|
||||||
@@ -27,9 +26,9 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t last_nonce = max_nonce - 8;
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
__m512i *noncev = (__m512i*)vdata + 9;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
|
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
do {
|
do {
|
||||||
@@ -43,7 +42,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
|
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
|
||||||
{
|
{
|
||||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_solution( work, lane_hash, mythr );
|
submit_solution( work, lane_hash, mythr );
|
||||||
@@ -59,8 +58,6 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
#elif defined(BMW512_4WAY)
|
#elif defined(BMW512_4WAY)
|
||||||
|
|
||||||
//#ifdef BMW512_4WAY
|
|
||||||
|
|
||||||
void bmw512hash_4way( void *state, const void *input )
|
void bmw512hash_4way( void *state, const void *input )
|
||||||
{
|
{
|
||||||
bmw512_4way_context ctx;
|
bmw512_4way_context ctx;
|
||||||
@@ -81,9 +78,9 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t last_nonce = max_nonce - 4;
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
const int thr_id = mythr->id;
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
do {
|
do {
|
||||||
@@ -96,7 +93,7 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
|
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
|
||||||
{
|
{
|
||||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_solution( work, lane_hash, mythr );
|
submit_solution( work, lane_hash, mythr );
|
||||||
@@ -110,4 +107,55 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(BMW512_2WAY)
|
||||||
|
|
||||||
|
void bmw512hash_2x64( void *state, const void *input )
|
||||||
|
{
|
||||||
|
bmw512_2x64_context ctx;
|
||||||
|
bmw512_2x64_init( &ctx );
|
||||||
|
bmw512_2x64_update( &ctx, input, 80 );
|
||||||
|
bmw512_2x64_close( &ctx, state );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_bmw512_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash[16*2] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash7 = &(hash[13]); // 3*4+1
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
uint32_t n = pdata[19];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
do {
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_bswap32(
|
||||||
|
v128_set32( n+1, 0, n, 0 ) ), *noncev );
|
||||||
|
|
||||||
|
bmw512hash_2x64( hash, vdata );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 2; lane++ )
|
||||||
|
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
|
||||||
|
{
|
||||||
|
extr_lane_2x64( lane_hash, hash, lane, 256 );
|
||||||
|
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n += 2;
|
||||||
|
|
||||||
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
bool register_bmw512_algo( algo_gate_t* gate )
|
bool register_bmw512_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
#if defined (BMW512_8WAY)
|
#if defined (BMW512_8WAY)
|
||||||
gate->scanhash = (void*)&scanhash_bmw512_8way;
|
gate->scanhash = (void*)&scanhash_bmw512_8way;
|
||||||
@@ -10,6 +10,9 @@ bool register_bmw512_algo( algo_gate_t* gate )
|
|||||||
#elif defined (BMW512_4WAY)
|
#elif defined (BMW512_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_bmw512_4way;
|
gate->scanhash = (void*)&scanhash_bmw512_4way;
|
||||||
gate->hash = (void*)&bmw512hash_4way;
|
gate->hash = (void*)&bmw512hash_4way;
|
||||||
|
#elif defined (BMW512_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_bmw512_2x64;
|
||||||
|
gate->hash = (void*)&bmw512hash_2x64;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_bmw512;
|
gate->scanhash = (void*)&scanhash_bmw512;
|
||||||
gate->hash = (void*)&bmw512hash;
|
gate->hash = (void*)&bmw512hash;
|
||||||
|
@@ -8,6 +8,8 @@
|
|||||||
#define BMW512_8WAY 1
|
#define BMW512_8WAY 1
|
||||||
#elif defined(__AVX2__)
|
#elif defined(__AVX2__)
|
||||||
#define BMW512_4WAY 1
|
#define BMW512_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define BMW512_2WAY 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(BMW512_8WAY)
|
#if defined(BMW512_8WAY)
|
||||||
@@ -22,6 +24,12 @@ void bmw512hash_4way( void *state, const void *input );
|
|||||||
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#elif defined(BMW512_2WAY)
|
||||||
|
|
||||||
|
void bmw512hash_2x64( void *state, const void *input );
|
||||||
|
int scanhash_bmw512_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void bmw512hash( void *state, const void *input );
|
void bmw512hash( void *state, const void *input );
|
||||||
|
@@ -236,8 +236,6 @@ void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBloc
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
HashReturn init_echo( hashState_echo *ctx, int nHashSize )
|
HashReturn init_echo( hashState_echo *ctx, int nHashSize )
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
@@ -280,7 +278,8 @@ HashReturn init_echo(hashState_echo *ctx, int nHashSize)
|
|||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen)
|
HashReturn update_echo( hashState_echo *state, const void *data,
|
||||||
|
uint32_t databitlen )
|
||||||
{
|
{
|
||||||
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
||||||
|
|
||||||
@@ -330,7 +329,7 @@ HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLengt
|
|||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
HashReturn final_echo(hashState_echo *state, BitSequence *hashval)
|
HashReturn final_echo( hashState_echo *state, void *hashval)
|
||||||
{
|
{
|
||||||
v128_t remainingbits;
|
v128_t remainingbits;
|
||||||
|
|
||||||
@@ -407,8 +406,8 @@ HashReturn final_echo(hashState_echo *state, BitSequence *hashval)
|
|||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
|
HashReturn update_final_echo( hashState_echo *state, void *hashval,
|
||||||
const BitSequence *data, DataLength databitlen )
|
const void *data, uint32_t databitlen )
|
||||||
{
|
{
|
||||||
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
||||||
|
|
||||||
@@ -530,8 +529,8 @@ HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
|
|||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
|
HashReturn echo_full( hashState_echo *state, void *hashval,
|
||||||
int nHashSize, const BitSequence *data, DataLength datalen )
|
int nHashSize, const void *data, uint32_t datalen )
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
@@ -578,7 +577,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
|
|||||||
{
|
{
|
||||||
// Fill the buffer
|
// Fill the buffer
|
||||||
memcpy( state->buffer + state->uBufferBytes,
|
memcpy( state->buffer + state->uBufferBytes,
|
||||||
(void*)data, state->uBlockLength - state->uBufferBytes );
|
data, state->uBlockLength - state->uBufferBytes );
|
||||||
|
|
||||||
// Process buffer
|
// Process buffer
|
||||||
Compress( state, state->buffer, 1 );
|
Compress( state, state->buffer, 1 );
|
||||||
@@ -601,7 +600,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if( uRemainingBytes > 0 )
|
if( uRemainingBytes > 0 )
|
||||||
memcpy(state->buffer, (void*)data, uRemainingBytes);
|
memcpy(state->buffer, data, uRemainingBytes);
|
||||||
|
|
||||||
state->uBufferBytes = uRemainingBytes;
|
state->uBufferBytes = uRemainingBytes;
|
||||||
}
|
}
|
||||||
@@ -689,7 +688,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
|
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
|
||||||
{
|
{
|
||||||
HashReturn hRet;
|
HashReturn hRet;
|
||||||
@@ -746,5 +745,6 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit
|
|||||||
|
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -47,16 +47,16 @@ HashReturn init_echo(hashState_echo *state, int hashbitlen);
|
|||||||
|
|
||||||
HashReturn reinit_echo(hashState_echo *state);
|
HashReturn reinit_echo(hashState_echo *state);
|
||||||
|
|
||||||
HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen);
|
HashReturn update_echo(hashState_echo *state, const void *data, uint32_t databitlen);
|
||||||
|
|
||||||
HashReturn final_echo(hashState_echo *state, BitSequence *hashval);
|
HashReturn final_echo(hashState_echo *state, void *hashval);
|
||||||
|
|
||||||
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
|
HashReturn hash_echo(int hashbitlen, const void *data, uint32_t databitlen, void *hashval);
|
||||||
|
|
||||||
HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
|
HashReturn update_final_echo( hashState_echo *state, void *hashval,
|
||||||
const BitSequence *data, DataLength databitlen );
|
const void *data, uint32_t databitlen );
|
||||||
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
|
HashReturn echo_full( hashState_echo *state, void *hashval,
|
||||||
int nHashSize, const BitSequence *data, DataLength databitlen );
|
int nHashSize, const void *data, uint32_t databitlen );
|
||||||
|
|
||||||
#endif // HASH_API_H
|
#endif // HASH_API_H
|
||||||
|
|
||||||
|
@@ -36,7 +36,6 @@
|
|||||||
|
|
||||||
#include "sph_echo.h"
|
#include "sph_echo.h"
|
||||||
|
|
||||||
#if !defined(__AES__)
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
@@ -1031,4 +1030,3 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif // !AES
|
|
||||||
|
@@ -36,8 +36,6 @@
|
|||||||
#ifndef SPH_ECHO_H__
|
#ifndef SPH_ECHO_H__
|
||||||
#define SPH_ECHO_H__
|
#define SPH_ECHO_H__
|
||||||
|
|
||||||
#if !defined(__AES__)
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
#endif
|
#endif
|
||||||
@@ -318,5 +316,4 @@ void sph_echo512_addbits_and_close(
|
|||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif // !AES
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -15,237 +15,176 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if ( defined(__SSE4_1__) && defined(__AES__) ) || ( defined(__ARM_NEON) && defined(__ARM_FEATURE_AES) )
|
||||||
|
|
||||||
#include <x86intrin.h>
|
|
||||||
|
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include "fugue-aesni.h"
|
#include "fugue-aesni.h"
|
||||||
|
|
||||||
|
static const v128u64_t _supermix1a __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x0202010807020100, 0x0a05000f06010c0b };
|
||||||
|
|
||||||
MYALIGN const unsigned long long _supermix1a[] = {0x0202010807020100, 0x0a05000f06010c0b};
|
static const v128u64_t _supermix1b __attribute__ ((aligned (16))) =
|
||||||
MYALIGN const unsigned long long _supermix1b[] = {0x0b0d080703060504, 0x0e0a090c050e0f0a};
|
{ 0x0b0d080703060504, 0x0e0a090c050e0f0a };
|
||||||
MYALIGN const unsigned long long _supermix1c[] = {0x0402060c070d0003, 0x090a060580808080};
|
|
||||||
MYALIGN const unsigned long long _supermix1d[] = {0x808080800f0e0d0c, 0x0f0e0d0c80808080};
|
|
||||||
MYALIGN const unsigned long long _supermix2a[] = {0x07020d0880808080, 0x0b06010c050e0f0a};
|
|
||||||
MYALIGN const unsigned long long _supermix4a[] = {0x000f0a050c0b0601, 0x0302020404030e09};
|
|
||||||
MYALIGN const unsigned long long _supermix4b[] = {0x07020d08080e0d0d, 0x07070908050e0f0a};
|
|
||||||
MYALIGN const unsigned long long _supermix4c[] = {0x0706050403020000, 0x0302000007060504};
|
|
||||||
MYALIGN const unsigned long long _supermix7a[] = {0x010c0b060d080702, 0x0904030e03000104};
|
|
||||||
MYALIGN const unsigned long long _supermix7b[] = {0x8080808080808080, 0x0504070605040f06};
|
|
||||||
//MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E};
|
|
||||||
//MYALIGN const unsigned char _shift_one_mask[] = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2};
|
|
||||||
//MYALIGN const unsigned char _shift_four_mask[] = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8};
|
|
||||||
//MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5};
|
|
||||||
//MYALIGN const unsigned char _aes_shift_rows[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
|
|
||||||
MYALIGN const unsigned int _inv_shift_rows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
|
|
||||||
MYALIGN const unsigned int _mul2mask[] = {0x1b1b0000, 0x00000000, 0x00000000, 0x00000000};
|
|
||||||
MYALIGN const unsigned int _mul4mask[] = {0x2d361b00, 0x00000000, 0x00000000, 0x00000000};
|
|
||||||
MYALIGN const unsigned int _lsbmask2[] = {0x03030303, 0x03030303, 0x03030303, 0x03030303};
|
|
||||||
|
|
||||||
|
static const v128u64_t _supermix1c __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x0402060c070d0003, 0x090a060580808080 };
|
||||||
|
|
||||||
MYALIGN const unsigned int _IV512[] = {
|
static const v128u64_t _supermix1d __attribute__ ((aligned (16))) =
|
||||||
0x00000000, 0x00000000, 0x7ea50788, 0x00000000,
|
{ 0x808080800f0e0d0c, 0x0f0e0d0c80808080 };
|
||||||
|
|
||||||
|
static const v128u64_t _supermix2a __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x07020d0880808080, 0x0b06010c050e0f0a };
|
||||||
|
|
||||||
|
static const v128u64_t _supermix4a __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x000f0a050c0b0601, 0x0302020404030e09 };
|
||||||
|
|
||||||
|
static const v128u64_t _supermix4b __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x07020d08080e0d0d, 0x07070908050e0f0a };
|
||||||
|
|
||||||
|
static const v128u64_t _supermix4c __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x0706050403020000, 0x0302000007060504 };
|
||||||
|
|
||||||
|
static const v128u64_t _supermix7a __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x010c0b060d080702, 0x0904030e03000104 };
|
||||||
|
|
||||||
|
static const v128u64_t _supermix7b __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x8080808080808080, 0x0504070605040f06 };
|
||||||
|
|
||||||
|
static const v128u64_t _inv_shift_rows __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x0b0e0104070a0d00, 0x0306090c0f020508 };
|
||||||
|
|
||||||
|
static const v128u64_t _mul2mask __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x000000001b1b0000, 0x0000000000000000 };
|
||||||
|
|
||||||
|
static const v128u64_t _mul4mask __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x000000002d361b00, 0x0000000000000000 };
|
||||||
|
|
||||||
|
static const v128u64_t _lsbmask2 __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x0303030303030303, 0x0303030303030303 };
|
||||||
|
|
||||||
|
static const uint32_t _IV512[] __attribute__ ((aligned (32))) =
|
||||||
|
{ 0x00000000, 0x00000000, 0x7ea50788, 0x00000000,
|
||||||
0x75af16e6, 0xdbe4d3c5, 0x27b09aac, 0x00000000,
|
0x75af16e6, 0xdbe4d3c5, 0x27b09aac, 0x00000000,
|
||||||
0x17f115d9, 0x54cceeb6, 0x0b02e806, 0x00000000,
|
0x17f115d9, 0x54cceeb6, 0x0b02e806, 0x00000000,
|
||||||
0xd1ef924a, 0xc9e2c6aa, 0x9813b2dd, 0x00000000,
|
0xd1ef924a, 0xc9e2c6aa, 0x9813b2dd, 0x00000000,
|
||||||
0x3858e6ca, 0x3f207f43, 0xe778ea25, 0x00000000,
|
0x3858e6ca, 0x3f207f43, 0xe778ea25, 0x00000000,
|
||||||
0xd6dd1f95, 0x1dd16eda, 0x67353ee1, 0x00000000};
|
0xd6dd1f95, 0x1dd16eda, 0x67353ee1, 0x00000000
|
||||||
|
};
|
||||||
|
|
||||||
#if defined(__SSE4_1__)
|
#if defined(__ARM_NEON)
|
||||||
|
|
||||||
#define PACK_S0(s0, s1, t1)\
|
#define mask_1000(v) v128_put32( v, 0, 3 )
|
||||||
s0 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s0), _mm_castsi128_ps(s1), 0x30))
|
|
||||||
|
|
||||||
#define UNPACK_S0(s0, s1, t1)\
|
static const v128u32_t MASK_3321 __attribute__ ((aligned (16))) =
|
||||||
s1 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s1), _mm_castsi128_ps(s0), 0xc0));\
|
{ 0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0x0f0e0d0c };
|
||||||
s0 = mm128_mask_32( s0, 8 )
|
|
||||||
|
static const v128u32_t MASK_3033 __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x0f0e0d0c, 0x0f0e0d0c, 0x03020100, 0x0f0e0d0c };
|
||||||
|
|
||||||
|
static const v128u32_t MASK_3303 __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x0f0e0d0c, 0x03020100, 0x0f0e0d0c, 0x0f0e0d0c };
|
||||||
|
|
||||||
|
static const v128u32_t MASK_0321 __attribute__ ((aligned (16))) =
|
||||||
|
{ 0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0x03020100 };
|
||||||
|
|
||||||
|
#define shuffle_3303(v) vqtbl1q_u8( v, MASK_3303 )
|
||||||
|
#define shuffle_0321(v) vqtbl1q_u8( v, MASK_0321 )
|
||||||
|
|
||||||
|
#define CMIX( s1, s2, r1, r2, t1, t2 ) \
|
||||||
|
t1 = vqtbl1q_u8( s1, MASK_3321 ); \
|
||||||
|
t2 = vqtbl1q_u8( s2, MASK_3033 ); \
|
||||||
|
t1 = v128_xor( t1, t2 ); \
|
||||||
|
r1 = v128_xor( r1, t1 ); \
|
||||||
|
r2 = v128_xor( r2, t1 );
|
||||||
|
|
||||||
|
#elif defined(__SSE4_1__)
|
||||||
|
|
||||||
|
#define mask_1000(v) v128_mask32( v, 8 )
|
||||||
|
|
||||||
|
#define shuffle_3303(v) _mm_shuffle_epi32( v, 0xf3 )
|
||||||
|
#define shuffle_0321(v) _mm_shuffle_epi32( v, 0x39 )
|
||||||
|
|
||||||
#define CMIX( s1, s2, r1, r2, t1, t2 ) \
|
#define CMIX( s1, s2, r1, r2, t1, t2 ) \
|
||||||
t1 = s1; \
|
t1 = s1; \
|
||||||
t1 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(t1), _mm_castsi128_ps(s2), _MM_SHUFFLE(3, 0, 2, 1)));\
|
t1 = v128_shuffle2_32( t1, s2, _MM_SHUFFLE( 3, 0, 2, 1 ) ); \
|
||||||
r1 = _mm_xor_si128(r1, t1);\
|
r1 = v128_xor( r1, t1 ); \
|
||||||
r2 = _mm_xor_si128(r2, t1);
|
r2 = v128_xor( r2, t1 );
|
||||||
|
|
||||||
#else // SSE2
|
|
||||||
|
|
||||||
#define PACK_S0(s0, s1, t1)\
|
|
||||||
t1 = _mm_shuffle_epi32(s1, _MM_SHUFFLE(0, 3, 3, 3));\
|
|
||||||
s0 = _mm_xor_si128(s0, t1);
|
|
||||||
|
|
||||||
#define UNPACK_S0(s0, s1, t1)\
|
|
||||||
t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 3, 3));\
|
|
||||||
s1 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s1), _mm_castsi128_ps(t1)));\
|
|
||||||
s0 = mm128_mask_32( s0, 8 )
|
|
||||||
|
|
||||||
#define CMIX(s1, s2, r1, r2, t1, t2)\
|
|
||||||
t1 = _mm_shuffle_epi32(s1, 0xf9);\
|
|
||||||
t2 = _mm_shuffle_epi32(s2, 0xcf);\
|
|
||||||
t1 = _mm_xor_si128(t1, t2);\
|
|
||||||
r1 = _mm_xor_si128(r1, t1);\
|
|
||||||
r2 = _mm_xor_si128(r2, t1)
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TIX256(msg, s10, s8, s24, s0, t1, t2, t3)\
|
#define PACK_S0( s0, s1, t1 ) \
|
||||||
t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\
|
s0 = v128_movlane32( s0, 3, s1, 0 )
|
||||||
s10 = _mm_xor_si128(s10, t1);\
|
|
||||||
t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\
|
|
||||||
s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\
|
|
||||||
t1 = _mm_slli_si128(t1, 8);\
|
|
||||||
s8 = _mm_xor_si128(s8, t1);\
|
|
||||||
t1 = _mm_shuffle_epi32(s24, _MM_SHUFFLE(3, 3, 0, 3));\
|
|
||||||
s0 = _mm_xor_si128(s0, t1)
|
|
||||||
|
|
||||||
|
#define UNPACK_S0( s0, s1, t1 ) \
|
||||||
#define TIX384(msg, s16, s8, s27, s30, s0, s4, t1, t2, t3)\
|
s1 = v128_movlane32( s1, 0, s0, 3 ); \
|
||||||
t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\
|
s0 = mask_1000( s0 )
|
||||||
s16 = _mm_xor_si128(s16, t1);\
|
|
||||||
t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\
|
|
||||||
s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\
|
|
||||||
t1 = _mm_slli_si128(t1, 8);\
|
|
||||||
s8 = _mm_xor_si128(s8, t1);\
|
|
||||||
t1 = _mm_shuffle_epi32(s27, _MM_SHUFFLE(3, 3, 0, 3));\
|
|
||||||
s0 = _mm_xor_si128(s0, t1);\
|
|
||||||
t1 = _mm_shuffle_epi32(s30, _MM_SHUFFLE(3, 3, 0, 3));\
|
|
||||||
s4 = _mm_xor_si128(s4, t1)
|
|
||||||
|
|
||||||
#define TIX512(msg, s22, s8, s24, s27, s30, s0, s4, s7, t1, t2, t3)\
|
#define TIX512(msg, s22, s8, s24, s27, s30, s0, s4, s7, t1, t2, t3)\
|
||||||
t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\
|
t1 = shuffle_3303( s0 ); \
|
||||||
s22 = _mm_xor_si128(s22, t1);\
|
s22 = v128_xor(s22, t1);\
|
||||||
t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\
|
t1 = v128_put32( v128_zero, *(uint32_t*)msg, 0 ); \
|
||||||
s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\
|
s0 = v128_movlane32( s0, 0, t1, 0 ); \
|
||||||
t1 = _mm_slli_si128(t1, 8);\
|
t1 = v128_alignr64( t1, v128_zero, 1 ); \
|
||||||
s8 = _mm_xor_si128(s8, t1);\
|
s8 = v128_xor(s8, t1);\
|
||||||
t1 = _mm_shuffle_epi32(s24, _MM_SHUFFLE(3, 3, 0, 3));\
|
t1 = shuffle_3303( s24 ); \
|
||||||
s0 = _mm_xor_si128(s0, t1);\
|
s0 = v128_xor(s0, t1);\
|
||||||
t1 = _mm_shuffle_epi32(s27, _MM_SHUFFLE(3, 3, 0, 3));\
|
t1 = shuffle_3303( s27 ); \
|
||||||
s4 = _mm_xor_si128(s4, t1);\
|
s4 = v128_xor(s4, t1);\
|
||||||
t1 = _mm_shuffle_epi32(s30, _MM_SHUFFLE(3, 3, 0, 3));\
|
t1 = shuffle_3303( s30 ); \
|
||||||
s7 = _mm_xor_si128(s7, t1)
|
s7 = v128_xor(s7, t1)
|
||||||
|
|
||||||
#define PRESUPERMIX(t0, t1, t2, t3, t4)\
|
|
||||||
t2 = t0;\
|
|
||||||
t3 = _mm_add_epi8(t0, t0);\
|
|
||||||
t4 = _mm_add_epi8(t3, t3);\
|
|
||||||
t1 = _mm_srli_epi16(t0, 6);\
|
|
||||||
t1 = _mm_and_si128(t1, M128(_lsbmask2));\
|
|
||||||
t3 = _mm_xor_si128(t3, _mm_shuffle_epi8(M128(_mul2mask), t1));\
|
|
||||||
t0 = _mm_xor_si128(t4, _mm_shuffle_epi8(M128(_mul4mask), t1))
|
|
||||||
|
|
||||||
/*
|
|
||||||
#define PRESUPERMIX(x, t1, s1, s2, t2)\
|
|
||||||
s1 = x;\
|
|
||||||
s2 = _mm_add_epi8(x, x);\
|
|
||||||
t2 = _mm_add_epi8(s2, s2);\
|
|
||||||
t1 = _mm_srli_epi16(x, 6);\
|
|
||||||
t1 = _mm_and_si128(t1, M128(_lsbmask2));\
|
|
||||||
s2 = _mm_xor_si128(s2, _mm_shuffle_epi8(M128(_mul2mask), t1));\
|
|
||||||
x = _mm_xor_si128(t2, _mm_shuffle_epi8(M128(_mul4mask), t1))
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define SUBSTITUTE( r0, _t2 ) \
|
#define SUBSTITUTE( r0, _t2 ) \
|
||||||
_t2 = _mm_shuffle_epi8(r0, M128(_inv_shift_rows));\
|
_t2 = v128_shuffle8( r0, _inv_shift_rows ); \
|
||||||
_t2 = _mm_aesenclast_si128( _t2, v128_zero )
|
_t2 = v128_aesenclast_nokey( _t2 )
|
||||||
|
|
||||||
#define SUPERMIX(t0, t1, t2, t3, t4)\
|
#define SUPERMIX(t0, t1, t2, t3, t4)\
|
||||||
t2 = t0;\
|
t2 = t0;\
|
||||||
t3 = _mm_add_epi8(t0, t0);\
|
t3 = v128_add8( t0, t0 ); \
|
||||||
t4 = _mm_add_epi8(t3, t3);\
|
t4 = v128_add8( t3, t3 ); \
|
||||||
t1 = _mm_srli_epi16(t0, 6);\
|
t1 = v128_sr16( t0, 6 ); \
|
||||||
t1 = _mm_and_si128(t1, M128(_lsbmask2));\
|
t1 = v128_and( t1, _lsbmask2 ); \
|
||||||
t0 = _mm_xor_si128(t4, _mm_shuffle_epi8(M128(_mul4mask), t1)); \
|
t0 = v128_xor( t4, v128_shuffle8( _mul4mask, t1 ) ); \
|
||||||
t4 = _mm_shuffle_epi8(t2, M128(_supermix1b));\
|
t4 = v128_shuffle8( t2, _supermix1b ); \
|
||||||
t3 = _mm_xor_si128(t3, _mm_shuffle_epi8(M128(_mul2mask), t1));\
|
t3 = v128_xor( t3, v128_shuffle8( _mul2mask, t1 ) ); \
|
||||||
t1 = _mm_shuffle_epi8(t4, M128(_supermix1c));\
|
t1 = v128_shuffle8( t4, _supermix1c ); \
|
||||||
t4 = _mm_xor_si128(t4, t1);\
|
t4 = v128_xor( t4, t1 ); \
|
||||||
t1 = _mm_shuffle_epi8(t4, M128(_supermix1d));\
|
t1 = v128_shuffle8( t4, _supermix1d ); \
|
||||||
t4 = _mm_xor_si128(t4, t1);\
|
t4 = v128_xor( t4, t1 ); \
|
||||||
t1 = _mm_shuffle_epi8(t2, M128(_supermix1a));\
|
t1 = v128_shuffle8( t2, _supermix1a ); \
|
||||||
t2 = v128_xor3( t2, t3, t0 ); \
|
t2 = v128_xor3( t2, t3, t0 ); \
|
||||||
t2 = _mm_shuffle_epi8(t2, M128(_supermix7a));\
|
t2 = v128_shuffle8( t2, _supermix7a ); \
|
||||||
t4 = v128_xor3( t4, t1, t2 ); \
|
t4 = v128_xor3( t4, t1, t2 ); \
|
||||||
t2 = _mm_shuffle_epi8(t2, M128(_supermix7b));\
|
t2 = v128_shuffle8( t2, _supermix7b ); \
|
||||||
t3 = _mm_shuffle_epi8(t3, M128(_supermix2a));\
|
t3 = v128_shuffle8( t3, _supermix2a ); \
|
||||||
t1 = _mm_shuffle_epi8(t0, M128(_supermix4a));\
|
t1 = v128_shuffle8( t0, _supermix4a ); \
|
||||||
t0 = _mm_shuffle_epi8(t0, M128(_supermix4b));\
|
t0 = v128_shuffle8( t0, _supermix4b ); \
|
||||||
t4 = v128_xor3( t4, t2, t1 ); \
|
t4 = v128_xor3( t4, t2, t1 ); \
|
||||||
t0 = _mm_xor_si128(t0, t3);\
|
t0 = v128_xor( t0, t3 ); \
|
||||||
t4 = v128_xor3(t4, t0, _mm_shuffle_epi8(t0, M128(_supermix4c)));
|
t4 = v128_xor3( t4, t0, v128_shuffle8( t0, _supermix4c ) );
|
||||||
|
|
||||||
/*
|
|
||||||
#define SUPERMIX(t0, t1, t2, t3, t4)\
|
|
||||||
PRESUPERMIX(t0, t1, t2, t3, t4);\
|
|
||||||
POSTSUPERMIX(t0, t1, t2, t3, t4)
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define POSTSUPERMIX(t0, t1, t2, t3, t4)\
|
|
||||||
t1 = _mm_shuffle_epi8(t2, M128(_supermix1b));\
|
|
||||||
t4 = t1;\
|
|
||||||
t1 = _mm_shuffle_epi8(t1, M128(_supermix1c));\
|
|
||||||
t4 = _mm_xor_si128(t4, t1);\
|
|
||||||
t1 = _mm_shuffle_epi8(t4, M128(_supermix1d));\
|
|
||||||
t4 = _mm_xor_si128(t4, t1);\
|
|
||||||
t1 = _mm_shuffle_epi8(t2, M128(_supermix1a));\
|
|
||||||
t4 = _mm_xor_si128(t4, t1);\
|
|
||||||
t2 = v128_xor3(t2, t3, t0 );\
|
|
||||||
t2 = _mm_shuffle_epi8(t2, M128(_supermix7a));\
|
|
||||||
t4 = _mm_xor_si128(t4, t2);\
|
|
||||||
t2 = _mm_shuffle_epi8(t2, M128(_supermix7b));\
|
|
||||||
t4 = _mm_xor_si128(t4, t2);\
|
|
||||||
t3 = _mm_shuffle_epi8(t3, M128(_supermix2a));\
|
|
||||||
t1 = _mm_shuffle_epi8(t0, M128(_supermix4a));\
|
|
||||||
t4 = _mm_xor_si128(t4, t1);\
|
|
||||||
t0 = _mm_shuffle_epi8(t0, M128(_supermix4b));\
|
|
||||||
t0 = _mm_xor_si128(t0, t3);\
|
|
||||||
t4 = _mm_xor_si128(t4, t0);\
|
|
||||||
t0 = _mm_shuffle_epi8(t0, M128(_supermix4c));\
|
|
||||||
t4 = _mm_xor_si128(t4, t0)
|
|
||||||
|
|
||||||
#define SUBROUND512_3(r1a, r1b, r1c, r1d, r2a, r2b, r2c, r2d, r3a, r3b, r3c, r3d)\
|
|
||||||
CMIX(r1a, r1b, r1c, r1d, _t0, _t1);\
|
|
||||||
PACK_S0(r1c, r1a, _t0);\
|
|
||||||
SUBSTITUTE(r1c, _t2 );\
|
|
||||||
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
|
|
||||||
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
|
|
||||||
r2c = _mm_xor_si128(r2c, _t0);\
|
|
||||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
|
||||||
r2d = _mm_xor_si128(r2d, _t0);\
|
|
||||||
UNPACK_S0(r1c, r1a, _t3);\
|
|
||||||
SUBSTITUTE(r2c, _t2 );\
|
|
||||||
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
|
|
||||||
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
|
|
||||||
r3c = _mm_xor_si128(r3c, _t0);\
|
|
||||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
|
||||||
r3d = _mm_xor_si128(r3d, _t0);\
|
|
||||||
UNPACK_S0(r2c, r2a, _t3);\
|
|
||||||
SUBSTITUTE(r3c, _t2 );\
|
|
||||||
SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
|
|
||||||
UNPACK_S0(r3c, r3a, _t3)
|
|
||||||
|
|
||||||
#define SUBROUND512_4(r1a, r1b, r1c, r1d, r2a, r2b, r2c, r2d, r3a, r3b, r3c, r3d, r4a, r4b, r4c, r4d)\
|
#define SUBROUND512_4(r1a, r1b, r1c, r1d, r2a, r2b, r2c, r2d, r3a, r3b, r3c, r3d, r4a, r4b, r4c, r4d)\
|
||||||
CMIX(r1a, r1b, r1c, r1d, _t0, _t1);\
|
CMIX(r1a, r1b, r1c, r1d, _t0, _t1);\
|
||||||
PACK_S0(r1c, r1a, _t0);\
|
PACK_S0(r1c, r1a, _t0);\
|
||||||
SUBSTITUTE( r1c, _t2 );\
|
SUBSTITUTE( r1c, _t2 );\
|
||||||
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
|
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
|
||||||
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
|
_t0 = shuffle_0321( r1c ); \
|
||||||
r2c = _mm_xor_si128(r2c, _t0);\
|
r2c = v128_xor(r2c, _t0);\
|
||||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
_t0 = mask_1000( _t0 ); \
|
||||||
r2d = _mm_xor_si128(r2d, _t0);\
|
r2d = v128_xor(r2d, _t0);\
|
||||||
UNPACK_S0(r1c, r1a, _t3);\
|
UNPACK_S0(r1c, r1a, _t3);\
|
||||||
SUBSTITUTE(r2c, _t2 );\
|
SUBSTITUTE(r2c, _t2 );\
|
||||||
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
|
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
|
||||||
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
|
_t0 = shuffle_0321( r2c ); \
|
||||||
r3c = _mm_xor_si128(r3c, _t0);\
|
r3c = v128_xor(r3c, _t0);\
|
||||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
_t0 = mask_1000( _t0 ); \
|
||||||
r3d = _mm_xor_si128(r3d, _t0);\
|
r3d = v128_xor(r3d, _t0);\
|
||||||
UNPACK_S0(r2c, r2a, _t3);\
|
UNPACK_S0(r2c, r2a, _t3);\
|
||||||
SUBSTITUTE( r3c, _t2 );\
|
SUBSTITUTE( r3c, _t2 );\
|
||||||
SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
|
SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
|
||||||
_t0 = _mm_shuffle_epi32(r3c, 0x39);\
|
_t0 = shuffle_0321( r3c ); \
|
||||||
r4c = _mm_xor_si128(r4c, _t0);\
|
r4c = v128_xor(r4c, _t0);\
|
||||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
_t0 = mask_1000( _t0 ); \
|
||||||
r4d = _mm_xor_si128(r4d, _t0);\
|
r4d = v128_xor(r4d, _t0);\
|
||||||
UNPACK_S0(r3c, r3a, _t3);\
|
UNPACK_S0(r3c, r3a, _t3);\
|
||||||
SUBSTITUTE( r4c, _t2 );\
|
SUBSTITUTE( r4c, _t2 );\
|
||||||
SUPERMIX(_t2, _t3, _t0, _t1, r4c);\
|
SUPERMIX(_t2, _t3, _t0, _t1, r4c);\
|
||||||
@@ -256,18 +195,19 @@ MYALIGN const unsigned int _IV512[] = {
|
|||||||
block[1] = col[(base + a + 1) % s];\
|
block[1] = col[(base + a + 1) % s];\
|
||||||
block[2] = col[(base + a + 2) % s];\
|
block[2] = col[(base + a + 2) % s];\
|
||||||
block[3] = col[(base + a + 3) % s];\
|
block[3] = col[(base + a + 3) % s];\
|
||||||
x = _mm_load_si128((__m128i*)block)
|
x = v128_load( (v128_t*)block )
|
||||||
|
|
||||||
#define STORECOLUMN(x, s)\
|
#define STORECOLUMN(x, s)\
|
||||||
_mm_store_si128((__m128i*)block, x);\
|
v128_store((v128_t*)block, x );\
|
||||||
col[(base + 0) % s] = block[0];\
|
col[(base + 0) % s] = block[0];\
|
||||||
col[(base + 1) % s] = block[1];\
|
col[(base + 1) % s] = block[1];\
|
||||||
col[(base + 2) % s] = block[2];\
|
col[(base + 2) % s] = block[2];\
|
||||||
col[(base + 3) % s] = block[3]
|
col[(base + 3) % s] = block[3]
|
||||||
|
|
||||||
void Compress512(hashState_fugue *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
|
void Compress512( hashState_fugue *ctx, const unsigned char *pmsg,
|
||||||
|
unsigned int uBlockCount )
|
||||||
{
|
{
|
||||||
__m128i _t0, _t1, _t2, _t3;
|
v128_t _t0, _t1, _t2, _t3;
|
||||||
|
|
||||||
switch(ctx->base)
|
switch(ctx->base)
|
||||||
{
|
{
|
||||||
@@ -346,19 +286,18 @@ void Compress512(hashState_fugue *ctx, const unsigned char *pmsg, unsigned int u
|
|||||||
pmsg += 4;
|
pmsg += 4;
|
||||||
uBlockCount--;
|
uBlockCount--;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Final512(hashState_fugue *ctx, BitSequence *hashval)
|
void Final512( hashState_fugue *ctx, uint8_t *hashval )
|
||||||
{
|
{
|
||||||
unsigned int block[4] __attribute__ ((aligned (32)));
|
unsigned int block[4] __attribute__ ((aligned (32)));
|
||||||
unsigned int col[36] __attribute__ ((aligned (16)));
|
unsigned int col[36] __attribute__ ((aligned (16)));
|
||||||
unsigned int i, base;
|
unsigned int i, base;
|
||||||
__m128i r0, _t0, _t1, _t2, _t3;
|
v128_t r0, _t0, _t1, _t2, _t3;
|
||||||
|
|
||||||
for( i = 0; i < 12; i++ )
|
for( i = 0; i < 12; i++ )
|
||||||
{
|
{
|
||||||
_mm_store_si128((__m128i*)block, ctx->state[i]);
|
v128_store( (v128_t*)block, ctx->state[i] );
|
||||||
|
|
||||||
col[3 * i + 0] = block[0];
|
col[3 * i + 0] = block[0];
|
||||||
col[3 * i + 1] = block[1];
|
col[3 * i + 1] = block[1];
|
||||||
@@ -458,22 +397,22 @@ void Final512(hashState_fugue *ctx, BitSequence *hashval)
|
|||||||
|
|
||||||
// Transform to the standard basis and store output; S1 || S2 || S3 || S4
|
// Transform to the standard basis and store output; S1 || S2 || S3 || S4
|
||||||
LOADCOLUMN( r0, 36, 1 );
|
LOADCOLUMN( r0, 36, 1 );
|
||||||
_mm_store_si128((__m128i*)hashval, r0);
|
v128_store( (v128_t*)hashval, r0 );
|
||||||
|
|
||||||
// Transform to the standard basis and store output; S9 || S10 || S11 || S12
|
// Transform to the standard basis and store output; S9 || S10 || S11 || S12
|
||||||
LOADCOLUMN( r0, 36, 9 );
|
LOADCOLUMN( r0, 36, 9 );
|
||||||
_mm_store_si128((__m128i*)hashval + 1, r0);
|
v128_store( (v128_t*)hashval + 1, r0 );
|
||||||
|
|
||||||
// Transform to the standard basis and store output; S18 || S19 || S20 || S21
|
// Transform to the standard basis and store output; S18 || S19 || S20 || S21
|
||||||
LOADCOLUMN( r0, 36, 18 );
|
LOADCOLUMN( r0, 36, 18 );
|
||||||
_mm_store_si128((__m128i*)hashval + 2, r0);
|
v128_store( (v128_t*)hashval + 2, r0 );
|
||||||
|
|
||||||
// Transform to the standard basis and store output; S27 || S28 || S29 || S30
|
// Transform to the standard basis and store output; S27 || S28 || S29 || S30
|
||||||
LOADCOLUMN( r0, 36, 27 );
|
LOADCOLUMN( r0, 36, 27 );
|
||||||
_mm_store_si128((__m128i*)hashval + 3, r0);
|
v128_store( (v128_t*)hashval + 3, r0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
HashReturn fugue512_Init(hashState_fugue *ctx, int nHashSize)
|
int fugue512_Init( hashState_fugue *ctx, int nHashSize )
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
ctx->processed_bits = 0;
|
ctx->processed_bits = 0;
|
||||||
@@ -487,18 +426,18 @@ HashReturn fugue512_Init(hashState_fugue *ctx, int nHashSize)
|
|||||||
for(i = 0; i < 6; i++)
|
for(i = 0; i < 6; i++)
|
||||||
ctx->state[i] = v128_zero;
|
ctx->state[i] = v128_zero;
|
||||||
|
|
||||||
ctx->state[6] = _mm_load_si128((__m128i*)_IV512 + 0);
|
ctx->state[6] = casti_v128( _IV512, 0 );
|
||||||
ctx->state[7] = _mm_load_si128((__m128i*)_IV512 + 1);
|
ctx->state[7] = casti_v128( _IV512, 1 );
|
||||||
ctx->state[8] = _mm_load_si128((__m128i*)_IV512 + 2);
|
ctx->state[8] = casti_v128( _IV512, 2 );
|
||||||
ctx->state[9] = _mm_load_si128((__m128i*)_IV512 + 3);
|
ctx->state[9] = casti_v128( _IV512, 3 );
|
||||||
ctx->state[10] = _mm_load_si128((__m128i*)_IV512 + 4);
|
ctx->state[10] = casti_v128( _IV512, 4 );
|
||||||
ctx->state[11] = _mm_load_si128((__m128i*)_IV512 + 5);
|
ctx->state[11] = casti_v128( _IV512, 5 );
|
||||||
|
|
||||||
return SUCCESS;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int fugue512_Update( hashState_fugue *state, const void *data,
|
||||||
HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen)
|
uint64_t databitlen )
|
||||||
{
|
{
|
||||||
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
||||||
|
|
||||||
@@ -509,7 +448,8 @@ HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength
|
|||||||
if(state->uBufferBytes != 0)
|
if(state->uBufferBytes != 0)
|
||||||
{
|
{
|
||||||
// Fill the buffer
|
// Fill the buffer
|
||||||
memcpy(state->buffer + state->uBufferBytes, (void*)data, state->uBlockLength - state->uBufferBytes);
|
memcpy( state->buffer + state->uBufferBytes, (void*)data,
|
||||||
|
state->uBlockLength - state->uBufferBytes );
|
||||||
|
|
||||||
// Process the buffer
|
// Process the buffer
|
||||||
Compress512(state, state->buffer, 1);
|
Compress512(state, state->buffer, 1);
|
||||||
@@ -545,13 +485,13 @@ HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength
|
|||||||
state->uBufferBytes += uByteLength;
|
state->uBufferBytes += uByteLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
return SUCCESS;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
HashReturn fugue512_Final(hashState_fugue *state, void *hashval)
|
int fugue512_Final( hashState_fugue *state, void *hashval )
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
BitSequence lengthbuf[8] __attribute__((aligned(64)));
|
uint8_t lengthbuf[8] __attribute__((aligned(64)));
|
||||||
|
|
||||||
// Update message bit count
|
// Update message bit count
|
||||||
state->processed_bits += state->uBufferBytes * 8;
|
state->processed_bits += state->uBufferBytes * 8;
|
||||||
@@ -575,16 +515,17 @@ HashReturn fugue512_Final(hashState_fugue *state, void *hashval)
|
|||||||
// Finalization
|
// Finalization
|
||||||
Final512(state, hashval);
|
Final512(state, hashval);
|
||||||
|
|
||||||
return SUCCESS;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen)
|
int fugue512_full( hashState_fugue *hs, void *hashval, const void *data,
|
||||||
|
uint64_t databitlen )
|
||||||
{
|
{
|
||||||
fugue512_Init( hs, 512 );
|
fugue512_Init( hs, 512 );
|
||||||
fugue512_Update( hs, data, databitlen*8 );
|
fugue512_Update( hs, data, databitlen*8 );
|
||||||
fugue512_Final( hs, hashval );
|
fugue512_Final( hs, hashval );
|
||||||
return SUCCESS;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // AES
|
#endif // AES
|
||||||
|
@@ -14,37 +14,31 @@
|
|||||||
#ifndef FUGUE_HASH_API_H
|
#ifndef FUGUE_HASH_API_H
|
||||||
#define FUGUE_HASH_API_H
|
#define FUGUE_HASH_API_H
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if ( defined(__SSE4_1__) && defined(__AES__) ) || ( defined(__ARM_NEON) && defined(__ARM_FEATURE_AES) )
|
||||||
|
|
||||||
#if !defined(__SSE4_1__)
|
|
||||||
#error "Unsupported configuration, AES needs SSE4.1. Compile without AES."
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "compat/sha3_common.h"
|
|
||||||
#include "simd-utils.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
__m128i state[12];
|
v128_t state[12];
|
||||||
unsigned int base;
|
unsigned int base;
|
||||||
|
|
||||||
unsigned int uHashSize;
|
unsigned int uHashSize;
|
||||||
unsigned int uBlockLength;
|
unsigned int uBlockLength;
|
||||||
unsigned int uBufferBytes;
|
unsigned int uBufferBytes;
|
||||||
DataLength processed_bits;
|
uint64_t processed_bits;
|
||||||
BitSequence buffer[4];
|
uint8_t buffer[4];
|
||||||
|
|
||||||
} hashState_fugue __attribute__ ((aligned (64)));
|
} hashState_fugue __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
|
||||||
// These functions are deprecated, use the lower case macro aliases that use
|
// These functions are deprecated, use the lower case macro aliases that use
|
||||||
// the standard interface. This will be cleaned up at a later date.
|
// the standard interface. This will be cleaned up at a later date.
|
||||||
HashReturn fugue512_Init(hashState_fugue *state, int hashbitlen);
|
int fugue512_Init( hashState_fugue *state, int hashbitlen );
|
||||||
|
|
||||||
HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen);
|
int fugue512_Update( hashState_fugue *state, const void *data,
|
||||||
|
uint64_t databitlen );
|
||||||
|
|
||||||
HashReturn fugue512_Final(hashState_fugue *state, void *hashval);
|
int fugue512_Final( hashState_fugue *state, void *hashval );
|
||||||
|
|
||||||
#define fugue512_init( state ) \
|
#define fugue512_init( state ) \
|
||||||
fugue512_Init( state, 512 )
|
fugue512_Init( state, 512 )
|
||||||
@@ -54,7 +48,8 @@ HashReturn fugue512_Final(hashState_fugue *state, void *hashval);
|
|||||||
fugue512_Final
|
fugue512_Final
|
||||||
|
|
||||||
|
|
||||||
HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen);
|
int fugue512_full( hashState_fugue *hs, void *hashval, const void *data,
|
||||||
|
uint64_t databitlen);
|
||||||
|
|
||||||
#endif // AES
|
#endif // AES
|
||||||
#endif // HASH_API_H
|
#endif // HASH_API_H
|
||||||
|
@@ -704,15 +704,15 @@ static void AddXor512(const void *a,const void *b,void *c)
|
|||||||
casti_m256i( b, 0 ) );
|
casti_m256i( b, 0 ) );
|
||||||
casti_m256i( c, 1 ) = _mm256_xor_si256( casti_m256i( a, 1 ),
|
casti_m256i( c, 1 ) = _mm256_xor_si256( casti_m256i( a, 1 ),
|
||||||
casti_m256i( b, 1 ) );
|
casti_m256i( b, 1 ) );
|
||||||
#elif defined(__SSE2__)
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
casti_m128i( c, 0 ) = _mm_xor_si128( casti_m128i( a, 0 ),
|
casti_v128( c, 0 ) = v128_xor( casti_v128( a, 0 ),
|
||||||
casti_m128i( b, 0 ) );
|
casti_v128( b, 0 ) );
|
||||||
casti_m128i( c, 1 ) = _mm_xor_si128( casti_m128i( a, 1 ),
|
casti_v128( c, 1 ) = v128_xor( casti_v128( a, 1 ),
|
||||||
casti_m128i( b, 1 ) );
|
casti_v128( b, 1 ) );
|
||||||
casti_m128i( c, 2 ) = _mm_xor_si128( casti_m128i( a, 2 ),
|
casti_v128( c, 2 ) = v128_xor( casti_v128( a, 2 ),
|
||||||
casti_m128i( b, 2 ) );
|
casti_v128( b, 2 ) );
|
||||||
casti_m128i( c, 3 ) = _mm_xor_si128( casti_m128i( a, 3 ),
|
casti_v128( c, 3 ) = v128_xor( casti_v128( a, 3 ),
|
||||||
casti_m128i( b, 3 ) );
|
casti_v128( b, 3 ) );
|
||||||
#else
|
#else
|
||||||
const unsigned long long *A=a, *B=b;
|
const unsigned long long *A=a, *B=b;
|
||||||
unsigned long long *C=c;
|
unsigned long long *C=c;
|
||||||
|
@@ -60,46 +60,10 @@ static const v128u64_t SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 };
|
|||||||
|
|
||||||
#if defined(__ARM_NEON)
|
#if defined(__ARM_NEON)
|
||||||
|
|
||||||
// No fast shuffle on NEON
|
static const v128u32_t gr_mask __attribute__ ((aligned (16))) =
|
||||||
//static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 };
|
{ 0x03020100, 0x0b0a0908, 0x07060504, 0x0f0e0d0c };
|
||||||
static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
|
||||||
|
|
||||||
#define gr_shuffle32( v ) v128_blendv( v128_qrev32( v ), v, BLEND_MASK )
|
#define gr_shuffle32(v) vqtbl1q_u8( v, gr_mask )
|
||||||
|
|
||||||
/*
|
|
||||||
#define TRANSP_MASK \
|
|
||||||
0xd,0x5,0x9,0x1,0xc,0x4,0x8,0x0,0xf,0x7,0xb,0x3,0xe,0x6,0xa,0x2
|
|
||||||
#define SUBSH_MASK0 \
|
|
||||||
0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8
|
|
||||||
#define SUBSH_MASK1 \
|
|
||||||
0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9
|
|
||||||
#define SUBSH_MASK2 \
|
|
||||||
0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa
|
|
||||||
#define SUBSH_MASK3 \
|
|
||||||
0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb
|
|
||||||
#define SUBSH_MASK4 \
|
|
||||||
0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc
|
|
||||||
#define SUBSH_MASK5 \
|
|
||||||
0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd
|
|
||||||
#define SUBSH_MASK6 \
|
|
||||||
0x1,0x4,0x7,0xa,0xd,0x0,0x3,0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe
|
|
||||||
#define SUBSH_MASK7 \
|
|
||||||
0x6,0x9,0xc,0xf,0x2,0x5,0x8,0xb,0xe,0x1,0x4,0x7,0xa,0xd,0x0,0x3
|
|
||||||
|
|
||||||
//#define gr_shuffle8( v, c ) v128_shullfev8( v, c )
|
|
||||||
|
|
||||||
|
|
||||||
#define gr_shuffle8( v, c15, c14, c13, c12, c11, c10, c09, c08, \
|
|
||||||
c07, c06, c05, c04, c03, c02, c01, c00 ) \
|
|
||||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
|
||||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
|
||||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
|
||||||
v128_movlane8( v128_movlane8( v128_movlane8( v128_movlane8( \
|
|
||||||
v, 15, v, c15 ), 14, v, c14 ), 13, v, c13 ), 12, v, c12 ), \
|
|
||||||
11, v, c11 ), 10, v, c10 ), 9, v, c09 ), 8, v, c08 ), \
|
|
||||||
7, v, c07 ), 6, v, c06 ), 5, v, c05 ), 4, v, c04 ), \
|
|
||||||
3, v, c03 ), 2, v, c02 ), 1, v, c01 ), 0, v, c00 )
|
|
||||||
*/
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@@ -107,7 +71,6 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define tos(a) #a
|
#define tos(a) #a
|
||||||
#define tostr(a) tos(a)
|
#define tostr(a) tos(a)
|
||||||
|
|
||||||
@@ -334,15 +297,14 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
|||||||
*/
|
*/
|
||||||
#define SUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
#define SUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
||||||
/* SubBytes */\
|
/* SubBytes */\
|
||||||
b0 = v128_xor(b0, b0);\
|
a0 = v128_aesenclast_nokey( a0 ); \
|
||||||
a0 = v128_aesenclast(a0, b0);\
|
a1 = v128_aesenclast_nokey( a1 ); \
|
||||||
a1 = v128_aesenclast(a1, b0);\
|
a2 = v128_aesenclast_nokey( a2 ); \
|
||||||
a2 = v128_aesenclast(a2, b0);\
|
a3 = v128_aesenclast_nokey( a3 ); \
|
||||||
a3 = v128_aesenclast(a3, b0);\
|
a4 = v128_aesenclast_nokey( a4 ); \
|
||||||
a4 = v128_aesenclast(a4, b0);\
|
a5 = v128_aesenclast_nokey( a5 ); \
|
||||||
a5 = v128_aesenclast(a5, b0);\
|
a6 = v128_aesenclast_nokey( a6 ); \
|
||||||
a6 = v128_aesenclast(a6, b0);\
|
a7 = v128_aesenclast_nokey( a7 ); \
|
||||||
a7 = v128_aesenclast(a7, b0);\
|
|
||||||
/* MixBytes */\
|
/* MixBytes */\
|
||||||
MixBytes( a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7 ); \
|
MixBytes( a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7 ); \
|
||||||
}
|
}
|
||||||
@@ -365,7 +327,6 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
|||||||
/* SubBytes + MixBytes */\
|
/* SubBytes + MixBytes */\
|
||||||
SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \
|
SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \
|
||||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 ); \
|
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 ); \
|
||||||
\
|
|
||||||
/* AddRoundConstant P1024 */\
|
/* AddRoundConstant P1024 */\
|
||||||
xmm0 = v128_xor( xmm0, \
|
xmm0 = v128_xor( xmm0, \
|
||||||
casti_v128( round_const_p, round_counter+1 ) ); \
|
casti_v128( round_const_p, round_counter+1 ) ); \
|
||||||
@@ -467,7 +428,6 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
|||||||
t1 = v128_unpackhi16(t1, i3);\
|
t1 = v128_unpackhi16(t1, i3);\
|
||||||
i2 = v128_unpacklo16(i2, i3);\
|
i2 = v128_unpacklo16(i2, i3);\
|
||||||
i0 = v128_unpacklo16(i0, i1);\
|
i0 = v128_unpacklo16(i0, i1);\
|
||||||
\
|
|
||||||
/* shuffle with immediate */\
|
/* shuffle with immediate */\
|
||||||
t0 = gr_shuffle32( t0 ); \
|
t0 = gr_shuffle32( t0 ); \
|
||||||
t1 = gr_shuffle32( t1 ); \
|
t1 = gr_shuffle32( t1 ); \
|
||||||
@@ -477,7 +437,6 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
|||||||
i2 = gr_shuffle32( i2 ); \
|
i2 = gr_shuffle32( i2 ); \
|
||||||
i4 = gr_shuffle32( i4 ); \
|
i4 = gr_shuffle32( i4 ); \
|
||||||
i6 = gr_shuffle32( i6 ); \
|
i6 = gr_shuffle32( i6 ); \
|
||||||
\
|
|
||||||
/* continue with unpack */\
|
/* continue with unpack */\
|
||||||
t4 = i0;\
|
t4 = i0;\
|
||||||
i0 = v128_unpacklo32(i0, i2);\
|
i0 = v128_unpacklo32(i0, i2);\
|
||||||
@@ -584,7 +543,8 @@ static const v128u32_t BLEND_MASK = { 0xffffffff, 0, 0, 0xffffffff };
|
|||||||
/* transpose done */\
|
/* transpose done */\
|
||||||
}/**/
|
}/**/
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// not used
|
||||||
void INIT( v128_t* chaining )
|
void INIT( v128_t* chaining )
|
||||||
{
|
{
|
||||||
static v128_t xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
static v128_t xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||||
@@ -613,6 +573,7 @@ void INIT( v128_t* chaining )
|
|||||||
chaining[6] = xmm14;
|
chaining[6] = xmm14;
|
||||||
chaining[7] = xmm15;
|
chaining[7] = xmm15;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void TF1024( v128_t* chaining, const v128_t* message )
|
void TF1024( v128_t* chaining, const v128_t* message )
|
||||||
{
|
{
|
||||||
|
@@ -1,3 +1,6 @@
|
|||||||
|
#if !defined GROESTL256_INTR_AES_H__
|
||||||
|
#define GROESTL256_INTR_AES_H__
|
||||||
|
|
||||||
/* groestl-intr-aes.h Aug 2011
|
/* groestl-intr-aes.h Aug 2011
|
||||||
*
|
*
|
||||||
* Groestl implementation with intrinsics using ssse3, sse4.1, and aes
|
* Groestl implementation with intrinsics using ssse3, sse4.1, and aes
|
||||||
@@ -50,10 +53,10 @@ static const v128u64_t SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e };
|
|||||||
|
|
||||||
#if defined(__ARM_NEON)
|
#if defined(__ARM_NEON)
|
||||||
|
|
||||||
// No fast shuffle on NEON
|
static const v128u32_t gr_mask __attribute__ ((aligned (16))) =
|
||||||
static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 };
|
{ 0x03020100, 0x0b0a0908, 0x07060504, 0x0f0e0d0c };
|
||||||
|
|
||||||
#define gr_shuffle32( v ) v128_shufflev32( v, vmask_d8 )
|
#define gr_shuffle32(v) vqtbl1q_u8( v, gr_mask )
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@@ -61,7 +64,6 @@ static const uint32x4_t vmask_d8 = { 3, 1, 2, 0 };
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define tos(a) #a
|
#define tos(a) #a
|
||||||
#define tostr(a) tos(a)
|
#define tostr(a) tos(a)
|
||||||
|
|
||||||
@@ -598,4 +600,4 @@ void OF512( v128_t* chaining )
|
|||||||
chaining[3] = xmm11;
|
chaining[3] = xmm11;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@@ -181,6 +181,7 @@ int groestl512( hashState_groestl* ctx, void* output, const void* input,
|
|||||||
|
|
||||||
// digest final padding block and do output transform
|
// digest final padding block and do output transform
|
||||||
TF1024( ctx->chaining, ctx->buffer );
|
TF1024( ctx->chaining, ctx->buffer );
|
||||||
|
|
||||||
OF1024( ctx->chaining );
|
OF1024( ctx->chaining );
|
||||||
|
|
||||||
// store hash result in output
|
// store hash result in output
|
||||||
|
@@ -87,6 +87,7 @@ int final_groestl( hashState_groestl*, void* );
|
|||||||
int update_and_final_groestl( hashState_groestl*, void*, const void*, int );
|
int update_and_final_groestl( hashState_groestl*, void*, const void*, int );
|
||||||
int groestl512( hashState_groestl*, void*, const void*, uint64_t );
|
int groestl512( hashState_groestl*, void*, const void*, uint64_t );
|
||||||
#define groestl512_full groestl512
|
#define groestl512_full groestl512
|
||||||
|
#define groestl512_ctx groestl512
|
||||||
|
|
||||||
|
|
||||||
#endif /* __hash_h */
|
#endif /* __hash_h */
|
||||||
|
@@ -626,7 +626,7 @@ static const __m256i SUBSH_MASK7_2WAY =
|
|||||||
|
|
||||||
#define ROUND_2WAY(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
#define ROUND_2WAY(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
||||||
/* AddRoundConstant */\
|
/* AddRoundConstant */\
|
||||||
b1 = mm256_bcast_m128( mm128_mask_32( v128_neg1, 0x3 ) ); \
|
b1 = mm256_bcast_m128( v128_mask32( v128_neg1, 0x3 ) ); \
|
||||||
a0 = _mm256_xor_si256( a0, mm256_bcast_m128( round_const_l0[i] ) );\
|
a0 = _mm256_xor_si256( a0, mm256_bcast_m128( round_const_l0[i] ) );\
|
||||||
a1 = _mm256_xor_si256( a1, b1 );\
|
a1 = _mm256_xor_si256( a1, b1 );\
|
||||||
a2 = _mm256_xor_si256( a2, b1 );\
|
a2 = _mm256_xor_si256( a2, b1 );\
|
||||||
|
@@ -213,7 +213,7 @@ int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
v128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||||
do {
|
do {
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
*noncev = v128_bswap32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||||
|
|
||||||
myriad_4way_hash( hash, vdata );
|
myriad_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
@@ -35,8 +35,6 @@
|
|||||||
|
|
||||||
#include "sph_groestl.h"
|
#include "sph_groestl.h"
|
||||||
|
|
||||||
#if !defined(__AES__)
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
#endif
|
#endif
|
||||||
@@ -3119,5 +3117,4 @@ sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // !AES
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -42,7 +42,6 @@ extern "C"{
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "compat/sph_types.h"
|
#include "compat/sph_types.h"
|
||||||
|
|
||||||
#if !defined(__AES__)
|
|
||||||
/**
|
/**
|
||||||
* Output size (in bits) for Groestl-224.
|
* Output size (in bits) for Groestl-224.
|
||||||
*/
|
*/
|
||||||
@@ -327,5 +326,4 @@ void sph_groestl512_addbits_and_close(
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // !AES
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -108,4 +108,53 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(KECCAK_2WAY)
|
||||||
|
|
||||||
|
void keccakhash_2x64(void *state, const void *input)
|
||||||
|
{
|
||||||
|
keccak256_2x64_context ctx;
|
||||||
|
keccak256_2x64_init( &ctx );
|
||||||
|
keccak256_2x64_update( &ctx, input, 80 );
|
||||||
|
keccak256_2x64_close( &ctx, state );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_keccak_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash[16*2] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash7 = &(hash[13]); // 3*4+1
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
uint32_t n = pdata[19];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
||||||
|
do {
|
||||||
|
keccakhash_2x64( hash, vdata );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 2; lane++ )
|
||||||
|
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||||
|
{
|
||||||
|
extr_lane_2x64( lane_hash, hash, lane, 256 );
|
||||||
|
if ( valid_hash( lane_hash, ptarget ))
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n + lane );
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
||||||
|
n += 2;
|
||||||
|
} while ( (n < max_nonce-2) && !work_restart[thr_id].restart);
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -17,6 +17,9 @@ bool register_keccak_algo( algo_gate_t* gate )
|
|||||||
#elif defined (KECCAK_4WAY)
|
#elif defined (KECCAK_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||||
gate->hash = (void*)&keccakhash_4way;
|
gate->hash = (void*)&keccakhash_4way;
|
||||||
|
#elif defined (KECCAK_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_keccak_2x64;
|
||||||
|
gate->hash = (void*)&keccakhash_2x64;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_keccak;
|
gate->scanhash = (void*)&scanhash_keccak;
|
||||||
gate->hash = (void*)&keccakhash;
|
gate->hash = (void*)&keccakhash;
|
||||||
@@ -37,6 +40,9 @@ bool register_keccakc_algo( algo_gate_t* gate )
|
|||||||
#elif defined (KECCAK_4WAY)
|
#elif defined (KECCAK_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||||
gate->hash = (void*)&keccakhash_4way;
|
gate->hash = (void*)&keccakhash_4way;
|
||||||
|
#elif defined (KECCAK_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_keccak_2x64;
|
||||||
|
gate->hash = (void*)&keccakhash_2x64;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_keccak;
|
gate->scanhash = (void*)&scanhash_keccak;
|
||||||
gate->hash = (void*)&keccakhash;
|
gate->hash = (void*)&keccakhash;
|
||||||
@@ -75,15 +81,17 @@ void sha3d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
|||||||
bool register_sha3d_algo( algo_gate_t* gate )
|
bool register_sha3d_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
hard_coded_eb = 6;
|
hard_coded_eb = 6;
|
||||||
// opt_extranonce = false;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
|
||||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
|
||||||
gate->gen_merkle_root = (void*)&sha3d_gen_merkle_root;
|
gate->gen_merkle_root = (void*)&sha3d_gen_merkle_root;
|
||||||
#if defined (KECCAK_8WAY)
|
#if defined (SHA3D_8WAY)
|
||||||
gate->scanhash = (void*)&scanhash_sha3d_8way;
|
gate->scanhash = (void*)&scanhash_sha3d_8way;
|
||||||
gate->hash = (void*)&sha3d_hash_8way;
|
gate->hash = (void*)&sha3d_hash_8way;
|
||||||
#elif defined (KECCAK_4WAY)
|
#elif defined (SHA3D_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_sha3d_4way;
|
gate->scanhash = (void*)&scanhash_sha3d_4way;
|
||||||
gate->hash = (void*)&sha3d_hash_4way;
|
gate->hash = (void*)&sha3d_hash_4way;
|
||||||
|
#elif defined (SHA3D_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha3d_2x64;
|
||||||
|
gate->hash = (void*)&sha3d_hash_2x64;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_sha3d;
|
gate->scanhash = (void*)&scanhash_sha3d;
|
||||||
gate->hash = (void*)&sha3d_hash;
|
gate->hash = (void*)&sha3d_hash;
|
||||||
|
@@ -8,6 +8,16 @@
|
|||||||
#define KECCAK_8WAY 1
|
#define KECCAK_8WAY 1
|
||||||
#elif defined(__AVX2__)
|
#elif defined(__AVX2__)
|
||||||
#define KECCAK_4WAY 1
|
#define KECCAK_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define KECCAK_2WAY 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
#define SHA3D_8WAY 1
|
||||||
|
#elif defined(__AVX2__)
|
||||||
|
#define SHA3D_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define SHA3D_2WAY 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern int hard_coded_eb;
|
extern int hard_coded_eb;
|
||||||
@@ -18,18 +28,16 @@ void keccakhash_8way( void *state, const void *input );
|
|||||||
int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void sha3d_hash_8way( void *state, const void *input );
|
|
||||||
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
|
|
||||||
#elif defined(KECCAK_4WAY)
|
#elif defined(KECCAK_4WAY)
|
||||||
|
|
||||||
void keccakhash_4way( void *state, const void *input );
|
void keccakhash_4way( void *state, const void *input );
|
||||||
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void sha3d_hash_4way( void *state, const void *input );
|
#elif defined(KECCAK_2WAY)
|
||||||
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
|
||||||
|
void keccakhash_2x64( void *state, const void *input );
|
||||||
|
int scanhash_keccak_2x64( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
@@ -38,6 +46,28 @@ void keccakhash( void *state, const void *input );
|
|||||||
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(SHA3D_8WAY)
|
||||||
|
|
||||||
|
void sha3d_hash_8way( void *state, const void *input );
|
||||||
|
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#elif defined(SHA3D_4WAY)
|
||||||
|
|
||||||
|
void sha3d_hash_4way( void *state, const void *input );
|
||||||
|
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#elif defined(SHA3D_2WAY)
|
||||||
|
|
||||||
|
void sha3d_hash_2x64( void *state, const void *input );
|
||||||
|
int scanhash_sha3d_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
void sha3d_hash( void *state, const void *input );
|
void sha3d_hash( void *state, const void *input );
|
||||||
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
|
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "keccak-hash-4way.h"
|
#include "keccak-hash-4way.h"
|
||||||
|
|
||||||
#if defined(KECCAK_8WAY)
|
#if defined(SHA3D_8WAY)
|
||||||
|
|
||||||
void sha3d_hash_8way(void *state, const void *input)
|
void sha3d_hash_8way(void *state, const void *input)
|
||||||
{
|
{
|
||||||
@@ -64,7 +64,7 @@ int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(KECCAK_4WAY)
|
#elif defined(SHA3D_4WAY)
|
||||||
|
|
||||||
void sha3d_hash_4way(void *state, const void *input)
|
void sha3d_hash_4way(void *state, const void *input)
|
||||||
{
|
{
|
||||||
@@ -122,4 +122,60 @@ int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(SHA3D_2WAY)
|
||||||
|
|
||||||
|
void sha3d_hash_2x64(void *state, const void *input)
|
||||||
|
{
|
||||||
|
uint32_t buffer[16*4] __attribute__ ((aligned (64)));
|
||||||
|
keccak256_2x64_context ctx;
|
||||||
|
|
||||||
|
keccak256_2x64_init( &ctx );
|
||||||
|
keccak256_2x64_update( &ctx, input, 80 );
|
||||||
|
keccak256_2x64_close( &ctx, buffer );
|
||||||
|
|
||||||
|
keccak256_2x64_init( &ctx );
|
||||||
|
keccak256_2x64_update( &ctx, buffer, 32 );
|
||||||
|
keccak256_2x64_close( &ctx, state );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_sha3d_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash[16*2] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash7 = &(hash[13]); // 3*4+1
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
uint32_t n = pdata[19];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
||||||
|
do {
|
||||||
|
sha3d_hash_2x64( hash, vdata );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 2; lane++ )
|
||||||
|
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
|
||||||
|
{
|
||||||
|
extr_lane_2x64( lane_hash, hash, lane, 256 );
|
||||||
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n + lane );
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
||||||
|
n += 2;
|
||||||
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -465,12 +465,8 @@ typedef union
|
|||||||
{
|
{
|
||||||
keccak256_2x64_context keccak;
|
keccak256_2x64_context keccak;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
//#if defined(__x86_64__)
|
|
||||||
skein256_2x64_context skein;
|
skein256_2x64_context skein;
|
||||||
//#else
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
// sph_skein512_context skein;
|
|
||||||
//#endif
|
|
||||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
|
||||||
hashState_groestl256 groestl;
|
hashState_groestl256 groestl;
|
||||||
#else
|
#else
|
||||||
sph_groestl256_context groestl;
|
sph_groestl256_context groestl;
|
||||||
@@ -516,7 +512,6 @@ static void allium_4way_hash( void *hash, const void *midstate_vars,
|
|||||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||||
|
|
||||||
//#if defined(__x86_64__)
|
|
||||||
intrlv_2x64( vhashA, hash0, hash1, 256 );
|
intrlv_2x64( vhashA, hash0, hash1, 256 );
|
||||||
skein256_2x64_init( &ctx.skein );
|
skein256_2x64_init( &ctx.skein );
|
||||||
skein256_2x64_update( &ctx.skein, vhashA, 32 );
|
skein256_2x64_update( &ctx.skein, vhashA, 32 );
|
||||||
@@ -527,23 +522,8 @@ static void allium_4way_hash( void *hash, const void *midstate_vars,
|
|||||||
skein256_2x64_update( &ctx.skein, vhashA, 32 );
|
skein256_2x64_update( &ctx.skein, vhashA, 32 );
|
||||||
skein256_2x64_close( &ctx.skein, vhashA );
|
skein256_2x64_close( &ctx.skein, vhashA );
|
||||||
dintrlv_2x64( hash2, hash3, vhashA, 256 );
|
dintrlv_2x64( hash2, hash3, vhashA, 256 );
|
||||||
/*
|
|
||||||
#else
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
sph_skein256_init( &ctx.skein );
|
|
||||||
sph_skein256( &ctx.skein, hash0, 32 );
|
|
||||||
sph_skein256_close( &ctx.skein, hash0 );
|
|
||||||
sph_skein256_init( &ctx.skein );
|
|
||||||
sph_skein256( &ctx.skein, hash1, 32 );
|
|
||||||
sph_skein256_close( &ctx.skein, hash1 );
|
|
||||||
sph_skein256_init( &ctx.skein );
|
|
||||||
sph_skein256( &ctx.skein, hash2, 32 );
|
|
||||||
sph_skein256_close( &ctx.skein, hash2 );
|
|
||||||
sph_skein256_init( &ctx.skein );
|
|
||||||
sph_skein256( &ctx.skein, hash3, 32 );
|
|
||||||
sph_skein256_close( &ctx.skein, hash3 );
|
|
||||||
#endif
|
|
||||||
*/
|
|
||||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
|
||||||
groestl256_full( &ctx.groestl, hash0, hash0, 256 );
|
groestl256_full( &ctx.groestl, hash0, hash0, 256 );
|
||||||
groestl256_full( &ctx.groestl, hash1, hash1, 256 );
|
groestl256_full( &ctx.groestl, hash1, hash1, 256 );
|
||||||
groestl256_full( &ctx.groestl, hash2, hash2, 256 );
|
groestl256_full( &ctx.groestl, hash2, hash2, 256 );
|
||||||
|
@@ -67,7 +67,7 @@ int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
|
|||||||
lyra2h_4way_midstate( vdata );
|
lyra2h_4way_midstate( vdata );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
lyra2h_4way_hash( hash, vdata );
|
lyra2h_4way_hash( hash, vdata );
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
|
@@ -456,7 +456,7 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
*noncev = v128_bswap32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
|
|
||||||
lyra2rev2_4way_hash( hash, vdata );
|
lyra2rev2_4way_hash( hash, vdata );
|
||||||
|
|
||||||
|
@@ -9,11 +9,11 @@ bool register_hmq1725_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_hmq1725_4way;
|
gate->scanhash = (void*)&scanhash_hmq1725_4way;
|
||||||
gate->hash = (void*)&hmq1725_4way_hash;
|
gate->hash = (void*)&hmq1725_4way_hash;
|
||||||
#else
|
#else
|
||||||
init_hmq1725_ctx();
|
|
||||||
gate->scanhash = (void*)&scanhash_hmq1725;
|
gate->scanhash = (void*)&scanhash_hmq1725;
|
||||||
gate->hash = (void*)&hmq1725hash;
|
gate->hash = (void*)&hmq1725hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
opt_target_factor = 65536.0;
|
opt_target_factor = 65536.0;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
@@ -29,7 +29,6 @@ int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
|
|||||||
void hmq1725hash( void *state, const void *input );
|
void hmq1725hash( void *state, const void *input );
|
||||||
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
void init_hmq1725_ctx();
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -4,336 +4,254 @@
|
|||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "algo/blake/sph_blake.h"
|
#include "algo/blake/blake512-hash.h"
|
||||||
#include "algo/bmw/sph_bmw.h"
|
#include "algo/bmw/sph_bmw.h"
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
#include "algo/fugue/fugue-aesni.h"
|
||||||
|
#else
|
||||||
|
#include "algo/fugue/sph_fugue.h"
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
|
#else
|
||||||
#include "algo/groestl/sph_groestl.h"
|
#include "algo/groestl/sph_groestl.h"
|
||||||
|
#include "algo/echo/sph_echo.h"
|
||||||
|
#endif
|
||||||
#include "algo/jh/sph_jh.h"
|
#include "algo/jh/sph_jh.h"
|
||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
#include "algo/skein/sph_skein.h"
|
#include "algo/skein/sph_skein.h"
|
||||||
#include "algo/shavite/sph_shavite.h"
|
#include "algo/shavite/sph_shavite.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
|
||||||
#include "algo/hamsi/sph_hamsi.h"
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
#include "algo/fugue/sph_fugue.h"
|
|
||||||
#include "algo/shabal/sph_shabal.h"
|
#include "algo/shabal/sph_shabal.h"
|
||||||
#include "algo/whirlpool/sph_whirlpool.h"
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
#include "algo/haval/sph-haval.h"
|
#include "algo/haval/sph-haval.h"
|
||||||
#include "algo/sha/sph_sha2.h"
|
#include "algo/sha/sph_sha2.h"
|
||||||
#if defined(__AES__)
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
|
||||||
#include "algo/fugue/fugue-aesni.h"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/sph_groestl.h"
|
|
||||||
#include "algo/echo/sph_echo.h"
|
|
||||||
#include "algo/fugue/sph_fugue.h"
|
|
||||||
#endif
|
|
||||||
#include "algo/luffa/luffa_for_sse2.h"
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
#if defined(__aarch64__)
|
#include "algo/simd/simd-hash-2way.h"
|
||||||
#include "algo/simd/sph_simd.h"
|
|
||||||
#else
|
|
||||||
#include "algo/simd/nist.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef struct {
|
union _hmq1725_ctx_holder
|
||||||
sph_blake512_context blake1, blake2;
|
{
|
||||||
sph_bmw512_context bmw1, bmw2, bmw3;
|
blake512_context blake;
|
||||||
sph_skein512_context skein1, skein2;
|
sph_bmw512_context bmw;
|
||||||
sph_jh512_context jh1, jh2;
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
sph_keccak512_context keccak1, keccak2;
|
hashState_fugue fugue;
|
||||||
hashState_luffa luffa1, luffa2;
|
#else
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_groestl groestl;
|
||||||
|
hashState_echo echo;
|
||||||
|
#else
|
||||||
|
sph_groestl512_context groestl;
|
||||||
|
sph_echo512_context echo;
|
||||||
|
#endif
|
||||||
|
sph_skein512_context skein;
|
||||||
|
sph_jh512_context jh;
|
||||||
|
sph_keccak512_context keccak;
|
||||||
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite1, shavite2;
|
sph_shavite512_context shavite;
|
||||||
#if defined(__aarch64__)
|
simd512_context simd;
|
||||||
sph_simd512_context simd1, simd2;
|
sph_hamsi512_context hamsi;
|
||||||
#else
|
sph_shabal512_context shabal;
|
||||||
hashState_sd simd1, simd2;
|
sph_whirlpool_context whirlpool;
|
||||||
#endif
|
sph_sha512_context sha;
|
||||||
sph_hamsi512_context hamsi1;
|
sph_haval256_5_context haval;
|
||||||
sph_shabal512_context shabal1;
|
};
|
||||||
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
|
typedef union _hmq1725_ctx_holder hmq1725_ctx_holder;
|
||||||
sph_sha512_context sha1, sha2;
|
|
||||||
sph_haval256_5_context haval1, haval2;
|
|
||||||
#if defined(__AES__)
|
|
||||||
hashState_echo echo1, echo2;
|
|
||||||
hashState_groestl groestl1, groestl2;
|
|
||||||
hashState_fugue fugue1, fugue2;
|
|
||||||
#else
|
|
||||||
sph_groestl512_context groestl1, groestl2;
|
|
||||||
sph_echo512_context echo1, echo2;
|
|
||||||
sph_fugue512_context fugue1, fugue2;
|
|
||||||
#endif
|
|
||||||
} hmq1725_ctx_holder;
|
|
||||||
|
|
||||||
static hmq1725_ctx_holder hmq1725_ctx __attribute__ ((aligned (64)));
|
|
||||||
static __thread sph_bmw512_context hmq_bmw_mid __attribute__ ((aligned (64)));
|
|
||||||
|
|
||||||
void init_hmq1725_ctx()
|
|
||||||
{
|
|
||||||
sph_blake512_init(&hmq1725_ctx.blake1);
|
|
||||||
sph_blake512_init(&hmq1725_ctx.blake2);
|
|
||||||
|
|
||||||
sph_bmw512_init(&hmq1725_ctx.bmw1);
|
|
||||||
sph_bmw512_init(&hmq1725_ctx.bmw2);
|
|
||||||
sph_bmw512_init(&hmq1725_ctx.bmw3);
|
|
||||||
|
|
||||||
sph_skein512_init(&hmq1725_ctx.skein1);
|
|
||||||
sph_skein512_init(&hmq1725_ctx.skein2);
|
|
||||||
|
|
||||||
sph_jh512_init(&hmq1725_ctx.jh1);
|
|
||||||
sph_jh512_init(&hmq1725_ctx.jh2);
|
|
||||||
|
|
||||||
sph_keccak512_init(&hmq1725_ctx.keccak1);
|
|
||||||
sph_keccak512_init(&hmq1725_ctx.keccak2);
|
|
||||||
|
|
||||||
init_luffa( &hmq1725_ctx.luffa1, 512 );
|
|
||||||
init_luffa( &hmq1725_ctx.luffa2, 512 );
|
|
||||||
|
|
||||||
cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 );
|
|
||||||
|
|
||||||
sph_shavite512_init(&hmq1725_ctx.shavite1);
|
|
||||||
sph_shavite512_init(&hmq1725_ctx.shavite2);
|
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
|
||||||
sph_simd512_init(&hmq1725_ctx.simd1);
|
|
||||||
sph_simd512_init(&hmq1725_ctx.simd2);
|
|
||||||
#else
|
|
||||||
init_sd( &hmq1725_ctx.simd1, 512 );
|
|
||||||
init_sd( &hmq1725_ctx.simd2, 512 );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
sph_hamsi512_init(&hmq1725_ctx.hamsi1);
|
|
||||||
|
|
||||||
#if defined(__AES__)
|
|
||||||
fugue512_Init( &hmq1725_ctx.fugue1, 512 );
|
|
||||||
fugue512_Init( &hmq1725_ctx.fugue2, 512 );
|
|
||||||
#else
|
|
||||||
sph_fugue512_init(&hmq1725_ctx.fugue1);
|
|
||||||
sph_fugue512_init(&hmq1725_ctx.fugue2);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
sph_shabal512_init(&hmq1725_ctx.shabal1);
|
|
||||||
|
|
||||||
sph_whirlpool_init(&hmq1725_ctx.whirlpool1);
|
|
||||||
sph_whirlpool_init(&hmq1725_ctx.whirlpool2);
|
|
||||||
sph_whirlpool_init(&hmq1725_ctx.whirlpool3);
|
|
||||||
sph_whirlpool_init(&hmq1725_ctx.whirlpool4);
|
|
||||||
|
|
||||||
sph_sha512_init( &hmq1725_ctx.sha1 );
|
|
||||||
sph_sha512_init( &hmq1725_ctx.sha2 );
|
|
||||||
|
|
||||||
sph_haval256_5_init(&hmq1725_ctx.haval1);
|
|
||||||
sph_haval256_5_init(&hmq1725_ctx.haval2);
|
|
||||||
|
|
||||||
#if defined(__AES__)
|
|
||||||
init_echo( &hmq1725_ctx.echo1, 512 );
|
|
||||||
init_echo( &hmq1725_ctx.echo2, 512 );
|
|
||||||
init_groestl( &hmq1725_ctx.groestl1, 64 );
|
|
||||||
init_groestl( &hmq1725_ctx.groestl2, 64 );
|
|
||||||
#else
|
|
||||||
sph_groestl512_init( &hmq1725_ctx.groestl1 );
|
|
||||||
sph_groestl512_init( &hmq1725_ctx.groestl2 );
|
|
||||||
sph_echo512_init( &hmq1725_ctx.echo1 );
|
|
||||||
sph_echo512_init( &hmq1725_ctx.echo2 );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void hmq_bmw512_midstate( const void* input )
|
|
||||||
{
|
|
||||||
memcpy( &hmq_bmw_mid, &hmq1725_ctx.bmw1, sizeof hmq_bmw_mid );
|
|
||||||
sph_bmw512( &hmq_bmw_mid, input, 64 );
|
|
||||||
}
|
|
||||||
|
|
||||||
__thread hmq1725_ctx_holder h_ctx __attribute__ ((aligned (64)));
|
|
||||||
|
|
||||||
extern void hmq1725hash(void *state, const void *input)
|
extern void hmq1725hash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
const uint32_t mask = 24;
|
const uint32_t mask = 24;
|
||||||
uint32_t hashA[32] __attribute__((aligned(64)));
|
uint32_t hashA[32] __attribute__((aligned(32)));
|
||||||
uint32_t hashB[32] __attribute__((aligned(64)));
|
uint32_t hashB[32] __attribute__((aligned(32)));
|
||||||
const int midlen = 64; // bytes
|
hmq1725_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
const int tail = 80 - midlen; // 16
|
|
||||||
|
|
||||||
memcpy(&h_ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
|
sph_bmw512_init( &ctx.bmw );
|
||||||
|
sph_bmw512( &ctx.bmw, input, 80 );
|
||||||
|
sph_bmw512_close( &ctx.bmw, hashA ); //1
|
||||||
|
|
||||||
memcpy( &h_ctx.bmw1, &hmq_bmw_mid, sizeof hmq_bmw_mid );
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_bmw512( &h_ctx.bmw1, input + midlen, tail );
|
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //0
|
||||||
sph_bmw512_close(&h_ctx.bmw1, hashA); //1
|
sph_whirlpool_close( &ctx.whirlpool, hashB ); //1
|
||||||
|
|
||||||
sph_whirlpool (&h_ctx.whirlpool1, hashA, 64); //0
|
|
||||||
sph_whirlpool_close(&h_ctx.whirlpool1, hashB); //1
|
|
||||||
|
|
||||||
if ( hashB[0] & mask ) //1
|
if ( hashB[0] & mask ) //1
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
update_and_final_groestl( &h_ctx.groestl1, (char*)hashA,
|
groestl512_full( &ctx.groestl, hashA, hashB, 512 );
|
||||||
(const char*)hashB, 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_groestl512 (&h_ctx.groestl1, hashB, 64); //1
|
sph_groestl512_init( &ctx.groestl );
|
||||||
sph_groestl512_close(&h_ctx.groestl1, hashA); //2
|
sph_groestl512( &ctx.groestl, hashB, 64 ); //1
|
||||||
|
sph_groestl512_close( &ctx.groestl, hashA ); //2
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_skein512 (&h_ctx.skein1, hashB, 64); //1
|
sph_skein512_init( &ctx.skein );
|
||||||
sph_skein512_close(&h_ctx.skein1, hashA); //2
|
sph_skein512( &ctx.skein, hashB, 64 ); //1
|
||||||
|
sph_skein512_close( &ctx.skein, hashA ); //2
|
||||||
}
|
}
|
||||||
|
|
||||||
sph_jh512 (&h_ctx.jh1, hashA, 64); //3
|
sph_jh512_init( &ctx.jh );
|
||||||
sph_jh512_close(&h_ctx.jh1, hashB); //4
|
sph_jh512( &ctx.jh, hashA, 64 ); //3
|
||||||
|
sph_jh512_close( &ctx.jh, hashB ); //4
|
||||||
|
|
||||||
sph_keccak512 (&h_ctx.keccak1, hashB, 64); //2
|
sph_keccak512_init( &ctx.keccak );
|
||||||
sph_keccak512_close(&h_ctx.keccak1, hashA); //3
|
sph_keccak512( &ctx.keccak, hashB, 64 ); //2
|
||||||
|
sph_keccak512_close( &ctx.keccak, hashA ); //3
|
||||||
|
|
||||||
if ( hashA[0] & mask ) //4
|
if ( hashA[0] & mask ) //4
|
||||||
{
|
{
|
||||||
sph_blake512 (&h_ctx.blake1, hashA, 64); //
|
blake512_init( &ctx.blake );
|
||||||
sph_blake512_close(&h_ctx.blake1, hashB); //5
|
blake512_update( &ctx.blake, hashA, 64 );
|
||||||
|
blake512_close( &ctx.blake, hashB );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_bmw512 (&h_ctx.bmw2, hashA, 64); //4
|
sph_bmw512_init( &ctx.bmw );
|
||||||
sph_bmw512_close(&h_ctx.bmw2, hashB); //5
|
sph_bmw512( &ctx.bmw, hashA, 64 ); //4
|
||||||
|
sph_bmw512_close( &ctx.bmw, hashB ); //5
|
||||||
}
|
}
|
||||||
|
|
||||||
update_and_final_luffa( &h_ctx.luffa1, hashA, hashB, 64 );
|
luffa_full( &ctx.luffa, hashA, 512, hashB, 64 );
|
||||||
|
|
||||||
cubehashUpdateDigest( &h_ctx.cube, hashB, hashA, 64 );
|
cubehash_full( &ctx.cube, hashB, 512, hashA, 64 );
|
||||||
|
|
||||||
if ( hashB[0] & mask ) //7
|
if ( hashB[0] & mask ) //7
|
||||||
{
|
{
|
||||||
sph_keccak512 (&h_ctx.keccak2, hashB, 64); //
|
sph_keccak512_init( &ctx.keccak );
|
||||||
sph_keccak512_close(&h_ctx.keccak2, hashA); //8
|
sph_keccak512( &ctx.keccak, hashB, 64 ); //
|
||||||
|
sph_keccak512_close( &ctx.keccak, hashA ); //8
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_jh512 (&h_ctx.jh2, hashB, 64); //7
|
sph_jh512_init( &ctx.jh );
|
||||||
sph_jh512_close(&h_ctx.jh2, hashA); //8
|
sph_jh512( &ctx.jh, hashB, 64 ); //7
|
||||||
|
sph_jh512_close( &ctx.jh, hashA ); //8
|
||||||
}
|
}
|
||||||
|
|
||||||
sph_shavite512 (&h_ctx.shavite1, hashA, 64); //3
|
sph_shavite512_init( &ctx.shavite );
|
||||||
sph_shavite512_close(&h_ctx.shavite1, hashB); //4
|
sph_shavite512( &ctx.shavite, hashA, 64 ); //3
|
||||||
|
sph_shavite512_close( &ctx.shavite, hashB ); //4
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
simd512_ctx( &ctx.simd, hashA, hashB, 64 );
|
||||||
sph_simd512 (&h_ctx.simd1, hashB, 64); //3
|
|
||||||
sph_simd512_close(&h_ctx.simd1, hashA); //4
|
|
||||||
#else
|
|
||||||
update_final_sd( &h_ctx.simd1, (BitSequence *)hashA,
|
|
||||||
(const BitSequence *)hashB, 512 );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ( hashA[0] & mask ) //4
|
if ( hashA[0] & mask ) //4
|
||||||
{
|
{
|
||||||
sph_whirlpool (&h_ctx.whirlpool2, hashA, 64); //
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_whirlpool_close(&h_ctx.whirlpool2, hashB); //5
|
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hashB ); //5
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_haval256_5 (&h_ctx.haval1, hashA, 64); //4
|
sph_haval256_5_init( &ctx.haval );
|
||||||
sph_haval256_5_close(&h_ctx.haval1, hashB); //5
|
sph_haval256_5( &ctx.haval, hashA, 64 ); //4
|
||||||
|
sph_haval256_5_close( &ctx.haval, hashB ); //5
|
||||||
memset(&hashB[8], 0, 32);
|
memset(&hashB[8], 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
update_final_echo ( &h_ctx.echo1, (BitSequence *)hashA,
|
echo_full( &ctx.echo, hashA, 512, hashB, 64 );
|
||||||
(const BitSequence *)hashB, 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_echo512 (&h_ctx.echo1, hashB, 64); //5
|
sph_echo512_init( &ctx.echo );
|
||||||
sph_echo512_close(&h_ctx.echo1, hashA); //6
|
sph_echo512( &ctx.echo, hashB, 64 ); //5
|
||||||
|
sph_echo512_close( &ctx.echo, hashA ); //6
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
sph_blake512 (&h_ctx.blake2, hashA, 64); //6
|
blake512_init( &ctx.blake );
|
||||||
sph_blake512_close(&h_ctx.blake2, hashB); //7
|
blake512_update( &ctx.blake, hashA, 64 );
|
||||||
|
blake512_close( &ctx.blake, hashB );
|
||||||
|
|
||||||
if ( hashB[0] & mask ) //7
|
if ( hashB[0] & mask ) //7
|
||||||
{
|
{
|
||||||
sph_shavite512 (&h_ctx.shavite2, hashB, 64); //
|
sph_shavite512_init( &ctx.shavite );
|
||||||
sph_shavite512_close(&h_ctx.shavite2, hashA); //8
|
sph_shavite512( &ctx.shavite, hashB, 64 ); //
|
||||||
|
sph_shavite512_close( &ctx.shavite, hashA ); //8
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
luffa_full( &ctx.luffa, hashA, 512, hashB, 64 );
|
||||||
update_and_final_luffa( &h_ctx.luffa2, hashA, hashB, 64 );
|
|
||||||
}
|
|
||||||
|
|
||||||
sph_hamsi512 (&h_ctx.hamsi1, hashA, 64); //3
|
sph_hamsi512_init( &ctx.hamsi );
|
||||||
sph_hamsi512_close(&h_ctx.hamsi1, hashB); //4
|
sph_hamsi512( &ctx.hamsi, hashA, 64 ); //3
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hashB ); //4
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
fugue512_Update( &h_ctx.fugue1, hashB, 512 ); //2 ////
|
fugue512_full( &ctx.fugue, hashA, hashB, 64 );
|
||||||
fugue512_Final( &h_ctx.fugue1, hashA ); //3
|
|
||||||
#else
|
#else
|
||||||
sph_fugue512 (&h_ctx.fugue1, hashB, 64); //2 ////
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512_close(&h_ctx.fugue1, hashA); //3
|
sph_fugue512( &ctx.fugue, hashB, 64 ); //2 ////
|
||||||
|
sph_fugue512_close( &ctx.fugue, hashA ); //3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( hashA[0] & mask ) //4
|
if ( hashA[0] & mask ) //4
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
update_final_echo ( &h_ctx.echo2, (BitSequence *)hashB,
|
echo_full( &ctx.echo, hashB, 512, hashA, 64 );
|
||||||
(const BitSequence *)hashA, 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_echo512 (&h_ctx.echo2, hashA, 64); //
|
sph_echo512_init( &ctx.echo );
|
||||||
sph_echo512_close(&h_ctx.echo2, hashB); //5
|
sph_echo512( &ctx.echo, hashA, 64 ); //
|
||||||
|
sph_echo512_close( &ctx.echo, hashB ); //5
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
simd512_ctx( &ctx.simd, hashB, hashA, 64 );
|
||||||
#if defined(__aarch64__)
|
|
||||||
sph_simd512(&h_ctx.simd2, hashA, 64); //6
|
|
||||||
sph_simd512_close(&h_ctx.simd2, hashB); //7
|
|
||||||
#else
|
|
||||||
update_final_sd( &h_ctx.simd2, (BitSequence *)hashB,
|
|
||||||
(const BitSequence *)hashA, 512 );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
sph_shabal512 (&h_ctx.shabal1, hashB, 64); //5
|
sph_shabal512_init( &ctx.shabal );
|
||||||
sph_shabal512_close(&h_ctx.shabal1, hashA); //6
|
sph_shabal512( &ctx.shabal, hashB, 64 ); //5
|
||||||
|
sph_shabal512_close( &ctx.shabal, hashA ); //6
|
||||||
|
|
||||||
sph_whirlpool (&h_ctx.whirlpool3, hashA, 64); //6
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_whirlpool_close(&h_ctx.whirlpool3, hashB); //7
|
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //6
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hashB ); //7
|
||||||
|
|
||||||
if ( hashB[0] & mask ) //7
|
if ( hashB[0] & mask ) //7
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
fugue512_Update( &h_ctx.fugue2, hashB, 512 ); //
|
fugue512_full( &ctx.fugue, hashA, hashB, 64 );
|
||||||
fugue512_Final( &h_ctx.fugue2, hashA ); //8
|
|
||||||
#else
|
#else
|
||||||
sph_fugue512 (&h_ctx.fugue2, hashB, 64); //
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512_close(&h_ctx.fugue2, hashA); //8
|
sph_fugue512( &ctx.fugue, hashB, 64 ); //
|
||||||
|
sph_fugue512_close( &ctx.fugue, hashA ); //8
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_sha512( &h_ctx.sha1, hashB, 64 );
|
sph_sha512_init( &ctx.sha );
|
||||||
sph_sha512_close( &h_ctx.sha1, hashA );
|
sph_sha512( &ctx.sha, hashB, 64 );
|
||||||
|
sph_sha512_close( &ctx.sha, hashA );
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
update_and_final_groestl( &h_ctx.groestl2, (char*)hashB,
|
groestl512_full( &ctx.groestl, hashB, hashA, 512 );
|
||||||
(const char*)hashA, 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_groestl512 (&h_ctx.groestl2, hashA, 64); //3
|
sph_groestl512_init( &ctx.groestl );
|
||||||
sph_groestl512_close(&h_ctx.groestl2, hashB); //4
|
sph_groestl512( &ctx.groestl, hashA, 64 ); //3
|
||||||
|
sph_groestl512_close( &ctx.groestl, hashB ); //4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
sph_sha512( &h_ctx.sha2, hashB, 64 );
|
sph_sha512_init( &ctx.sha );
|
||||||
sph_sha512_close( &h_ctx.sha2, hashA );
|
sph_sha512( &ctx.sha, hashB, 64 );
|
||||||
|
sph_sha512_close( &ctx.sha, hashA );
|
||||||
|
|
||||||
if ( hashA[0] & mask ) //4
|
if ( hashA[0] & mask ) //4
|
||||||
{
|
{
|
||||||
sph_haval256_5 (&h_ctx.haval2, hashA, 64); //
|
sph_haval256_5_init( &ctx.haval );
|
||||||
sph_haval256_5_close(&h_ctx.haval2, hashB); //5
|
sph_haval256_5( &ctx.haval, hashA, 64 ); //
|
||||||
|
sph_haval256_5_close( &ctx.haval, hashB ); //5
|
||||||
memset( &hashB[8], 0, 32 );
|
memset( &hashB[8], 0, 32 );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_whirlpool (&h_ctx.whirlpool4, hashA, 64); //4
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_whirlpool_close(&h_ctx.whirlpool4, hashB); //5
|
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //4
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hashB ); //5
|
||||||
}
|
}
|
||||||
|
|
||||||
sph_bmw512 (&h_ctx.bmw3, hashB, 64); //5
|
sph_bmw512_init( &ctx.bmw );
|
||||||
sph_bmw512_close(&h_ctx.bmw3, hashA); //6
|
sph_bmw512( &ctx.bmw, hashB, 64 ); //5
|
||||||
|
sph_bmw512_close( &ctx.bmw, hashA ); //6
|
||||||
|
|
||||||
memcpy( state, hashA, 32 );
|
memcpy( state, hashA, 32 );
|
||||||
}
|
}
|
||||||
@@ -341,30 +259,18 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
// uint32_t endiandata[32] __attribute__((aligned(64)));
|
uint32_t endiandata[20] __attribute__((aligned(32)));
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
//const uint32_t Htarg = ptarget[7];
|
|
||||||
|
|
||||||
//we need bigendian data...
|
//we need bigendian data...
|
||||||
// for (int k = 0; k < 32; k++)
|
|
||||||
for (int k = 0; k < 20; k++)
|
for (int k = 0; k < 20; k++)
|
||||||
be32enc(&endiandata[k], pdata[k]);
|
be32enc(&endiandata[k], pdata[k]);
|
||||||
|
|
||||||
hmq_bmw512_midstate( endiandata );
|
|
||||||
|
|
||||||
// if (opt_debug)
|
|
||||||
// {
|
|
||||||
// applog(LOG_DEBUG, "Thr: %02d, firstN: %08x, maxN: %08x, ToDo: %d", thr_id, first_nonce, max_nonce, max_nonce-first_nonce);
|
|
||||||
// }
|
|
||||||
|
|
||||||
/* I'm to lazy to put the loop in an inline function... so dirty copy'n'paste.... */
|
|
||||||
/* i know that i could set a variable, but i don't know how the compiler will optimize it, not that then the cpu needs to load the value *everytime* in a register */
|
|
||||||
if (ptarget[7]==0) {
|
if (ptarget[7]==0) {
|
||||||
do {
|
do {
|
||||||
pdata[19] = ++n;
|
pdata[19] = ++n;
|
||||||
|
@@ -14,7 +14,8 @@ bool register_quark_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_quark;
|
gate->scanhash = (void*)&scanhash_quark;
|
||||||
gate->hash = (void*)&quark_hash;
|
gate->hash = (void*)&quark_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -7,12 +7,12 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "algo/blake/sph_blake.h"
|
#include "algo/blake/blake512-hash.h"
|
||||||
#include "algo/bmw/sph_bmw.h"
|
#include "algo/bmw/sph_bmw.h"
|
||||||
#include "algo/jh/sph_jh.h"
|
#include "algo/jh/sph_jh.h"
|
||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
#include "algo/skein/sph_skein.h"
|
#include "algo/skein/sph_skein.h"
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#else
|
#else
|
||||||
#include "algo/groestl/sph_groestl.h"
|
#include "algo/groestl/sph_groestl.h"
|
||||||
@@ -21,9 +21,9 @@
|
|||||||
void quark_hash(void *state, const void *input)
|
void quark_hash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
uint32_t hash[16] __attribute__((aligned(64)));
|
uint32_t hash[16] __attribute__((aligned(64)));
|
||||||
sph_blake512_context ctx_blake;
|
blake512_context ctx_blake;
|
||||||
sph_bmw512_context ctx_bmw;
|
sph_bmw512_context ctx_bmw;
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
hashState_groestl ctx_groestl;
|
hashState_groestl ctx_groestl;
|
||||||
#else
|
#else
|
||||||
sph_groestl512_context ctx_groestl;
|
sph_groestl512_context ctx_groestl;
|
||||||
@@ -33,9 +33,7 @@ void quark_hash(void *state, const void *input)
|
|||||||
sph_keccak512_context ctx_keccak;
|
sph_keccak512_context ctx_keccak;
|
||||||
uint32_t mask = 8;
|
uint32_t mask = 8;
|
||||||
|
|
||||||
sph_blake512_init( &ctx_blake );
|
blake512_full( &ctx_blake, hash, input, 80 );
|
||||||
sph_blake512( &ctx_blake, input, 80 );
|
|
||||||
sph_blake512_close( &ctx_blake, hash );
|
|
||||||
|
|
||||||
sph_bmw512_init( &ctx_bmw );
|
sph_bmw512_init( &ctx_bmw );
|
||||||
sph_bmw512( &ctx_bmw, hash, 64 );
|
sph_bmw512( &ctx_bmw, hash, 64 );
|
||||||
@@ -43,7 +41,7 @@ void quark_hash(void *state, const void *input)
|
|||||||
|
|
||||||
if ( hash[0] & mask )
|
if ( hash[0] & mask )
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
init_groestl( &ctx_groestl, 64 );
|
init_groestl( &ctx_groestl, 64 );
|
||||||
update_and_final_groestl( &ctx_groestl, (char*)hash,
|
update_and_final_groestl( &ctx_groestl, (char*)hash,
|
||||||
(const char*)hash, 512 );
|
(const char*)hash, 512 );
|
||||||
@@ -60,7 +58,7 @@ void quark_hash(void *state, const void *input)
|
|||||||
sph_skein512_close( &ctx_skein, hash );
|
sph_skein512_close( &ctx_skein, hash );
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
init_groestl( &ctx_groestl, 64 );
|
init_groestl( &ctx_groestl, 64 );
|
||||||
update_and_final_groestl( &ctx_groestl, (char*)hash,
|
update_and_final_groestl( &ctx_groestl, (char*)hash,
|
||||||
(const char*)hash, 512 );
|
(const char*)hash, 512 );
|
||||||
@@ -76,9 +74,7 @@ void quark_hash(void *state, const void *input)
|
|||||||
|
|
||||||
if ( hash[0] & mask )
|
if ( hash[0] & mask )
|
||||||
{
|
{
|
||||||
sph_blake512_init( &ctx_blake );
|
blake512_full( &ctx_blake, hash, hash, 64 );
|
||||||
sph_blake512( &ctx_blake, hash, 64 );
|
|
||||||
sph_blake512_close( &ctx_blake, hash );
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@@ -83,7 +83,7 @@ int scanhash_deep_2way( struct work *work,uint32_t max_nonce,
|
|||||||
|
|
||||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
casti_v128( endiandata, 4 ) = v128_bswap32( casti_v128( pdata, 4 ) );
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||||
|
@@ -236,7 +236,7 @@ int scanhash_qubit_2way( struct work *work,uint32_t max_nonce,
|
|||||||
|
|
||||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
casti_v128( endiandata, 4 ) = v128_bswap32( casti_v128( pdata, 4 ) );
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||||
|
@@ -16,7 +16,8 @@ bool register_qubit_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_qubit;
|
gate->scanhash = (void*)&scanhash_qubit;
|
||||||
gate->hash = (void*)&qubit_hash;
|
gate->hash = (void*)&qubit_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -8,13 +8,9 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "algo/luffa/luffa_for_sse2.h"
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
#if defined(__aarch64__)
|
#include "algo/simd/simd-hash-2way.h"
|
||||||
#include "algo/simd/sph_simd.h"
|
|
||||||
#else
|
|
||||||
#include "algo/simd/nist.h"
|
|
||||||
#endif
|
|
||||||
#include "algo/shavite/sph_shavite.h"
|
#include "algo/shavite/sph_shavite.h"
|
||||||
#ifdef __AES__
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#else
|
#else
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
@@ -25,12 +21,8 @@ typedef struct
|
|||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cubehash;
|
cubehashParam cubehash;
|
||||||
sph_shavite512_context shavite;
|
sph_shavite512_context shavite;
|
||||||
#if defined(__aarch64__)
|
simd512_context simd;
|
||||||
sph_simd512_context simd;
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#else
|
|
||||||
hashState_sd simd;
|
|
||||||
#endif
|
|
||||||
#ifdef __AES__
|
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
#else
|
#else
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
@@ -45,12 +37,7 @@ void init_qubit_ctx()
|
|||||||
init_luffa(&qubit_ctx.luffa,512);
|
init_luffa(&qubit_ctx.luffa,512);
|
||||||
cubehashInit(&qubit_ctx.cubehash,512,16,32);
|
cubehashInit(&qubit_ctx.cubehash,512,16,32);
|
||||||
sph_shavite512_init(&qubit_ctx.shavite);
|
sph_shavite512_init(&qubit_ctx.shavite);
|
||||||
#if defined(__aarch64__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
sph_simd512_init( &qubit_ctx.simd );
|
|
||||||
#else
|
|
||||||
init_sd( &qubit_ctx.simd, 512 );
|
|
||||||
#endif
|
|
||||||
#ifdef __AES__
|
|
||||||
init_echo(&qubit_ctx.echo, 512);
|
init_echo(&qubit_ctx.echo, 512);
|
||||||
#else
|
#else
|
||||||
sph_echo512_init(&qubit_ctx.echo);
|
sph_echo512_init(&qubit_ctx.echo);
|
||||||
@@ -81,15 +68,9 @@ void qubit_hash(void *output, const void *input)
|
|||||||
sph_shavite512( &ctx.shavite, hash, 64);
|
sph_shavite512( &ctx.shavite, hash, 64);
|
||||||
sph_shavite512_close( &ctx.shavite, hash);
|
sph_shavite512_close( &ctx.shavite, hash);
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
simd512_ctx( &ctx.simd, hash, hash, 64 );
|
||||||
sph_simd512(&ctx.simd, (const void*) hash, 64);
|
|
||||||
sph_simd512_close(&ctx.simd, hash);
|
|
||||||
#else
|
|
||||||
update_sd( &ctx.simd, (const BitSequence *)hash, 512 );
|
|
||||||
final_sd( &ctx.simd, (BitSequence *)hash );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __AES__
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||||
(const BitSequence *) hash, 512 );
|
(const BitSequence *) hash, 512 );
|
||||||
#else
|
#else
|
||||||
|
@@ -45,10 +45,10 @@ static const uint32_t IV[5] =
|
|||||||
|
|
||||||
#define RR(a, b, c, d, e, f, s, r, k) \
|
#define RR(a, b, c, d, e, f, s, r, k) \
|
||||||
do{ \
|
do{ \
|
||||||
a = _mm_add_epi32( mm128_rol_32( _mm_add_epi32( _mm_add_epi32( \
|
a = _mm_add_epi32( v128_rol32( _mm_add_epi32( _mm_add_epi32( \
|
||||||
_mm_add_epi32( a, f( b ,c, d ) ), r ), \
|
_mm_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||||
_mm_set1_epi64x( k ) ), s ), e ); \
|
_mm_set1_epi64x( k ) ), s ), e ); \
|
||||||
c = mm128_rol_32( c, 10 );\
|
c = v128_rol32( c, 10 );\
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
||||||
|
@@ -587,8 +587,8 @@ void sha256_ni2x_final_rounds( uint32_t *out_X, uint32_t *out_Y,
|
|||||||
// Add the nonces (msg[0] lane 3) to A & E (STATE0 lanes 1 & 3)
|
// Add the nonces (msg[0] lane 3) to A & E (STATE0 lanes 1 & 3)
|
||||||
TMSG0_X = casti_m128i( msg_X, 0 );
|
TMSG0_X = casti_m128i( msg_X, 0 );
|
||||||
TMSG0_Y = casti_m128i( msg_Y, 0 );
|
TMSG0_Y = casti_m128i( msg_Y, 0 );
|
||||||
TMP_X = mm128_xim_32( TMSG0_X, TMSG0_X, 0xd5 );
|
TMP_X = v128_xim32( TMSG0_X, TMSG0_X, 0xd5 );
|
||||||
TMP_Y = mm128_xim_32( TMSG0_Y, TMSG0_Y, 0xd5 );
|
TMP_Y = v128_xim32( TMSG0_Y, TMSG0_Y, 0xd5 );
|
||||||
STATE0_X = _mm_add_epi32( STATE0_X, TMP_X );
|
STATE0_X = _mm_add_epi32( STATE0_X, TMP_X );
|
||||||
STATE0_Y = _mm_add_epi32( STATE0_Y, TMP_Y );
|
STATE0_Y = _mm_add_epi32( STATE0_Y, TMP_Y );
|
||||||
|
|
||||||
|
@@ -34,8 +34,6 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "shabal-hash-4way.h"
|
#include "shabal-hash-4way.h"
|
||||||
|
|
||||||
//#if defined(__SSE4_1__) || defined(__ARM_NEON)
|
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
#define DECL_STATE16 \
|
#define DECL_STATE16 \
|
||||||
@@ -47,8 +45,6 @@
|
|||||||
C8, C9, CA, CB, CC, CD, CE, CF; \
|
C8, C9, CA, CB, CC, CD, CE, CF; \
|
||||||
__m512i M0, M1, M2, M3, M4, M5, M6, M7, \
|
__m512i M0, M1, M2, M3, M4, M5, M6, M7, \
|
||||||
M8, M9, MA, MB, MC, MD, ME, MF; \
|
M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||||
const __m512i FIVE = v512_32( 5 ); \
|
|
||||||
const __m512i THREE = v512_32( 3 ); \
|
|
||||||
uint32_t Wlow, Whigh;
|
uint32_t Wlow, Whigh;
|
||||||
|
|
||||||
#define READ_STATE16(state) do \
|
#define READ_STATE16(state) do \
|
||||||
@@ -292,11 +288,21 @@ do { \
|
|||||||
mm512_swap1024_512( BF, CF ); \
|
mm512_swap1024_512( BF, CF ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
static inline __m512i v512_mult_x3( const __m512i x )
|
||||||
|
{
|
||||||
|
return _mm512_add_epi32( x, _mm512_slli_epi32( x, 1 ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m512i v512_mult_x5( const __m512i x )
|
||||||
|
{
|
||||||
|
return _mm512_add_epi32( x, _mm512_slli_epi32( x, 2 ) );
|
||||||
|
}
|
||||||
|
|
||||||
#define PERM_ELT16( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
#define PERM_ELT16( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
||||||
do { \
|
do { \
|
||||||
xa0 = mm512_xor3( xm, xb1, mm512_xorandnot( \
|
xa0 = mm512_xor3( xm, xb1, mm512_xorandnot( \
|
||||||
_mm512_mullo_epi32( mm512_xor3( xa0, xc, \
|
v512_mult_x3( mm512_xor3( xa0, xc, \
|
||||||
_mm512_mullo_epi32( mm512_rol_32( xa1, 15 ), FIVE ) ), THREE ), \
|
v512_mult_x5( mm512_rol_32( xa1, 15 ) ) ) ), \
|
||||||
xb3, xb2 ) ); \
|
xb3, xb2 ) ); \
|
||||||
xb0 = mm512_xnor( xa0, mm512_rol_32( xb0, 1 ) ); \
|
xb0 = mm512_xnor( xa0, mm512_rol_32( xb0, 1 ) ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
@@ -644,8 +650,6 @@ shabal512_16way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
C8, C9, CA, CB, CC, CD, CE, CF; \
|
C8, C9, CA, CB, CC, CD, CE, CF; \
|
||||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7, \
|
__m256i M0, M1, M2, M3, M4, M5, M6, M7, \
|
||||||
M8, M9, MA, MB, MC, MD, ME, MF; \
|
M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||||
const __m256i FIVE = v256_32( 5 ); \
|
|
||||||
const __m256i THREE = v256_32( 3 ); \
|
|
||||||
uint32_t Wlow, Whigh;
|
uint32_t Wlow, Whigh;
|
||||||
|
|
||||||
#define READ_STATE8(state) do \
|
#define READ_STATE8(state) do \
|
||||||
@@ -889,11 +893,21 @@ do { \
|
|||||||
mm256_swap512_256( BF, CF ); \
|
mm256_swap512_256( BF, CF ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
static inline __m256i v256_mult_x3( const __m256i x )
|
||||||
|
{
|
||||||
|
return _mm256_add_epi32( x, _mm256_slli_epi32( x, 1 ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m256i v256_mult_x5( const __m256i x )
|
||||||
|
{
|
||||||
|
return _mm256_add_epi32( x, _mm256_slli_epi32( x, 2 ) );
|
||||||
|
}
|
||||||
|
|
||||||
#define PERM_ELT8( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
#define PERM_ELT8( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
||||||
do { \
|
do { \
|
||||||
xa0 = mm256_xor3( xm, xb1, mm256_xorandnot( \
|
xa0 = mm256_xor3( xm, xb1, mm256_xorandnot( \
|
||||||
_mm256_mullo_epi32( mm256_xor3( xa0, xc, \
|
v256_mult_x3( mm256_xor3( xa0, xc, \
|
||||||
_mm256_mullo_epi32( mm256_rol_32( xa1, 15 ), FIVE ) ), THREE ), \
|
v256_mult_x5( mm256_rol_32( xa1, 15 ) ) ) ), \
|
||||||
xb3, xb2 ) ); \
|
xb3, xb2 ) ); \
|
||||||
xb0 = mm256_xnor( xa0, mm256_rol_32( xb0, 1 ) ); \
|
xb0 = mm256_xnor( xa0, mm256_rol_32( xb0, 1 ) ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
@@ -1226,15 +1240,13 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
#if defined(__SSE4_1__) || defined(__ARM_NEON)
|
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
|
||||||
#define DECL_STATE \
|
#define DECL_STATE \
|
||||||
v128u32_t A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, AA, AB; \
|
v128u32_t A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, AA, AB; \
|
||||||
v128u32_t B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, BA, BB, BC, BD, BE, BF; \
|
v128u32_t B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, BA, BB, BC, BD, BE, BF; \
|
||||||
v128u32_t C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, CA, CB, CC, CD, CE, CF; \
|
v128u32_t C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, CA, CB, CC, CD, CE, CF; \
|
||||||
v128u32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; \
|
v128u32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||||
const v128u32_t FIVE = v128_32( 5 ); \
|
|
||||||
const v128u32_t THREE = v128_32( 3 ); \
|
|
||||||
uint32_t Wlow, Whigh;
|
uint32_t Wlow, Whigh;
|
||||||
|
|
||||||
#define READ_STATE( state ) \
|
#define READ_STATE( state ) \
|
||||||
@@ -1479,11 +1491,21 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
v128_swap256_128( BF, CF ); \
|
v128_swap256_128( BF, CF ); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline v128_t v128_mult_x3( const v128_t x )
|
||||||
|
{
|
||||||
|
return v128_add32( x, v128_sl32( x, 1 ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline v128_t v128_mult_x5( const v128_t x )
|
||||||
|
{
|
||||||
|
return v128_add32( x, v128_sl32( x, 2 ) );
|
||||||
|
}
|
||||||
|
|
||||||
#define PERM_ELT( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
#define PERM_ELT( xa0, xa1, xb0, xb1, xb2, xb3, xc, xm ) \
|
||||||
{ \
|
{ \
|
||||||
xa0 = v128_xor3( xm, xb1, v128_xorandnot( \
|
xa0 = v128_xor3( xm, xb1, v128_xorandnot( \
|
||||||
v128_mul32( v128_xor3( xa0, xc, \
|
v128_mult_x3( v128_xor3( xa0, xc, \
|
||||||
v128_mul32( v128_rol32( xa1, 15 ), FIVE ) ), THREE ), \
|
v128_mult_x5( v128_rol32( xa1, 15 ) ) ) ), \
|
||||||
xb3, xb2 ) ); \
|
xb3, xb2 ) ); \
|
||||||
xb0 = v128_not( v128_xor( xa0, v128_rol32( xb0, 1 ) ) ); \
|
xb0 = v128_not( v128_xor( xa0, v128_rol32( xb0, 1 ) ) ); \
|
||||||
}
|
}
|
||||||
|
@@ -62,7 +62,7 @@ void shabal512_8way_addbits_and_close( void *cc, unsigned ub, unsigned n,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SSE4_1__) || defined(__ARM_NEON)
|
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
v128_t buf[16] __attribute__ ((aligned (64)));
|
v128_t buf[16] __attribute__ ((aligned (64)));
|
||||||
|
@@ -1,369 +0,0 @@
|
|||||||
#include "Swifftx_sha3.h"
|
|
||||||
extern "C" {
|
|
||||||
#include "SWIFFTX.h"
|
|
||||||
}
|
|
||||||
#include <math.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
// The default salt value.
|
|
||||||
// This is the expansion of e (Euler's number) - the 19 digits after 2.71:
|
|
||||||
// 8281828459045235360.
|
|
||||||
// The above in base 256, from MSB to LSB:
|
|
||||||
BitSequence SWIF_saltValueChar[SWIF_HAIFA_SALT_SIZE] = {114, 238, 247, 26, 192, 28, 170, 160};
|
|
||||||
|
|
||||||
// All the IVs here below were produced from the decimal digits of e's expansion.
|
|
||||||
// The code can be found in 'ProduceRandomIV.c'.
|
|
||||||
// The initial value for 224 digest size.
|
|
||||||
const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
|
||||||
{37, 242, 132, 2, 167, 81, 158, 237, 113, 77, 162, 60, 65, 236, 108, 246,
|
|
||||||
101, 72, 190, 109, 58, 205, 99, 6, 114, 169, 104, 114, 38, 146, 121, 142,
|
|
||||||
59, 98, 233, 84, 72, 227, 22, 199, 17, 102, 198, 145, 24, 178, 37, 1,
|
|
||||||
215, 245, 66, 120, 230, 193, 113, 253, 165, 218, 66, 134, 49, 231, 124, 204,
|
|
||||||
0};
|
|
||||||
|
|
||||||
// The initial value for 256 digest size.
|
|
||||||
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
|
||||||
{250, 50, 42, 40, 14, 233, 53, 48, 227, 42, 237, 187, 211, 120, 209, 234,
|
|
||||||
27, 144, 4, 61, 243, 244, 29, 247, 37, 162, 70, 11, 231, 196, 53, 6,
|
|
||||||
193, 240, 94, 126, 204, 132, 104, 46, 114, 29, 3, 104, 118, 184, 201, 3,
|
|
||||||
57, 77, 91, 101, 31, 155, 84, 199, 228, 39, 198, 42, 248, 198, 201, 178,
|
|
||||||
8};
|
|
||||||
|
|
||||||
// The initial value for 384 digest size.
|
|
||||||
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
|
||||||
{40, 145, 193, 100, 205, 171, 47, 76, 254, 10, 196, 41, 165, 207, 200, 79,
|
|
||||||
109, 13, 75, 201, 17, 172, 64, 162, 217, 22, 88, 39, 51, 30, 220, 151,
|
|
||||||
133, 73, 216, 233, 184, 203, 77, 0, 248, 13, 28, 199, 30, 147, 232, 242,
|
|
||||||
227, 124, 169, 174, 14, 45, 27, 87, 254, 73, 68, 136, 135, 159, 83, 152,
|
|
||||||
0};
|
|
||||||
|
|
||||||
// The initial value for 512 digest size.
|
|
||||||
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE] =
|
|
||||||
{195, 126, 197, 167, 157, 114, 99, 126, 208, 105, 200, 90, 71, 195, 144, 138,
|
|
||||||
142, 122, 123, 116, 24, 214, 168, 173, 203, 183, 194, 210, 102, 117, 138, 42,
|
|
||||||
114, 118, 132, 33, 35, 149, 143, 163, 163, 183, 243, 175, 72, 22, 201, 255,
|
|
||||||
102, 243, 22, 187, 211, 167, 239, 76, 164, 70, 80, 182, 181, 212, 9, 185,
|
|
||||||
0};
|
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// NIST API implementation portion.
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
int Swifftx::Init(int hashbitlen)
|
|
||||||
{
|
|
||||||
switch(hashbitlen)
|
|
||||||
{
|
|
||||||
case 224:
|
|
||||||
swifftxState.hashbitlen = hashbitlen;
|
|
||||||
// Initializes h_0 in HAIFA:
|
|
||||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_224, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
|
||||||
break;
|
|
||||||
case 256:
|
|
||||||
swifftxState.hashbitlen = hashbitlen;
|
|
||||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_256, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
|
||||||
break;
|
|
||||||
case 384:
|
|
||||||
swifftxState.hashbitlen = hashbitlen;
|
|
||||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_384, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
|
||||||
break;
|
|
||||||
case 512:
|
|
||||||
swifftxState.hashbitlen = hashbitlen;
|
|
||||||
memcpy(swifftxState.currOutputBlock, SWIF_HAIFA_IV_512, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return BAD_HASHBITLEN;
|
|
||||||
}
|
|
||||||
|
|
||||||
swifftxState.wasUpdated = false;
|
|
||||||
swifftxState.remainingSize = 0;
|
|
||||||
memset(swifftxState.remaining, 0, SWIF_HAIFA_INPUT_BLOCK_SIZE);
|
|
||||||
memset(swifftxState.numOfBitsChar, 0, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
|
||||||
// Initialize the salt with the default value.
|
|
||||||
memcpy(swifftxState.salt, SWIF_saltValueChar, SWIF_HAIFA_SALT_SIZE);
|
|
||||||
|
|
||||||
InitializeSWIFFTX();
|
|
||||||
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Swifftx::Update(const BitSequence *data, DataLength databitlen)
|
|
||||||
{
|
|
||||||
// The size of input in bytes after putting the remaining data from previous invocation.
|
|
||||||
int sizeOfInputAfterRemaining = 0;
|
|
||||||
// The input block to compression function of SWIFFTX:
|
|
||||||
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
|
|
||||||
// Whether we handled a single block.
|
|
||||||
bool wasSingleBlockHandled = false;
|
|
||||||
|
|
||||||
swifftxState.wasUpdated = true;
|
|
||||||
|
|
||||||
// Handle an empty message as required by NIST. Since 'Final()' is oblivious to the input
|
|
||||||
// (but of course uses the output of the compression function from the previous round,
|
|
||||||
// which is called h_{i-1} in HAIFA article), we have to do nothing here.
|
|
||||||
if (databitlen == 0)
|
|
||||||
return SUCCESS;
|
|
||||||
|
|
||||||
// If we had before an input with unaligned length, return an error
|
|
||||||
if (swifftxState.remainingSize % 8)
|
|
||||||
{
|
|
||||||
return INPUT_DATA_NOT_ALIGNED;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert remaining size to bytes.
|
|
||||||
swifftxState.remainingSize /= 8;
|
|
||||||
|
|
||||||
// As long as we have enough data combined from (remaining + data) to fill input block
|
|
||||||
//NASTAVENIE RUND
|
|
||||||
while (((databitlen / 8) + swifftxState.remainingSize) >= SWIF_HAIFA_INPUT_BLOCK_SIZE)
|
|
||||||
{
|
|
||||||
// Fill the input block with data:
|
|
||||||
// 1. The output of the previous block:
|
|
||||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
|
||||||
// 2. The input part of the block:
|
|
||||||
// 2a. The remaining data from the previous 'Update()' call:
|
|
||||||
if (swifftxState.remainingSize)
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
|
|
||||||
swifftxState.remainingSize);
|
|
||||||
// 2b. The input data that we have place for after the 'remaining':
|
|
||||||
sizeOfInputAfterRemaining = SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
|
|
||||||
- ((int) swifftxState.remainingSize) - SWIF_HAIFA_NUM_OF_BITS_SIZE
|
|
||||||
- SWIF_HAIFA_SALT_SIZE;
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize,
|
|
||||||
data, sizeOfInputAfterRemaining);
|
|
||||||
|
|
||||||
// 3. The #bits part of the block:
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
|
|
||||||
+ sizeOfInputAfterRemaining,
|
|
||||||
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
|
||||||
// 4. The salt part of the block:
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + swifftxState.remainingSize
|
|
||||||
+ sizeOfInputAfterRemaining + SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
|
||||||
swifftxState.salt, SWIF_HAIFA_SALT_SIZE);
|
|
||||||
|
|
||||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, false);
|
|
||||||
|
|
||||||
// Update the #bits field with SWIF_HAIFA_INPUT_BLOCK_SIZE.
|
|
||||||
AddToCurrInBase256(swifftxState.numOfBitsChar, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
|
|
||||||
wasSingleBlockHandled = true;
|
|
||||||
data += sizeOfInputAfterRemaining;
|
|
||||||
databitlen -= (sizeOfInputAfterRemaining * 8);
|
|
||||||
swifftxState.remainingSize = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update the swifftxState.remaining and swifftxState.remainingSize.
|
|
||||||
// remainingSize will be in bits after exiting 'Update()'.
|
|
||||||
if (wasSingleBlockHandled)
|
|
||||||
{
|
|
||||||
swifftxState.remainingSize = (unsigned int) databitlen; // now remaining size is in bits.
|
|
||||||
if (swifftxState.remainingSize)
|
|
||||||
memcpy(swifftxState.remaining, data, (swifftxState.remainingSize + 7) / 8);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
memcpy(swifftxState.remaining + swifftxState.remainingSize, data,
|
|
||||||
(size_t) (databitlen + 7) / 8);
|
|
||||||
swifftxState.remainingSize = (swifftxState.remainingSize * 8) + (unsigned short) databitlen;
|
|
||||||
}
|
|
||||||
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Swifftx::Final(BitSequence *hashval)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
// Whether to add one last block. True if the padding appended to the last block overflows
|
|
||||||
// the block size.
|
|
||||||
bool toAddFinalBlock = false;
|
|
||||||
bool toPutOneInFinalBlock = false;
|
|
||||||
unsigned short oneShift = 0;
|
|
||||||
// The size of the last input block before the zeroes padding. We add 1 here because we
|
|
||||||
// include the final '1' bit in the calculation and 7 as we round the length to bytes.
|
|
||||||
unsigned short sizeOfLastInputBlock = (swifftxState.remainingSize + 1 + 7) / 8;
|
|
||||||
// The number of bytes of zero in the padding part.
|
|
||||||
// The padding contains:
|
|
||||||
// 1. A single 1 bit.
|
|
||||||
// 2. As many zeroes as needed.
|
|
||||||
// 3. The message length in bits. Occupies SWIF_HAIFA_NUM_OF_BITS_SIZE bytes.
|
|
||||||
// 4. The digest size. Maximum is 512, so we need 2 bytes.
|
|
||||||
// If the total number achieved is negative, add an additional block, as HAIFA specifies.
|
|
||||||
short numOfZeroBytesInPadding = (short) SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE
|
|
||||||
- sizeOfLastInputBlock - (2 * SWIF_HAIFA_NUM_OF_BITS_SIZE) - 2
|
|
||||||
- SWIF_HAIFA_SALT_SIZE;
|
|
||||||
// The input block to compression function of SWIFFTX:
|
|
||||||
BitSequence currInputBlock[SWIFFTX_INPUT_BLOCK_SIZE] = {0};
|
|
||||||
// The message length in base 256.
|
|
||||||
BitSequence messageLengthChar[SWIF_HAIFA_NUM_OF_BITS_SIZE] = {0};
|
|
||||||
// The digest size used for padding:
|
|
||||||
unsigned char digestSizeLSB = swifftxState.hashbitlen % 256;
|
|
||||||
unsigned char digestSizeMSB = (swifftxState.hashbitlen - digestSizeLSB) / 256;
|
|
||||||
|
|
||||||
if (numOfZeroBytesInPadding < 1)
|
|
||||||
toAddFinalBlock = true;
|
|
||||||
|
|
||||||
// Fill the input block with data:
|
|
||||||
// 1. The output of the previous block:
|
|
||||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
|
||||||
// 2a. The input part of the block, which is the remaining data from the previous 'Update()'
|
|
||||||
// call, if exists and an extra '1' bit (maybe all we have is this extra 1):
|
|
||||||
|
|
||||||
// Add the last 1 in big-endian convention ...
|
|
||||||
if (swifftxState.remainingSize % 8 == 0)
|
|
||||||
{
|
|
||||||
swifftxState.remaining[sizeOfLastInputBlock - 1] = 0x80;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
swifftxState.remaining[sizeOfLastInputBlock - 1] |= (1 << (7 - (swifftxState.remainingSize % 8)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sizeOfLastInputBlock)
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE, swifftxState.remaining,
|
|
||||||
sizeOfLastInputBlock);
|
|
||||||
|
|
||||||
// Compute the message length in base 256:
|
|
||||||
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
|
|
||||||
messageLengthChar[i] = swifftxState.numOfBitsChar[i];
|
|
||||||
if (sizeOfLastInputBlock)
|
|
||||||
AddToCurrInBase256(messageLengthChar, sizeOfLastInputBlock * 8);
|
|
||||||
|
|
||||||
if (!toAddFinalBlock)
|
|
||||||
{
|
|
||||||
// 2b. Put the zeroes:
|
|
||||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
|
|
||||||
0, numOfZeroBytesInPadding);
|
|
||||||
// 2c. Pad the message length:
|
|
||||||
for (i = 0; i < SWIF_HAIFA_NUM_OF_BITS_SIZE; ++i)
|
|
||||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
|
||||||
+ numOfZeroBytesInPadding + i] = messageLengthChar[i];
|
|
||||||
// 2d. Pad the digest size:
|
|
||||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
|
||||||
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE] = digestSizeMSB;
|
|
||||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock
|
|
||||||
+ numOfZeroBytesInPadding + SWIF_HAIFA_NUM_OF_BITS_SIZE + 1] = digestSizeLSB;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// 2b. Put the zeroes, if at all:
|
|
||||||
if ((SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock) > 0)
|
|
||||||
{
|
|
||||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + sizeOfLastInputBlock,
|
|
||||||
0, SWIF_HAIFA_INPUT_BLOCK_SIZE - sizeOfLastInputBlock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. The #bits part of the block:
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
|
|
||||||
swifftxState.numOfBitsChar, SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
|
||||||
// 4. The salt part of the block:
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
|
||||||
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
|
||||||
swifftxState.salt,
|
|
||||||
SWIF_HAIFA_SALT_SIZE);
|
|
||||||
|
|
||||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, !toAddFinalBlock);
|
|
||||||
|
|
||||||
// If we have to add one more block, it is now:
|
|
||||||
if (toAddFinalBlock)
|
|
||||||
{
|
|
||||||
// 1. The previous output block, as usual.
|
|
||||||
memcpy(currInputBlock, swifftxState.currOutputBlock, SWIFFTX_OUTPUT_BLOCK_SIZE);
|
|
||||||
|
|
||||||
// 2a. Instead of the input, zeroes:
|
|
||||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE , 0,
|
|
||||||
SWIF_HAIFA_INPUT_BLOCK_SIZE - SWIF_HAIFA_NUM_OF_BITS_SIZE - 2);
|
|
||||||
// 2b. Instead of the input, the message length:
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
|
||||||
- SWIF_HAIFA_NUM_OF_BITS_SIZE - 2,
|
|
||||||
messageLengthChar,
|
|
||||||
SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
|
||||||
// 2c. Instead of the input, the digest size:
|
|
||||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 2] = digestSizeMSB;
|
|
||||||
currInputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE - 1] = digestSizeLSB;
|
|
||||||
// 3. The #bits part of the block, which is zero in case of additional block:
|
|
||||||
memset(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE,
|
|
||||||
0,
|
|
||||||
SWIF_HAIFA_NUM_OF_BITS_SIZE);
|
|
||||||
// 4. The salt part of the block:
|
|
||||||
memcpy(currInputBlock + SWIFFTX_OUTPUT_BLOCK_SIZE + SWIF_HAIFA_INPUT_BLOCK_SIZE
|
|
||||||
+ SWIF_HAIFA_NUM_OF_BITS_SIZE,
|
|
||||||
swifftxState.salt,
|
|
||||||
SWIF_HAIFA_SALT_SIZE);
|
|
||||||
|
|
||||||
ComputeSingleSWIFFTX(currInputBlock, swifftxState.currOutputBlock, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finally, copy the result into 'hashval'. In case the digest size is not 512bit, copy the
|
|
||||||
// first hashbitlen of them:
|
|
||||||
for (i = 0; i < (swifftxState.hashbitlen / 8); ++i)
|
|
||||||
hashval[i] = swifftxState.currOutputBlock[i];
|
|
||||||
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Swifftx::Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
|
|
||||||
BitSequence *hashval)
|
|
||||||
{
|
|
||||||
int result;
|
|
||||||
//hashState state;
|
|
||||||
// The pointer to the current place in the input we take into the compression function.
|
|
||||||
DataLength currInputIndex = 0;
|
|
||||||
|
|
||||||
result = Swifftx::Init(hashbitlen);
|
|
||||||
|
|
||||||
if (result != SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
for ( ; (databitlen / 8) > SWIF_HAIFA_INPUT_BLOCK_SIZE;
|
|
||||||
currInputIndex += SWIF_HAIFA_INPUT_BLOCK_SIZE, databitlen -= (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8))
|
|
||||||
{
|
|
||||||
result = Swifftx::Update(data + currInputIndex, SWIF_HAIFA_INPUT_BLOCK_SIZE * 8);
|
|
||||||
if (result != SUCCESS)
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The length of the last block may be shorter than (SWIF_HAIFA_INPUT_BLOCK_SIZE * 8)
|
|
||||||
result = Swifftx::Update(data + currInputIndex, databitlen);
|
|
||||||
if (result != SUCCESS)
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Swifftx::Final(hashval);
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Helper fuction implementation portion.
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
void Swifftx::AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE],
|
|
||||||
unsigned short toAdd)
|
|
||||||
{
|
|
||||||
unsigned char remainder = 0;
|
|
||||||
short i;
|
|
||||||
BitSequence currValueInBase256[8] = {0};
|
|
||||||
unsigned short currIndex = 7;
|
|
||||||
unsigned short temp = 0;
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
remainder = toAdd % 256;
|
|
||||||
currValueInBase256[currIndex--] = remainder;
|
|
||||||
toAdd -= remainder;
|
|
||||||
toAdd /= 256;
|
|
||||||
}
|
|
||||||
while(toAdd != 0);
|
|
||||||
|
|
||||||
for (i = 7; i >= 0; --i)
|
|
||||||
{
|
|
||||||
temp = value[i] + currValueInBase256[i];
|
|
||||||
if (temp > 255)
|
|
||||||
{
|
|
||||||
value[i] = temp % 256;
|
|
||||||
currValueInBase256[i - 1]++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
value[i] = (unsigned char) temp;
|
|
||||||
}
|
|
||||||
}
|
|
@@ -1,79 +0,0 @@
|
|||||||
#ifndef SWIFFTX_SHA3_H
|
|
||||||
#define SWIFFTX_SHA3_H
|
|
||||||
|
|
||||||
#include "sha3_interface.h"
|
|
||||||
#include "stdbool.h"
|
|
||||||
#include "stdint.h"
|
|
||||||
|
|
||||||
class Swifftx : public SHA3 {
|
|
||||||
|
|
||||||
#define SWIFFTX_INPUT_BLOCK_SIZE 256
|
|
||||||
#define SWIFFTX_OUTPUT_BLOCK_SIZE 65
|
|
||||||
#define SWIF_HAIFA_SALT_SIZE 8
|
|
||||||
#define SWIF_HAIFA_NUM_OF_BITS_SIZE 8
|
|
||||||
#define SWIF_HAIFA_INPUT_BLOCK_SIZE (SWIFFTX_INPUT_BLOCK_SIZE - SWIFFTX_OUTPUT_BLOCK_SIZE \
|
|
||||||
- SWIF_HAIFA_NUM_OF_BITS_SIZE - SWIF_HAIFA_SALT_SIZE)
|
|
||||||
|
|
||||||
typedef unsigned char BitSequence;
|
|
||||||
//const DataLength SWIF_SALT_VALUE;
|
|
||||||
|
|
||||||
#define SWIF_HAIFA_IV 0
|
|
||||||
|
|
||||||
/*const BitSequence SWIF_HAIFA_IV_224[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
|
||||||
const BitSequence SWIF_HAIFA_IV_256[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
|
||||||
const BitSequence SWIF_HAIFA_IV_384[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
|
||||||
const BitSequence SWIF_HAIFA_IV_512[SWIFFTX_OUTPUT_BLOCK_SIZE];*/
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
SUCCESS = 0,
|
|
||||||
FAIL = 1,
|
|
||||||
BAD_HASHBITLEN = 2,
|
|
||||||
BAD_SALT_SIZE = 3,
|
|
||||||
SET_SALT_VALUE_FAILED = 4,
|
|
||||||
INPUT_DATA_NOT_ALIGNED = 5
|
|
||||||
} HashReturn;
|
|
||||||
|
|
||||||
typedef struct hashState {
|
|
||||||
unsigned short hashbitlen;
|
|
||||||
|
|
||||||
// The data remained after the recent call to 'Update()'.
|
|
||||||
BitSequence remaining[SWIF_HAIFA_INPUT_BLOCK_SIZE + 1];
|
|
||||||
|
|
||||||
// The size of the remaining data in bits.
|
|
||||||
// Is 0 in case there is no remaning data at all.
|
|
||||||
unsigned int remainingSize;
|
|
||||||
|
|
||||||
// The current output of the compression function. At the end will contain the final digest
|
|
||||||
// (which may be needed to be truncated, depending on hashbitlen).
|
|
||||||
BitSequence currOutputBlock[SWIFFTX_OUTPUT_BLOCK_SIZE];
|
|
||||||
|
|
||||||
// The value of '#bits hashed so far' field in HAIFA, in base 256.
|
|
||||||
BitSequence numOfBitsChar[SWIF_HAIFA_NUM_OF_BITS_SIZE];
|
|
||||||
|
|
||||||
// The salt value currently in use:
|
|
||||||
BitSequence salt[SWIF_HAIFA_SALT_SIZE];
|
|
||||||
|
|
||||||
// Indicates whether a single 'Update()' occured.
|
|
||||||
// Ater a call to 'Update()' the key and the salt values cannot be changed.
|
|
||||||
bool wasUpdated;
|
|
||||||
} hashState;
|
|
||||||
|
|
||||||
private:
|
|
||||||
int swifftxNumRounds;
|
|
||||||
hashState swifftxState;
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
|
||||||
int Init(int hashbitlen);
|
|
||||||
int Update(const BitSequence *data, DataLength databitlen);
|
|
||||||
int Final(BitSequence *hashval);
|
|
||||||
int Hash(int hashbitlen, const BitSequence *data, DataLength databitlen,
|
|
||||||
BitSequence *hashval);
|
|
||||||
|
|
||||||
private:
|
|
||||||
static void AddToCurrInBase256(BitSequence value[SWIF_HAIFA_NUM_OF_BITS_SIZE], unsigned short toAdd);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@@ -1,21 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
namespace hash {
|
|
||||||
|
|
||||||
using BitSequence = unsigned char;
|
|
||||||
using DataLength = unsigned long long;
|
|
||||||
|
|
||||||
struct hash_interface {
|
|
||||||
virtual ~hash_interface() = default;
|
|
||||||
|
|
||||||
virtual int Init(int hash_bitsize) = 0;
|
|
||||||
virtual int Update(const BitSequence *data, DataLength data_bitsize) = 0;
|
|
||||||
virtual int Final(BitSequence *hash) = 0;
|
|
||||||
|
|
||||||
virtual int
|
|
||||||
Hash(int hash_bitsize, const BitSequence *data, DataLength data_bitsize, BitSequence *hash) = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace hash
|
|
@@ -1,14 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
//#include <streams/hash/hash_interface.h>
|
|
||||||
#include "hash_interface.h"
|
|
||||||
|
|
||||||
namespace sha3 {
|
|
||||||
|
|
||||||
using BitSequence = hash::BitSequence;
|
|
||||||
using DataLength = hash::DataLength;
|
|
||||||
|
|
||||||
struct sha3_interface : hash::hash_interface {};
|
|
||||||
|
|
||||||
} // namespace sha3
|
|
@@ -506,4 +506,156 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined (X11GOST_2WAY)
|
||||||
|
|
||||||
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
|
#if !( defined(__AES__) || defined(__ARM_FEATURE_AES) )
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
|
#include "algo/echo/sph_echo.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
union _x11gost_context_overlay
|
||||||
|
{
|
||||||
|
blake512_2x64_context blake;
|
||||||
|
bmw512_2x64_context bmw;
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_groestl groestl;
|
||||||
|
#else
|
||||||
|
sph_groestl512_context groestl;
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_echo echo;
|
||||||
|
#else
|
||||||
|
sph_echo512_context echo;
|
||||||
|
#endif
|
||||||
|
jh512_2x64_context jh;
|
||||||
|
keccak512_2x64_context keccak;
|
||||||
|
skein512_2x64_context skein;
|
||||||
|
hashState_luffa luffa;
|
||||||
|
cubehashParam cube;
|
||||||
|
sph_shavite512_context shavite;
|
||||||
|
simd512_context simd;
|
||||||
|
sph_gost512_context gost;
|
||||||
|
};
|
||||||
|
typedef union _x11gost_context_overlay x11gost_context_overlay;
|
||||||
|
|
||||||
|
int x11gost_2x64_hash( void *state, const void *input, int thr_id )
|
||||||
|
{
|
||||||
|
uint8_t vhash[80*2] __attribute__((aligned(64)));
|
||||||
|
uint8_t hash0[64] __attribute__((aligned(64)));
|
||||||
|
uint8_t hash1[64] __attribute__((aligned(64)));
|
||||||
|
x11gost_context_overlay ctx;
|
||||||
|
|
||||||
|
intrlv_2x64( vhash, input, input+80, 640 );
|
||||||
|
|
||||||
|
blake512_2x64_full( &ctx.blake, vhash, vhash, 80 );
|
||||||
|
bmw512_2x64_init( &ctx.bmw );
|
||||||
|
bmw512_2x64_update( &ctx.bmw, vhash, 64 );
|
||||||
|
bmw512_2x64_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
groestl512_full( &ctx.groestl, hash0, hash0, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, hash1, hash1, 512 );
|
||||||
|
#else
|
||||||
|
sph_groestl512_init( &ctx.groestl );
|
||||||
|
sph_groestl512( &ctx.groestl, hash0, 64 );
|
||||||
|
sph_groestl512_close( &ctx.groestl, hash0 );
|
||||||
|
sph_groestl512_init( &ctx.groestl );
|
||||||
|
sph_groestl512( &ctx.groestl, hash1, 64 );
|
||||||
|
sph_groestl512_close( &ctx.groestl, hash1 );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
intrlv_2x64( vhash, hash0, hash1, 512 );
|
||||||
|
|
||||||
|
skein512_2x64_full( &ctx.skein, vhash, vhash, 64 );
|
||||||
|
jh512_2x64_ctx( &ctx.jh, vhash, vhash, 64 );
|
||||||
|
keccak512_2x64_ctx( &ctx.keccak, vhash, vhash, 64 );
|
||||||
|
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512( &ctx.gost, hash0, 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, hash0 );
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512( &ctx.gost, hash1, 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, hash1 );
|
||||||
|
|
||||||
|
luffa_full( &ctx.luffa, hash0, 512, hash0, 64 );
|
||||||
|
luffa_full( &ctx.luffa, hash1, 512, hash1, 64 );
|
||||||
|
|
||||||
|
cubehash_full( &ctx.cube, hash0, 512, hash0, 64 );
|
||||||
|
cubehash_full( &ctx.cube, hash1, 512, hash1, 64 );
|
||||||
|
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||||
|
|
||||||
|
simd512_ctx( &ctx.simd, hash0, hash0, 64 );
|
||||||
|
simd512_ctx( &ctx.simd, hash1, hash1, 64 );
|
||||||
|
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
echo_full( &ctx.echo, hash0, 512, hash0, 64 );
|
||||||
|
echo_full( &ctx.echo, hash1, 512, hash1, 64 );
|
||||||
|
#else
|
||||||
|
sph_echo512_init( &ctx.echo );
|
||||||
|
sph_echo512( &ctx.echo, hash0, 64 );
|
||||||
|
sph_echo512_close( &ctx.echo, hash0 );
|
||||||
|
sph_echo512_init( &ctx.echo );
|
||||||
|
sph_echo512( &ctx.echo, hash1, 64 );
|
||||||
|
sph_echo512_close( &ctx.echo, hash1 );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
memcpy( state, hash0, 32 );
|
||||||
|
memcpy( state+32, hash1, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x11gost_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t hash[8*2] __attribute__((aligned(64)));
|
||||||
|
uint32_t edata[20*2] __attribute__((aligned(64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
memcpy( edata+20, edata, 80 );
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
edata[19] = n;
|
||||||
|
edata[39] = n+1;
|
||||||
|
if ( likely( x11gost_2x64_hash( hash, edata, thr_id ) ) )
|
||||||
|
{
|
||||||
|
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n );
|
||||||
|
submit_solution( work, hash, mythr );
|
||||||
|
}
|
||||||
|
if ( unlikely( valid_hash( hash+8, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+1 );
|
||||||
|
submit_solution( work, hash+8, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n += 2;
|
||||||
|
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
pdata[19] = n;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -10,12 +10,16 @@ bool register_x11gost_algo( algo_gate_t* gate )
|
|||||||
init_x11gost_4way_ctx();
|
init_x11gost_4way_ctx();
|
||||||
gate->scanhash = (void*)&scanhash_x11gost_4way;
|
gate->scanhash = (void*)&scanhash_x11gost_4way;
|
||||||
gate->hash = (void*)&x11gost_4way_hash;
|
gate->hash = (void*)&x11gost_4way_hash;
|
||||||
|
#elif defined(X11GOST_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x11gost_2x64;
|
||||||
|
gate->hash = (void*)&x11gost_2x64_hash;
|
||||||
#else
|
#else
|
||||||
init_x11gost_ctx();
|
init_x11gost_ctx();
|
||||||
gate->scanhash = (void*)&scanhash_x11gost;
|
gate->scanhash = (void*)&scanhash_x11gost;
|
||||||
gate->hash = (void*)&x11gost_hash;
|
gate->hash = (void*)&x11gost_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -8,6 +8,8 @@
|
|||||||
#define X11GOST_8WAY 1
|
#define X11GOST_8WAY 1
|
||||||
#elif defined(__AVX2__) && defined(__AES__)
|
#elif defined(__AVX2__) && defined(__AES__)
|
||||||
#define X11GOST_4WAY 1
|
#define X11GOST_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define X11GOST_2WAY 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool register_x11gost_algo( algo_gate_t* gate );
|
bool register_x11gost_algo( algo_gate_t* gate );
|
||||||
@@ -26,6 +28,12 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
|
|||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
void init_x11gost_4way_ctx();
|
void init_x11gost_4way_ctx();
|
||||||
|
|
||||||
|
#elif defined(X11GOST_2WAY)
|
||||||
|
|
||||||
|
int x11gost_2x64_hash( void *state, const void *input, int thr_id );
|
||||||
|
int scanhash_x11gost_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void x11gost_hash( void *state, const void *input );
|
void x11gost_hash( void *state, const void *input );
|
||||||
|
@@ -1,6 +1,8 @@
|
|||||||
#include "x11gost-gate.h"
|
#include "x11gost-gate.h"
|
||||||
|
|
||||||
#if !defined(X11GOST_8WAY) && !defined(X11GOST_4WAY)
|
// no longer used, not working when last used.
|
||||||
|
|
||||||
|
#if !defined(X11GOST_8WAY) && !defined(X11GOST_4WAY) && !defined(X11GOST_2WAY)
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
@@ -155,13 +155,13 @@ void skunk_4way_hash( void *output, const void *input )
|
|||||||
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*)hash0, 64 );
|
cubehashUpdateDigest( &ctx.cube, hash0, hash0, 64 );
|
||||||
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
|
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1, 64 );
|
cubehashUpdateDigest( &ctx.cube, hash1, hash1, 64 );
|
||||||
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
|
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2, 64 );
|
cubehashUpdateDigest( &ctx.cube, hash2, hash2, 64 );
|
||||||
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
|
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
|
cubehashUpdateDigest( &ctx.cube, hash3, hash3, 64 );
|
||||||
|
|
||||||
fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
||||||
fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
||||||
|
@@ -23,13 +23,12 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
|||||||
*sptr = '\0';
|
*sptr = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
static __thread x16r_context_overlay hex_ctx;
|
|
||||||
|
|
||||||
int hex_hash( void* output, const void* input, int thrid )
|
int hex_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(128) hash[16];
|
||||||
x16r_context_overlay ctx;
|
x16r_context_overlay ctx;
|
||||||
memcpy( &ctx, &hex_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) );
|
||||||
void *in = (void*) input;
|
void *in = (void*) input;
|
||||||
int size = 80;
|
int size = 80;
|
||||||
|
|
||||||
@@ -52,7 +51,7 @@ int hex_hash( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case GROESTL:
|
case GROESTL:
|
||||||
#if defined(__AES__)
|
#if defined(__AES__)
|
||||||
groestl512_full( &ctx.groestl, (char*)hash, (char*)in, size<<3 );
|
groestl512_full( &ctx.groestl, hash, in, size<<3 );
|
||||||
#else
|
#else
|
||||||
sph_groestl512_init( &ctx.groestl );
|
sph_groestl512_init( &ctx.groestl );
|
||||||
sph_groestl512( &ctx.groestl, in, size );
|
sph_groestl512( &ctx.groestl, in, size );
|
||||||
@@ -87,7 +86,7 @@ int hex_hash( void* output, const void* input, int thrid )
|
|||||||
case LUFFA:
|
case LUFFA:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
{
|
{
|
||||||
update_and_final_luffa( &ctx.luffa, hash, (const void*)in+64, 16 );
|
update_and_final_luffa( &ctx.luffa, hash, in+64, 16 );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -97,7 +96,7 @@ int hex_hash( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
cubehashUpdateDigest( &ctx.cube, hash, (const void*)in+64, 16 );
|
cubehashUpdateDigest( &ctx.cube, hash, in+64, 16 );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
@@ -108,22 +107,11 @@ int hex_hash( void* output, const void* input, int thrid )
|
|||||||
shavite512_full( &ctx.shavite, hash, in, size );
|
shavite512_full( &ctx.shavite, hash, in, size );
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
#if defined(__aarch64__)
|
simd512_ctx( &ctx.simd, hash, in, size<<3 );
|
||||||
sph_simd512_init( &ctx.simd );
|
|
||||||
sph_simd512(&ctx.simd, (const void*) hash, 64);
|
|
||||||
sph_simd512_close(&ctx.simd, hash);
|
|
||||||
#else
|
|
||||||
simd_full( &ctx.simd, (BitSequence *)hash,
|
|
||||||
(const BitSequence*)in, size<<3 );
|
|
||||||
init_sd( &ctx.simd, 512 );
|
|
||||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
|
||||||
(const BitSequence*)in, size<<3 );
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case ECHO:
|
case ECHO:
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
echo_full( &ctx.echo, (BitSequence *)hash, 512,
|
echo_full( &ctx.echo, hash, 512, in, size );
|
||||||
(const BitSequence *)in, size );
|
|
||||||
#else
|
#else
|
||||||
sph_echo512_init( &ctx.echo );
|
sph_echo512_init( &ctx.echo );
|
||||||
sph_echo512( &ctx.echo, in, size );
|
sph_echo512( &ctx.echo, in, size );
|
||||||
@@ -216,32 +204,32 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
|||||||
switch ( algo )
|
switch ( algo )
|
||||||
{
|
{
|
||||||
case JH:
|
case JH:
|
||||||
sph_jh512_init( &hex_ctx.jh );
|
sph_jh512_init( &x16r_ref_ctx.jh );
|
||||||
sph_jh512( &hex_ctx.jh, edata, 64 );
|
sph_jh512( &x16r_ref_ctx.jh, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
sph_skein512_init( &hex_ctx.skein );
|
sph_skein512_init( &x16r_ref_ctx.skein );
|
||||||
sph_skein512( &hex_ctx.skein, edata, 64 );
|
sph_skein512( &x16r_ref_ctx.skein, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case LUFFA:
|
case LUFFA:
|
||||||
init_luffa( &hex_ctx.luffa, 512 );
|
init_luffa( &x16r_ref_ctx.luffa, 512 );
|
||||||
update_luffa( &hex_ctx.luffa, edata, 64 );
|
update_luffa( &x16r_ref_ctx.luffa, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
cubehashInit( &hex_ctx.cube, 512, 16, 32 );
|
cubehashInit( &x16r_ref_ctx.cube, 512, 16, 32 );
|
||||||
cubehashUpdate( &hex_ctx.cube, edata, 64 );
|
cubehashUpdate( &x16r_ref_ctx.cube, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
sph_hamsi512_init( &hex_ctx.hamsi );
|
sph_hamsi512_init( &x16r_ref_ctx.hamsi );
|
||||||
sph_hamsi512( &hex_ctx.hamsi, edata, 64 );
|
sph_hamsi512( &x16r_ref_ctx.hamsi, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
sph_shabal512_init( &hex_ctx.shabal );
|
sph_shabal512_init( &x16r_ref_ctx.shabal );
|
||||||
sph_shabal512( &hex_ctx.shabal, edata, 64 );
|
sph_shabal512( &x16r_ref_ctx.shabal, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case WHIRLPOOL:
|
case WHIRLPOOL:
|
||||||
sph_whirlpool_init( &hex_ctx.whirlpool );
|
sph_whirlpool_init( &x16r_ref_ctx.whirlpool );
|
||||||
sph_whirlpool( &hex_ctx.whirlpool, edata, 64 );
|
sph_whirlpool( &x16r_ref_ctx.whirlpool, edata, 64 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -14,9 +14,6 @@
|
|||||||
#include "algo/luffa/luffa_for_sse2.h"
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
#include "algo/simd/simd-hash-2way.h"
|
#include "algo/simd/simd-hash-2way.h"
|
||||||
#if defined(__aarch64__)
|
|
||||||
#include "algo/simd/sph_simd.h"
|
|
||||||
#endif
|
|
||||||
#include "algo/hamsi/sph_hamsi.h"
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
#include "algo/shabal/sph_shabal.h"
|
#include "algo/shabal/sph_shabal.h"
|
||||||
#include "algo/whirlpool/sph_whirlpool.h"
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
@@ -24,11 +21,15 @@
|
|||||||
#include "algo/yespower/yespower.h"
|
#include "algo/yespower/yespower.h"
|
||||||
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#else
|
||||||
#endif
|
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#else
|
||||||
#include "algo/groestl/sph_groestl.h"
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#if defined(__AES__)
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#include "algo/fugue/fugue-aesni.h"
|
#include "algo/fugue/fugue-aesni.h"
|
||||||
#else
|
#else
|
||||||
#include "algo/fugue/sph_fugue.h"
|
#include "algo/fugue/sph_fugue.h"
|
||||||
@@ -46,7 +47,7 @@ typedef struct TortureGarden TortureGarden;
|
|||||||
// Graph of hash algos plus SPH contexts
|
// Graph of hash algos plus SPH contexts
|
||||||
struct TortureGarden
|
struct TortureGarden
|
||||||
{
|
{
|
||||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
#else
|
#else
|
||||||
sph_groestl512_context groestl;
|
sph_groestl512_context groestl;
|
||||||
@@ -56,7 +57,7 @@ struct TortureGarden
|
|||||||
#else
|
#else
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
#endif
|
#endif
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
hashState_fugue fugue;
|
hashState_fugue fugue;
|
||||||
#else
|
#else
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
@@ -112,14 +113,14 @@ static int get_hash( void *output, const void *input, TortureGarden *garden,
|
|||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
fugue512_full( &garden->fugue, hash, input, 64 );
|
fugue512_full( &garden->fugue, hash, input, 64 );
|
||||||
#else
|
#else
|
||||||
sph_fugue512_full( &garden->fugue, hash, input, 64 );
|
sph_fugue512_full( &garden->fugue, hash, input, 64 );
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
groestl512_full( &garden->groestl, hash, input, 512 );
|
groestl512_full( &garden->groestl, hash, input, 512 );
|
||||||
#else
|
#else
|
||||||
sph_groestl512_init( &garden->groestl) ;
|
sph_groestl512_init( &garden->groestl) ;
|
||||||
|
@@ -19,12 +19,12 @@
|
|||||||
// Perform midstate prehash of hash functions with block size <= 72 bytes,
|
// Perform midstate prehash of hash functions with block size <= 72 bytes,
|
||||||
// 76 bytes for hash functions that operate on 32 bit data.
|
// 76 bytes for hash functions that operate on 32 bit data.
|
||||||
|
|
||||||
void x16r_8way_prehash( void *vdata, void *pdata )
|
void x16r_8way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||||
{
|
{
|
||||||
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
const char elem = x16r_hash_order[0];
|
const char elem = hash_order[0];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -110,7 +110,8 @@ void x16r_8way_prehash( void *vdata, void *pdata )
|
|||||||
// Called by wrapper hash function to optionally continue hashing and
|
// Called by wrapper hash function to optionally continue hashing and
|
||||||
// convert to final hash.
|
// convert to final hash.
|
||||||
|
|
||||||
int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
int x16r_8way_hash_generic( void* output, const void* input, int thrid,
|
||||||
|
const char *hash_order, const int func_count )
|
||||||
{
|
{
|
||||||
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
|
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t hash0[20] __attribute__ ((aligned (16)));
|
uint32_t hash0[20] __attribute__ ((aligned (16)));
|
||||||
@@ -136,9 +137,9 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||||
input, 640 );
|
input, 640 );
|
||||||
|
|
||||||
for ( int i = 0; i < 16; i++ )
|
for ( int i = 0; i < func_count; i++ )
|
||||||
{
|
{
|
||||||
const char elem = x16r_hash_order[i];
|
const char elem = hash_order[i];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -474,7 +475,8 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
int x16r_8way_hash( void* output, const void* input, int thrid )
|
int x16r_8way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint8_t hash[64*8] __attribute__ ((aligned (128)));
|
uint8_t hash[64*8] __attribute__ ((aligned (128)));
|
||||||
if ( !x16r_8way_hash_generic( hash, input, thrid ) )
|
if ( !x16r_8way_hash_generic( hash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
memcpy( output, hash, 32 );
|
memcpy( output, hash, 32 );
|
||||||
@@ -495,7 +497,6 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t bedata1[2];
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@@ -508,21 +509,18 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
if ( work->height != saved_height )
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
const uint32_t ntime = bswap_32( pdata[17] );
|
|
||||||
if ( s_ntime != ntime )
|
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
s_ntime = ntime;
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
|
saved_height = work->height;
|
||||||
if ( opt_debug && !thr_id )
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_8way_prehash( vdata, pdata );
|
x16r_8way_prehash( vdata, pdata, x16r_hash_order );
|
||||||
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||||
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
@@ -546,12 +544,12 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
#elif defined (X16R_4WAY)
|
#elif defined (X16R_4WAY)
|
||||||
|
|
||||||
void x16r_4way_prehash( void *vdata, void *pdata )
|
void x16r_4way_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||||
{
|
{
|
||||||
uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
const char elem = x16r_hash_order[0];
|
const char elem = hash_order[0];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -627,7 +625,8 @@ void x16r_4way_prehash( void *vdata, void *pdata )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
int x16r_4way_hash_generic( void* output, const void* input, int thrid,
|
||||||
|
const char *hash_order, const int func_count )
|
||||||
{
|
{
|
||||||
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
|
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
|
||||||
uint32_t hash0[20] __attribute__ ((aligned (32)));
|
uint32_t hash0[20] __attribute__ ((aligned (32)));
|
||||||
@@ -644,9 +643,9 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
|
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||||
|
|
||||||
for ( int i = 0; i < 16; i++ )
|
for ( int i = 0; i < func_count; i++ )
|
||||||
{
|
{
|
||||||
const char elem = x16r_hash_order[i];
|
const char elem = hash_order[i];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -908,7 +907,8 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
int x16r_4way_hash( void* output, const void* input, int thrid )
|
int x16r_4way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint8_t hash[64*4] __attribute__ ((aligned (64)));
|
uint8_t hash[64*4] __attribute__ ((aligned (64)));
|
||||||
if ( !x16r_4way_hash_generic( hash, input, thrid ) )
|
if ( !x16r_4way_hash_generic( hash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
memcpy( output, hash, 32 );
|
memcpy( output, hash, 32 );
|
||||||
@@ -924,7 +924,6 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t bedata1[2];
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@@ -937,20 +936,18 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
if ( work->height != saved_height )
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
const uint32_t ntime = bswap_32( pdata[17] );
|
|
||||||
if ( s_ntime != ntime )
|
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
s_ntime = ntime;
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
if ( opt_debug && !thr_id )
|
saved_height = work->height;
|
||||||
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_4way_prehash( vdata, pdata );
|
x16r_4way_prehash( vdata, pdata, x16r_hash_order );
|
||||||
*noncev = mm256_intrlv_blend_32(
|
*noncev = mm256_intrlv_blend_32(
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
@@ -971,4 +968,404 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined (X16R_2WAY)
|
||||||
|
|
||||||
|
void x16r_2x64_prehash( void *vdata, void *pdata, const char *hash_order )
|
||||||
|
{
|
||||||
|
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||||
|
const char elem = hash_order[0];
|
||||||
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
|
switch ( algo )
|
||||||
|
{
|
||||||
|
case JH:
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
jh512_2x64_init( &x16r_ctx.jh );
|
||||||
|
jh512_2x64_update( &x16r_ctx.jh, vdata, 64 );
|
||||||
|
break;
|
||||||
|
case KECCAK:
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
keccak512_2x64_init( &x16r_ctx.keccak );
|
||||||
|
keccak512_2x64_update( &x16r_ctx.keccak, vdata, 72 );
|
||||||
|
break;
|
||||||
|
case SKEIN:
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
skein512_2x64_prehash64( &x16r_ctx.skein, vdata );
|
||||||
|
break;
|
||||||
|
case LUFFA:
|
||||||
|
{
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
init_luffa( &x16r_ctx.luffa, 512 );
|
||||||
|
update_luffa( &x16r_ctx.luffa, edata, 64 );
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CUBEHASH:
|
||||||
|
{
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdate( &x16r_ctx.cube, edata, 64 );
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case HAMSI:
|
||||||
|
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
hamsi512_2x64_init( &x16r_ctx.hamsi );
|
||||||
|
hamsi512_2x64_update( &x16r_ctx.hamsi, vdata, 72 );
|
||||||
|
#else
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
sph_hamsi512_init( &x16r_ctx.hamsi );
|
||||||
|
sph_hamsi512( &x16r_ctx.hamsi, edata, 72 );
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
fugue512_init( &x16r_ctx.fugue );
|
||||||
|
fugue512_update( &x16r_ctx.fugue, edata, 76 );
|
||||||
|
#else
|
||||||
|
sph_fugue512_init( &x16r_ctx.fugue );
|
||||||
|
sph_fugue512( &x16r_ctx.fugue, edata, 76 );
|
||||||
|
#endif
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
break;
|
||||||
|
case SHABAL:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
sph_shabal512_init( &x16r_ctx.shabal );
|
||||||
|
sph_shabal512( &x16r_ctx.shabal, edata, 64);
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
break;
|
||||||
|
case WHIRLPOOL:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
sph_whirlpool_init( &x16r_ctx.whirlpool );
|
||||||
|
sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 );
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int x16r_2x64_hash_generic( void* output, const void* input, int thrid,
|
||||||
|
const char *hash_order, const int func_count )
|
||||||
|
{
|
||||||
|
uint32_t vhash[20*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash0[20] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash1[20] __attribute__ ((aligned (32)));
|
||||||
|
x16r_2x64_context_overlay ctx;
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||||
|
void *in0 = (void*) hash0;
|
||||||
|
void *in1 = (void*) hash1;
|
||||||
|
int size = 80;
|
||||||
|
|
||||||
|
dintrlv_2x64( hash0, hash1, input, 640 );
|
||||||
|
|
||||||
|
for ( int i = 0; i < func_count; i++ )
|
||||||
|
{
|
||||||
|
const char elem = hash_order[i];
|
||||||
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
|
switch ( algo )
|
||||||
|
{
|
||||||
|
case BLAKE:
|
||||||
|
if ( i == 0 )
|
||||||
|
blake512_2x64_full( &ctx.blake, vhash, input, size );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
blake512_2x64_full( &ctx.blake, vhash, vhash, size );
|
||||||
|
}
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case BMW:
|
||||||
|
bmw512_2x64_init( &ctx.bmw );
|
||||||
|
if ( i == 0 )
|
||||||
|
bmw512_2x64_update( &ctx.bmw, input, size );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
bmw512_2x64_update( &ctx.bmw, vhash, size );
|
||||||
|
}
|
||||||
|
bmw512_2x64_close( &ctx.bmw, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case GROESTL:
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
groestl512_full( &ctx.groestl, hash0, in0, size<<3 );
|
||||||
|
groestl512_full( &ctx.groestl, hash1, in1, size<<3 );
|
||||||
|
#else
|
||||||
|
sph_groestl512_init( &ctx.groestl );
|
||||||
|
sph_groestl512( &ctx.groestl, in0, size );
|
||||||
|
sph_groestl512_close( &ctx.groestl, hash0 );
|
||||||
|
sph_groestl512_init( &ctx.groestl );
|
||||||
|
sph_groestl512( &ctx.groestl, in1, size );
|
||||||
|
sph_groestl512_close( &ctx.groestl, hash1 );
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case JH:
|
||||||
|
if ( i == 0 )
|
||||||
|
jh512_2x64_update( &ctx.jh, input + (64*2), 16 );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
jh512_2x64_init( &ctx.jh );
|
||||||
|
jh512_2x64_update( &ctx.jh, vhash, size );
|
||||||
|
}
|
||||||
|
jh512_2x64_close( &ctx.jh, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case KECCAK:
|
||||||
|
if ( i == 0 )
|
||||||
|
keccak512_2x64_update( &ctx.keccak, input + (72*2), 8 );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
keccak512_2x64_init( &ctx.keccak );
|
||||||
|
keccak512_2x64_update( &ctx.keccak, vhash, size );
|
||||||
|
}
|
||||||
|
keccak512_2x64_close( &ctx.keccak, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case SKEIN:
|
||||||
|
if ( i == 0 )
|
||||||
|
skein512_2x64_final16( &ctx.skein, vhash, input + (64*2) );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
skein512_2x64_full( &ctx.skein, vhash, vhash, size );
|
||||||
|
}
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case LUFFA:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
update_and_final_luffa( &ctx.luffa, hash0, in0 + 64, 16 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||||
|
update_and_final_luffa( &ctx.luffa, hash1, in1 + 64, 16 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
luffa_full( &ctx.luffa, hash0, 512, hash0, size );
|
||||||
|
luffa_full( &ctx.luffa, hash1, 512, hash1, size );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CUBEHASH:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
cubehashUpdateDigest( &ctx.cube, hash0, in0 + 64, 16 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, hash1, in1 + 64, 16 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cubehash_full( &ctx.cube, hash0, 512, hash0, size );
|
||||||
|
cubehash_full( &ctx.cube, hash1, 512, hash1, size );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SHAVITE:
|
||||||
|
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||||
|
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||||
|
break;
|
||||||
|
case SIMD:
|
||||||
|
simd512_ctx( &ctx.simd, hash0, in0, size );
|
||||||
|
simd512_ctx( &ctx.simd, hash1, in1, size );
|
||||||
|
break;
|
||||||
|
case ECHO:
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
echo_full( &ctx.echo, hash0, 512, in0, size );
|
||||||
|
echo_full( &ctx.echo, hash1, 512, in1, size );
|
||||||
|
#else
|
||||||
|
sph_echo512_init( &ctx.echo );
|
||||||
|
sph_echo512( &ctx.echo, in0, size );
|
||||||
|
sph_echo512_close( &ctx.echo, hash0 );
|
||||||
|
sph_echo512_init( &ctx.echo );
|
||||||
|
sph_echo512( &ctx.echo, in1, size );
|
||||||
|
sph_echo512_close( &ctx.echo, hash1 );
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case HAMSI:
|
||||||
|
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||||
|
if ( i == 0 )
|
||||||
|
hamsi512_2x64_update( &ctx.hamsi, input + (72*2), 8 );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, hash0, hash1, size<<3 );
|
||||||
|
hamsi512_2x64_init( &ctx.hamsi );
|
||||||
|
hamsi512_2x64_update( &ctx.hamsi, vhash, size );
|
||||||
|
}
|
||||||
|
hamsi512_2x64_close( &ctx.hamsi, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
#else
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_hamsi512( &ctx.hamsi, in0 + 72, 8 );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash0 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||||
|
sph_hamsi512( &ctx.hamsi, in1 + 72, 8 );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_hamsi512_init( &ctx.hamsi );
|
||||||
|
sph_hamsi512( &ctx.hamsi, hash0, size );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash0 );
|
||||||
|
sph_hamsi512_init( &ctx.hamsi );
|
||||||
|
sph_hamsi512( &ctx.hamsi, hash1, size );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash1 );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
fugue512_update( &ctx.fugue, in0 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash0 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in1 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fugue512_full( &ctx.fugue, hash0, hash0, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash1, hash1, size );
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_fugue512( &ctx.fugue, in0 + 76, 4 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_fugue512_context) );
|
||||||
|
sph_fugue512( &ctx.fugue, in1 + 76, 4 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_fugue512_full( &ctx.fugue, hash0, hash0, size );
|
||||||
|
sph_fugue512_full( &ctx.fugue, hash1, hash1, size );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case SHABAL:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_shabal512( &ctx.shabal, in0 + 64, 16 );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash0 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||||
|
sph_shabal512( &ctx.shabal, in1 + 64, 16 );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_shabal512_init( &ctx.shabal );
|
||||||
|
sph_shabal512( &ctx.shabal, hash0, size );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash0 );
|
||||||
|
sph_shabal512_init( &ctx.shabal );
|
||||||
|
sph_shabal512( &ctx.shabal, hash1, size );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash1 );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case WHIRLPOOL:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, size );
|
||||||
|
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, size );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SHA_512:
|
||||||
|
sha512_2x64_init( &ctx.sha512 );
|
||||||
|
if ( i == 0 )
|
||||||
|
sha512_2x64_update( &ctx.sha512, input, size );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
sha512_2x64_init( &ctx.sha512 );
|
||||||
|
sha512_2x64_update( &ctx.sha512, vhash, size );
|
||||||
|
}
|
||||||
|
sha512_2x64_close( &ctx.sha512, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
|
size = 64;
|
||||||
|
}
|
||||||
|
memcpy( output, hash0, 64 );
|
||||||
|
memcpy( output+64, hash1, 64 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int x16r_2x64_hash( void* output, const void* input, int thrid )
|
||||||
|
{
|
||||||
|
uint8_t hash[64*2] __attribute__ ((aligned (64)));
|
||||||
|
if ( !x16r_2x64_hash_generic( hash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
memcpy( output, hash, 32 );
|
||||||
|
memcpy( output+32, hash+64, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
|
{
|
||||||
|
uint32_t hash[16*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
|
{
|
||||||
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
|
saved_height = work->height;
|
||||||
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
|
}
|
||||||
|
|
||||||
|
x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ( x16r_2x64_hash( hash, vdata, thr_id ) );
|
||||||
|
for ( int i = 0; i < 2; i++ )
|
||||||
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+i );
|
||||||
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
|
}
|
||||||
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
||||||
|
n += 2;
|
||||||
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -13,10 +13,13 @@ __thread x16r_8way_context_overlay x16r_ctx;
|
|||||||
|
|
||||||
__thread x16r_4way_context_overlay x16r_ctx;
|
__thread x16r_4way_context_overlay x16r_ctx;
|
||||||
|
|
||||||
|
#elif defined(X16R_2WAY)
|
||||||
|
|
||||||
|
__thread x16r_2x64_context_overlay x16r_ctx;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__thread x16r_context_overlay x16_ctx;
|
__thread x16r_context_overlay x16r_ref_ctx;
|
||||||
|
|
||||||
|
|
||||||
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
|
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
|
||||||
{
|
{
|
||||||
@@ -58,11 +61,15 @@ bool register_x16r_algo( algo_gate_t* gate )
|
|||||||
#elif defined(X16R_4WAY)
|
#elif defined(X16R_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||||
gate->hash = (void*)&x16r_4way_hash;
|
gate->hash = (void*)&x16r_4way_hash;
|
||||||
|
#elif defined(X16R_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x16r_2x64;
|
||||||
|
gate->hash = (void*)&x16r_2x64_hash;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_x16r;
|
gate->scanhash = (void*)&scanhash_x16r;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -76,11 +83,15 @@ bool register_x16rv2_algo( algo_gate_t* gate )
|
|||||||
#elif defined(X16RV2_4WAY)
|
#elif defined(X16RV2_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x16rv2_4way;
|
gate->scanhash = (void*)&scanhash_x16rv2_4way;
|
||||||
gate->hash = (void*)&x16rv2_4way_hash;
|
gate->hash = (void*)&x16rv2_4way_hash;
|
||||||
|
#elif defined(X16RV2_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x16rv2_2x64;
|
||||||
|
gate->hash = (void*)&x16rv2_2x64_hash;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_x16rv2;
|
gate->scanhash = (void*)&scanhash_x16rv2;
|
||||||
gate->hash = (void*)&x16rv2_hash;
|
gate->hash = (void*)&x16rv2_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -94,11 +105,15 @@ bool register_x16s_algo( algo_gate_t* gate )
|
|||||||
#elif defined(X16R_4WAY)
|
#elif defined(X16R_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||||
gate->hash = (void*)&x16r_4way_hash;
|
gate->hash = (void*)&x16r_4way_hash;
|
||||||
|
#elif defined(X16R_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x16r_2x64;
|
||||||
|
gate->hash = (void*)&x16r_2x64_hash;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_x16r;
|
gate->scanhash = (void*)&scanhash_x16r;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -108,7 +123,6 @@ bool register_x16s_algo( algo_gate_t* gate )
|
|||||||
//
|
//
|
||||||
// X16RT
|
// X16RT
|
||||||
|
|
||||||
|
|
||||||
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash )
|
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash )
|
||||||
{
|
{
|
||||||
int32_t maskedTime = timeStamp & 0xffffff80;
|
int32_t maskedTime = timeStamp & 0xffffff80;
|
||||||
@@ -221,34 +235,42 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
|||||||
|
|
||||||
bool register_x16rt_algo( algo_gate_t* gate )
|
bool register_x16rt_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (X16R_8WAY)
|
#if defined(X16RT_8WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||||
gate->hash = (void*)&x16r_8way_hash;
|
gate->hash = (void*)&x16r_8way_hash;
|
||||||
#elif defined (X16R_4WAY)
|
#elif defined(X16RT_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||||
gate->hash = (void*)&x16r_4way_hash;
|
gate->hash = (void*)&x16r_4way_hash;
|
||||||
|
#elif defined(X16RT_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x16rt_2x64;
|
||||||
|
gate->hash = (void*)&x16r_2x64_hash;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_x16rt;
|
gate->scanhash = (void*)&scanhash_x16rt;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool register_x16rt_veil_algo( algo_gate_t* gate )
|
bool register_x16rt_veil_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (X16R_8WAY)
|
#if defined(X16RT_8WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||||
gate->hash = (void*)&x16r_8way_hash;
|
gate->hash = (void*)&x16r_8way_hash;
|
||||||
#elif defined (X16R_4WAY)
|
#elif defined(X16RT_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||||
gate->hash = (void*)&x16r_4way_hash;
|
gate->hash = (void*)&x16r_4way_hash;
|
||||||
|
#elif defined(X16RT_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x16rt_2x64;
|
||||||
|
gate->hash = (void*)&x16r_2x64_hash;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_x16rt;
|
gate->scanhash = (void*)&scanhash_x16rt;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
gate->build_extraheader = (void*)&veil_build_extraheader;
|
gate->build_extraheader = (void*)&veil_build_extraheader;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -262,7 +284,7 @@ bool register_hex_algo( algo_gate_t* gate )
|
|||||||
{
|
{
|
||||||
gate->scanhash = (void*)&scanhash_hex;
|
gate->scanhash = (void*)&scanhash_hex;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
|
||||||
gate->gen_merkle_root = (void*)&sha256_gen_merkle_root;
|
gate->gen_merkle_root = (void*)&sha256_gen_merkle_root;
|
||||||
opt_target_factor = 128.0;
|
opt_target_factor = 128.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -274,20 +296,25 @@ bool register_hex_algo( algo_gate_t* gate )
|
|||||||
|
|
||||||
bool register_x21s_algo( algo_gate_t* gate )
|
bool register_x21s_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (X16R_8WAY)
|
#if defined(X21S_8WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x21s_8way;
|
gate->scanhash = (void*)&scanhash_x21s_8way;
|
||||||
gate->hash = (void*)&x21s_8way_hash;
|
gate->hash = (void*)&x21s_8way_hash;
|
||||||
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
|
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
|
||||||
#elif defined (X16R_4WAY)
|
#elif defined(X21S_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_x21s_4way;
|
gate->scanhash = (void*)&scanhash_x21s_4way;
|
||||||
gate->hash = (void*)&x21s_4way_hash;
|
gate->hash = (void*)&x21s_4way_hash;
|
||||||
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
|
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
|
||||||
|
#elif defined(X21S_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x21s_2x64;
|
||||||
|
gate->hash = (void*)&x21s_2x64_hash;
|
||||||
|
gate->miner_thread_init = (void*)&x21s_2x64_thread_init;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_x21s;
|
gate->scanhash = (void*)&scanhash_x21s;
|
||||||
gate->hash = (void*)&x21s_hash;
|
gate->hash = (void*)&x21s_hash;
|
||||||
gate->miner_thread_init = (void*)&x21s_thread_init;
|
gate->miner_thread_init = (void*)&x21s_thread_init;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
|
@@ -7,13 +7,15 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include "algo/blake/blake512-hash.h"
|
#include "algo/blake/blake512-hash.h"
|
||||||
#include "algo/bmw/sph_bmw.h"
|
#include "algo/bmw/sph_bmw.h"
|
||||||
#include "algo/groestl/sph_groestl.h"
|
|
||||||
#include "algo/jh/sph_jh.h"
|
#include "algo/jh/sph_jh.h"
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
#include "algo/skein/sph_skein.h"
|
#include "algo/skein/sph_skein.h"
|
||||||
#include "algo/shavite/sph_shavite.h"
|
#include "algo/shavite/sph_shavite.h"
|
||||||
#include "algo/luffa/luffa_for_sse2.h"
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
|
#include "algo/simd/sph_simd.h"
|
||||||
|
#include "algo/simd/nist.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
#include "algo/hamsi/sph_hamsi.h"
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
#include "algo/fugue/sph_fugue.h"
|
#include "algo/fugue/sph_fugue.h"
|
||||||
@@ -21,13 +23,13 @@
|
|||||||
#include "algo/whirlpool/sph_whirlpool.h"
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
#include "algo/sha/sha512-hash.h"
|
#include "algo/sha/sha512-hash.h"
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/fugue/fugue-aesni.h"
|
#include "algo/fugue/fugue-aesni.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined (__AVX2__)
|
//#if defined (__AVX2__)
|
||||||
#include "algo/bmw/bmw-hash-4way.h"
|
#include "algo/bmw/bmw-hash-4way.h"
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/skein/skein-hash-4way.h"
|
#include "algo/skein/skein-hash-4way.h"
|
||||||
@@ -39,7 +41,7 @@
|
|||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||||
#include "algo/shabal/shabal-hash-4way.h"
|
#include "algo/shabal/shabal-hash-4way.h"
|
||||||
#endif
|
//#endif
|
||||||
|
|
||||||
#if defined(__VAES__)
|
#if defined(__VAES__)
|
||||||
#include "algo/groestl/groestl512-hash-4way.h"
|
#include "algo/groestl/groestl512-hash-4way.h"
|
||||||
@@ -48,28 +50,41 @@
|
|||||||
#include "algo/echo/echo-hash-4way.h"
|
#include "algo/echo/echo-hash-4way.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
// X16R, X16S
|
||||||
#include "algo/simd/sph_simd.h"
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
#else
|
#define X16R_8WAY 1
|
||||||
#include "algo/simd/nist.h"
|
#elif defined(__AVX2__) && defined(__AES__)
|
||||||
|
#define X16R_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define X16R_2WAY 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
#define X16R_8WAY 1
|
|
||||||
#define X16RV2_8WAY 1
|
#define X16RV2_8WAY 1
|
||||||
#define X16RT_8WAY 1
|
|
||||||
#define X21S_8WAY 1
|
|
||||||
|
|
||||||
#elif defined(__AVX2__) && defined(__AES__)
|
#elif defined(__AVX2__) && defined(__AES__)
|
||||||
|
|
||||||
#define X16RV2_4WAY 1
|
#define X16RV2_4WAY 1
|
||||||
#define X16RT_4WAY 1
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
#define X21S_4WAY 1
|
#define X16RV2_2WAY 1
|
||||||
#define X16R_4WAY 1
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// X16RT, VEIL
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
#define X16RT_8WAY 1
|
||||||
|
#elif defined(__AVX2__) && defined(__AES__)
|
||||||
|
#define X16RT_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define X16RT_2WAY 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
#define X21S_8WAY 1
|
||||||
|
#elif defined(__AVX2__) && defined(__AES__)
|
||||||
|
#define X21S_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define X21S_2WAY 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
enum x16r_Algo {
|
enum x16r_Algo {
|
||||||
BLAKE = 0,
|
BLAKE = 0,
|
||||||
BMW,
|
BMW,
|
||||||
@@ -134,18 +149,23 @@ union _x16r_8way_context_overlay
|
|||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
#endif
|
#endif
|
||||||
} __attribute__ ((aligned (64)));
|
} __attribute__ ((aligned (64)));
|
||||||
|
#define _x16r_8x64_context_overlay _x16r_8way_context_overlay
|
||||||
|
|
||||||
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
|
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
|
||||||
|
#define x16r_8x64_context_overlay x16r_8way_context_overlay
|
||||||
|
|
||||||
extern __thread x16r_8way_context_overlay x16r_ctx;
|
extern __thread x16r_8way_context_overlay x16r_ctx;
|
||||||
|
|
||||||
void x16r_8way_prehash( void *, void * );
|
void x16r_8way_prehash( void *, void *, const char * );
|
||||||
int x16r_8way_hash_generic( void *, const void *, int );
|
int x16r_8way_hash_generic( void *, const void *, int, const char*, const int );
|
||||||
int x16r_8way_hash( void *, const void *, int );
|
int x16r_8way_hash( void *, const void *, int );
|
||||||
int scanhash_x16r_8way( struct work *, uint32_t ,
|
int scanhash_x16r_8way( struct work *, uint32_t ,
|
||||||
uint64_t *, struct thr_info * );
|
uint64_t *, struct thr_info * );
|
||||||
extern __thread x16r_8way_context_overlay x16r_ctx;
|
|
||||||
|
|
||||||
|
#define x16r_8x64_prehash x16r_8way_prehash
|
||||||
|
#define x16r_8x64_hash_generic x16r_8way_hash_generic
|
||||||
|
#define x16r_8x64_hash x16r_8way_hash
|
||||||
|
#define scanhash_x16r_8x64 scanhash_x16r_8x64
|
||||||
|
|
||||||
#elif defined(X16R_4WAY)
|
#elif defined(X16R_4WAY)
|
||||||
|
|
||||||
@@ -167,7 +187,6 @@ union _x16r_4way_context_overlay
|
|||||||
keccak512_4way_context keccak;
|
keccak512_4way_context keccak;
|
||||||
luffa_2way_context luffa;
|
luffa_2way_context luffa;
|
||||||
cube_2way_context cube;
|
cube_2way_context cube;
|
||||||
hashState_luffa luffa1;
|
|
||||||
simd_2way_context simd;
|
simd_2way_context simd;
|
||||||
hamsi512_4way_context hamsi;
|
hamsi512_4way_context hamsi;
|
||||||
hashState_fugue fugue;
|
hashState_fugue fugue;
|
||||||
@@ -175,46 +194,102 @@ union _x16r_4way_context_overlay
|
|||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
sha512_4way_context sha512;
|
sha512_4way_context sha512;
|
||||||
} __attribute__ ((aligned (64)));
|
} __attribute__ ((aligned (64)));
|
||||||
|
#define _x16r_4x64_context_overlay _x16r_4way_context_overlay
|
||||||
|
|
||||||
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
|
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
|
||||||
|
#define x16r_4x64_context_overlay x16r_4way_context_overlay
|
||||||
|
|
||||||
extern __thread x16r_4way_context_overlay x16r_ctx;
|
extern __thread x16r_4way_context_overlay x16r_ctx;
|
||||||
|
|
||||||
void x16r_4way_prehash( void *, void * );
|
void x16r_4way_prehash( void *, void *, const char * );
|
||||||
int x16r_4way_hash_generic( void *, const void *, int );
|
int x16r_4way_hash_generic( void *, const void *, int, const char*, const int );
|
||||||
int x16r_4way_hash( void *, const void *, int );
|
int x16r_4way_hash( void *, const void *, int );
|
||||||
int scanhash_x16r_4way( struct work *, uint32_t,
|
int scanhash_x16r_4way( struct work *, uint32_t,
|
||||||
uint64_t *, struct thr_info * );
|
uint64_t *, struct thr_info * );
|
||||||
extern __thread x16r_4way_context_overlay x16r_ctx;
|
|
||||||
|
#define x16r_4x64_prehash x16r_4way_prehash
|
||||||
|
#define x16r_4x64_hash_generic x16r_4way_hash_generic
|
||||||
|
#define x16r_4x64_hash x16r_4way_hash
|
||||||
|
#define scanhash_x16r_4x64 scanhash_x16r_4x64
|
||||||
|
|
||||||
|
#elif defined(X16R_2WAY)
|
||||||
|
|
||||||
|
union _x16r_2x64_context_overlay
|
||||||
|
{
|
||||||
|
blake512_2x64_context blake;
|
||||||
|
bmw512_2x64_context bmw;
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_groestl groestl;
|
||||||
|
#else
|
||||||
|
sph_groestl512_context groestl;
|
||||||
|
#endif
|
||||||
|
skein512_2x64_context skein;
|
||||||
|
jh512_2x64_context jh;
|
||||||
|
keccak512_2x64_context keccak;
|
||||||
|
hashState_luffa luffa;
|
||||||
|
cubehashParam cube;
|
||||||
|
shavite512_context shavite;
|
||||||
|
simd512_context simd;
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_echo echo;
|
||||||
|
#else
|
||||||
|
sph_echo512_context echo;
|
||||||
|
#endif
|
||||||
|
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||||
|
hamsi_2x64_context hamsi;
|
||||||
|
#else
|
||||||
|
sph_hamsi512_context hamsi;
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_fugue fugue;
|
||||||
|
#else
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
#endif
|
||||||
|
sph_shabal512_context shabal;
|
||||||
|
sph_whirlpool_context whirlpool;
|
||||||
|
sha512_2x64_context sha512;
|
||||||
|
} __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
typedef union _x16r_2x64_context_overlay x16r_2x64_context_overlay;
|
||||||
|
|
||||||
|
void x16r_2x64_prehash( void *, void *, const char * );
|
||||||
|
int x16r_2x64_hash_generic( void *, const void *, int, const char*, const int );
|
||||||
|
int x16r_2x64_hash( void *, const void *, int );
|
||||||
|
int scanhash_x16r_2x64( struct work *, uint32_t,
|
||||||
|
uint64_t *, struct thr_info * );
|
||||||
|
extern __thread x16r_2x64_context_overlay x16r_ctx;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// need a reference, add hooks for SSE2.
|
||||||
// needed for hex
|
// needed for hex
|
||||||
union _x16r_context_overlay
|
union _x16r_context_overlay
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
|
||||||
hashState_echo echo;
|
|
||||||
hashState_groestl groestl;
|
|
||||||
hashState_fugue fugue;
|
|
||||||
#else
|
|
||||||
sph_groestl512_context groestl;
|
|
||||||
sph_echo512_context echo;
|
|
||||||
sph_fugue512_context fugue;
|
|
||||||
#endif
|
|
||||||
blake512_context blake;
|
blake512_context blake;
|
||||||
sph_bmw512_context bmw;
|
sph_bmw512_context bmw;
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_groestl groestl;
|
||||||
|
#else
|
||||||
|
sph_groestl512_context groestl;
|
||||||
|
#endif
|
||||||
sph_skein512_context skein;
|
sph_skein512_context skein;
|
||||||
sph_jh512_context jh;
|
sph_jh512_context jh;
|
||||||
sph_keccak512_context keccak;
|
sph_keccak512_context keccak;
|
||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
#if defined(__aarch64__)
|
simd512_context simd;
|
||||||
sph_simd512_context simd;
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_echo echo;
|
||||||
#else
|
#else
|
||||||
hashState_sd simd;
|
sph_echo512_context echo;
|
||||||
#endif
|
#endif
|
||||||
sph_hamsi512_context hamsi;
|
sph_hamsi512_context hamsi;
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_fugue fugue;
|
||||||
|
#else
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
#endif
|
||||||
sph_shabal512_context shabal;
|
sph_shabal512_context shabal;
|
||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
sph_sha512_context sha512;
|
sph_sha512_context sha512;
|
||||||
@@ -222,10 +297,10 @@ union _x16r_context_overlay
|
|||||||
|
|
||||||
typedef union _x16r_context_overlay x16r_context_overlay;
|
typedef union _x16r_context_overlay x16r_context_overlay;
|
||||||
|
|
||||||
extern __thread x16r_context_overlay x16_ctx;
|
extern __thread x16r_context_overlay x16r_ref_ctx;
|
||||||
|
|
||||||
void x16r_prehash( void *, void * );
|
void x16r_prehash( void *, void *, const char * );
|
||||||
int x16r_hash_generic( void *, const void *, int );
|
int x16r_hash_generic( void *, const void *, int, const char*, const int );
|
||||||
int x16r_hash( void *, const void *, int );
|
int x16r_hash( void *, const void *, int );
|
||||||
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );
|
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );
|
||||||
|
|
||||||
@@ -242,6 +317,12 @@ int x16rv2_4way_hash( void *state, const void *input, int thrid );
|
|||||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#elif defined(X16RV2_2WAY)
|
||||||
|
|
||||||
|
int x16rv2_2x64_hash( void *state, const void *input, int thrid );
|
||||||
|
int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
int x16rv2_hash( void *state, const void *input, int thr_id );
|
int x16rv2_hash( void *state, const void *input, int thr_id );
|
||||||
@@ -251,18 +332,24 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// x16rt, veil
|
// x16rt, veil
|
||||||
#if defined(X16R_8WAY)
|
#if defined(X16RT_8WAY)
|
||||||
|
|
||||||
//void x16rt_8way_hash( void *state, const void *input );
|
//void x16rt_8way_hash( void *state, const void *input );
|
||||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#elif defined(X16R_4WAY)
|
#elif defined(X16RT_4WAY)
|
||||||
|
|
||||||
//void x16rt_4way_hash( void *state, const void *input );
|
//void x16rt_4way_hash( void *state, const void *input );
|
||||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#elif defined(X16RT_2WAY)
|
||||||
|
|
||||||
|
//void x16rt_4way_hash( void *state, const void *input );
|
||||||
|
int scanhash_x16rt_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
//void x16rt_hash( void *state, const void *input );
|
//void x16rt_hash( void *state, const void *input );
|
||||||
@@ -272,20 +359,27 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// x21s
|
// x21s
|
||||||
#if defined(X16R_8WAY)
|
#if defined(X21S_8WAY)
|
||||||
|
|
||||||
int x21s_8way_hash( void *state, const void *input, int thrid );
|
int x21s_8way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
bool x21s_8way_thread_init();
|
bool x21s_8way_thread_init();
|
||||||
|
|
||||||
#elif defined(X16R_4WAY)
|
#elif defined(X21S_4WAY)
|
||||||
|
|
||||||
int x21s_4way_hash( void *state, const void *input, int thrid );
|
int x21s_4way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
bool x21s_4way_thread_init();
|
bool x21s_4way_thread_init();
|
||||||
|
|
||||||
|
#elif defined(X21S_2WAY)
|
||||||
|
|
||||||
|
int x21s_2x64_hash( void *state, const void *input, int thrid );
|
||||||
|
int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
bool x21s_2x64_thread_init();
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
int x21s_hash( void *state, const void *input, int thr_id );
|
int x21s_hash( void *state, const void *input, int thr_id );
|
||||||
|
@@ -10,55 +10,60 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
void x16r_prehash( void *edata, void *pdata )
|
void x16r_prehash( void *edata, void *pdata, const char *hash_order )
|
||||||
{
|
{
|
||||||
const char elem = x16r_hash_order[0];
|
const char elem = hash_order[0];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
{
|
{
|
||||||
case JH:
|
case JH:
|
||||||
sph_jh512_init( &x16_ctx.jh );
|
sph_jh512_init( &x16r_ref_ctx.jh );
|
||||||
sph_jh512( &x16_ctx.jh, edata, 64 );
|
sph_jh512( &x16r_ref_ctx.jh, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
sph_skein512_init( &x16_ctx.skein );
|
sph_skein512_init( &x16r_ref_ctx.skein );
|
||||||
sph_skein512( &x16_ctx.skein, edata, 64 );
|
sph_skein512( &x16r_ref_ctx.skein, edata, 64 );
|
||||||
|
break;
|
||||||
|
case KECCAK:
|
||||||
|
sph_keccak512_init( &x16r_ref_ctx.keccak );
|
||||||
|
sph_keccak512( &x16r_ref_ctx.keccak, edata, 72 );
|
||||||
break;
|
break;
|
||||||
case LUFFA:
|
case LUFFA:
|
||||||
init_luffa( &x16_ctx.luffa, 512 );
|
init_luffa( &x16r_ref_ctx.luffa, 512 );
|
||||||
update_luffa( &x16_ctx.luffa, edata, 64 );
|
update_luffa( &x16r_ref_ctx.luffa, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
cubehashInit( &x16_ctx.cube, 512, 16, 32 );
|
cubehashInit( &x16r_ref_ctx.cube, 512, 16, 32 );
|
||||||
cubehashUpdate( &x16_ctx.cube, edata, 64 );
|
cubehashUpdate( &x16r_ref_ctx.cube, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
sph_hamsi512_init( &x16_ctx.hamsi );
|
sph_hamsi512_init( &x16r_ref_ctx.hamsi );
|
||||||
sph_hamsi512( &x16_ctx.hamsi, edata, 64 );
|
sph_hamsi512( &x16r_ref_ctx.hamsi, edata, 72 );
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
sph_shabal512_init( &x16_ctx.shabal );
|
sph_shabal512_init( &x16r_ref_ctx.shabal );
|
||||||
sph_shabal512( &x16_ctx.shabal, edata, 64 );
|
sph_shabal512( &x16r_ref_ctx.shabal, edata, 64 );
|
||||||
break;
|
break;
|
||||||
case WHIRLPOOL:
|
case WHIRLPOOL:
|
||||||
sph_whirlpool_init( &x16_ctx.whirlpool );
|
sph_whirlpool_init( &x16r_ref_ctx.whirlpool );
|
||||||
sph_whirlpool( &x16_ctx.whirlpool, edata, 64 );
|
sph_whirlpool( &x16r_ref_ctx.whirlpool, edata, 64 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int x16r_hash_generic( void* output, const void* input, int thrid )
|
int x16r_hash_generic( void* output, const void* input, int thrid,
|
||||||
|
const char *hash_order, const int func_count )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(32) hash[16];
|
||||||
x16r_context_overlay ctx;
|
x16r_context_overlay ctx;
|
||||||
memcpy( &ctx, &x16_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) );
|
||||||
void *in = (void*) input;
|
void *in = (void*) input;
|
||||||
int size = 80;
|
int size = 80;
|
||||||
|
|
||||||
for ( int i = 0; i < 16; i++ )
|
for ( int i = 0; i < func_count; i++ )
|
||||||
{
|
{
|
||||||
const char elem = x16r_hash_order[i];
|
const char elem = hash_order[i];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -74,8 +79,8 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
|||||||
sph_bmw512_close( &ctx.bmw, hash );
|
sph_bmw512_close( &ctx.bmw, hash );
|
||||||
break;
|
break;
|
||||||
case GROESTL:
|
case GROESTL:
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
||||||
groestl512_full( &ctx.groestl, (char*)hash, (char*)in, size<<3 );
|
groestl512_full( &ctx.groestl, hash, in, size<<3 );
|
||||||
#else
|
#else
|
||||||
sph_groestl512_init( &ctx.groestl );
|
sph_groestl512_init( &ctx.groestl );
|
||||||
sph_groestl512( &ctx.groestl, in, size );
|
sph_groestl512( &ctx.groestl, in, size );
|
||||||
@@ -93,8 +98,13 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
|||||||
sph_jh512_close( &ctx.jh, hash );
|
sph_jh512_close( &ctx.jh, hash );
|
||||||
break;
|
break;
|
||||||
case KECCAK:
|
case KECCAK:
|
||||||
|
if ( i == 0 )
|
||||||
|
sph_keccak512( &ctx.keccak, in+72, 8 );
|
||||||
|
else
|
||||||
|
{
|
||||||
sph_keccak512_init( &ctx.keccak );
|
sph_keccak512_init( &ctx.keccak );
|
||||||
sph_keccak512( &ctx.keccak, in, size );
|
sph_keccak512( &ctx.keccak, in, size );
|
||||||
|
}
|
||||||
sph_keccak512_close( &ctx.keccak, hash );
|
sph_keccak512_close( &ctx.keccak, hash );
|
||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
@@ -109,13 +119,13 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case LUFFA:
|
case LUFFA:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
update_and_final_luffa( &ctx.luffa, hash, (const void*)in+64, 16 );
|
update_and_final_luffa( &ctx.luffa, hash, in+64, 16 );
|
||||||
else
|
else
|
||||||
luffa_full( &ctx.luffa, hash, 512, in, size );
|
luffa_full( &ctx.luffa, hash, 512, in, size );
|
||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
cubehashUpdateDigest( &ctx.cube, hash, (const void*)in+64, 16 );
|
cubehashUpdateDigest( &ctx.cube, hash, in+64, 16 );
|
||||||
else
|
else
|
||||||
cubehash_full( &ctx.cube, hash, 512, in, size );
|
cubehash_full( &ctx.cube, hash, 512, in, size );
|
||||||
break;
|
break;
|
||||||
@@ -123,19 +133,13 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
|||||||
shavite512_full( &ctx.shavite, hash, in, size );
|
shavite512_full( &ctx.shavite, hash, in, size );
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
#if defined(__aarch64__)
|
|
||||||
sph_simd512_init( &ctx.simd );
|
sph_simd512_init( &ctx.simd );
|
||||||
sph_simd512(&ctx.simd, (const void*) hash, 64);
|
sph_simd512( &ctx.simd, hash, size );
|
||||||
sph_simd512_close( &ctx.simd, hash );
|
sph_simd512_close( &ctx.simd, hash );
|
||||||
#else
|
|
||||||
simd_full( &ctx.simd, (BitSequence *)hash,
|
|
||||||
(const BitSequence*)in, size<<3 );
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case ECHO:
|
case ECHO:
|
||||||
#if defined(__AES__)
|
#if defined(__AES__)
|
||||||
echo_full( &ctx.echo, (BitSequence*)hash, 512,
|
echo_full( &ctx.echo, hash, 512, in, size );
|
||||||
(const BitSequence*)in, size );
|
|
||||||
#else
|
#else
|
||||||
sph_echo512_init( &ctx.echo );
|
sph_echo512_init( &ctx.echo );
|
||||||
sph_echo512( &ctx.echo, in, size );
|
sph_echo512( &ctx.echo, in, size );
|
||||||
@@ -144,7 +148,7 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
sph_hamsi512( &ctx.hamsi, in+64, 16 );
|
sph_hamsi512( &ctx.hamsi, in+72, 8 );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_hamsi512_init( &ctx.hamsi );
|
sph_hamsi512_init( &ctx.hamsi );
|
||||||
@@ -153,11 +157,7 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
|||||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
#if defined(__AES__)
|
|
||||||
fugue512_full( &ctx.fugue, hash, in, size );
|
|
||||||
#else
|
|
||||||
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
@@ -197,7 +197,8 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
|
|||||||
int x16r_hash( void* output, const void* input, int thrid )
|
int x16r_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint8_t hash[64] __attribute__ ((aligned (64)));
|
uint8_t hash[64] __attribute__ ((aligned (64)));
|
||||||
if ( !x16r_hash_generic( hash, input, thrid ) )
|
if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
memcpy( output, hash, 32 );
|
memcpy( output, hash, 32 );
|
||||||
@@ -207,8 +208,8 @@ int x16r_hash( void* output, const void* input, int thrid )
|
|||||||
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash32[8];
|
uint32_t _ALIGN(32) hash32[8];
|
||||||
uint32_t _ALIGN(128) edata[20];
|
uint32_t _ALIGN(32) edata[20];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@@ -230,7 +231,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
|||||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_prehash( edata, pdata );
|
x16r_prehash( edata, pdata, x16r_hash_order );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#if defined (X16R_8WAY)
|
#if defined (X16RT_8WAY)
|
||||||
|
|
||||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr)
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
@@ -30,12 +30,12 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
|||||||
x16rt_getTimeHash( masked_ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = masked_ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( !thr_id )
|
if ( !opt_quiet && !thr_id )
|
||||||
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
|
applog( LOG_INFO, "Hash order %s, Ntime %08x",
|
||||||
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
x16r_hash_order, bswap_32( pdata[17] ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_8way_prehash( vdata, pdata );
|
x16r_8way_prehash( vdata, pdata, x16r_hash_order );
|
||||||
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||||
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
@@ -57,7 +57,7 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined (X16R_4WAY)
|
#elif defined (X16RT_4WAY)
|
||||||
|
|
||||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr)
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
@@ -84,12 +84,12 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
|||||||
x16rt_getTimeHash( masked_ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = masked_ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( !thr_id )
|
if ( !opt_quiet && !thr_id )
|
||||||
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
|
applog( LOG_INFO, "Hash order %s, Ntime %08x",
|
||||||
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
x16r_hash_order, bswap_32( pdata[17] ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_4way_prehash( vdata, pdata );
|
x16r_4way_prehash( vdata, pdata, x16r_hash_order );
|
||||||
*noncev = mm256_intrlv_blend_32(
|
*noncev = mm256_intrlv_blend_32(
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
@@ -110,4 +110,55 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined (X16RT_2WAY)
|
||||||
|
|
||||||
|
int scanhash_x16rt_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
|
{
|
||||||
|
uint32_t hash[2*16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t _ALIGN(64) timeHash[4*8];
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
|
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||||
|
if ( s_ntime != masked_ntime )
|
||||||
|
{
|
||||||
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
|
s_ntime = masked_ntime;
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "Hash order %s, Ntime %08x",
|
||||||
|
x16r_hash_order, bswap_32( pdata[17] ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ( x16r_2x64_hash( hash, vdata, thr_id ) )
|
||||||
|
for ( int i = 0; i < 2; i++ )
|
||||||
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+i );
|
||||||
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
|
}
|
||||||
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
||||||
|
n += 2;
|
||||||
|
} while ( ( n < last_nonce ) && !(*restart) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
#include "x16r-gate.h"
|
#include "x16r-gate.h"
|
||||||
|
|
||||||
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
|
#if !defined(X16RT_8WAY) && !defined(X16RT_4WAY)
|
||||||
|
|
||||||
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
@@ -31,7 +31,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
|||||||
x16r_hash_order, swab32( pdata[17] ), timeHash );
|
x16r_hash_order, swab32( pdata[17] ), timeHash );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_prehash( edata, pdata );
|
x16r_prehash( edata, pdata, x16r_hash_order );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@@ -395,7 +395,7 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 );
|
hamsi512_8way_update( &ctx.hamsi, input + (72<<3), 8 );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||||
@@ -409,14 +409,43 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
|||||||
hash7, vhash );
|
hash7, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
fugue512_full( &ctx.fugue, hash0, in0, size );
|
if ( i == 0 )
|
||||||
fugue512_full( &ctx.fugue, hash1, in1, size );
|
{
|
||||||
fugue512_full( &ctx.fugue, hash2, in2, size );
|
fugue512_update( &ctx.fugue, in0 + 76, 4 );
|
||||||
fugue512_full( &ctx.fugue, hash3, in3, size );
|
fugue512_final( &ctx.fugue, hash0 );
|
||||||
fugue512_full( &ctx.fugue, hash4, in4, size );
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
fugue512_full( &ctx.fugue, hash5, in5, size );
|
fugue512_update( &ctx.fugue, in1 + 76, 4 );
|
||||||
fugue512_full( &ctx.fugue, hash6, in6, size );
|
fugue512_final( &ctx.fugue, hash1 );
|
||||||
fugue512_full( &ctx.fugue, hash7, in7, size );
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in2 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash2 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in3 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash3 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in4 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash4 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in5 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash5 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in6 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash6 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in7 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash7 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fugue512_full( &ctx.fugue, hash0, hash0, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash1, hash1, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash2, hash2, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash3, hash3, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash4, hash4, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash5, hash5, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash6, hash6, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash7, hash7, size );
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||||
@@ -564,7 +593,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@@ -577,19 +605,15 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
const uint32_t ntime = bswap_32( pdata[17] );
|
|
||||||
if ( s_ntime != ntime )
|
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
s_ntime = ntime;
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
if ( opt_debug && !thr_id )
|
saved_height = work->height;
|
||||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||||
@@ -626,7 +650,14 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
|||||||
case HAMSI:
|
case HAMSI:
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
hamsi512_8way_init( &x16rv2_ctx.hamsi );
|
hamsi512_8way_init( &x16rv2_ctx.hamsi );
|
||||||
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 64 );
|
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 72 );
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
fugue512_init( &x16rv2_ctx.fugue );
|
||||||
|
fugue512_update( &x16rv2_ctx.fugue, edata, 76 );
|
||||||
|
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||||
|
edata, edata, edata, edata, 640 );
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
|
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
|
||||||
@@ -824,8 +855,8 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
|||||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way_update( &ctx.skein, vhash, size );
|
skein512_4way_update( &ctx.skein, vhash, size );
|
||||||
}
|
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
skein512_4way_close( &ctx.skein, vhash );
|
||||||
|
}
|
||||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||||
break;
|
break;
|
||||||
case LUFFA:
|
case LUFFA:
|
||||||
@@ -945,7 +976,7 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
|
hamsi512_4way_update( &ctx.hamsi, input + (72<<2), 8 );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
@@ -956,10 +987,27 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
|||||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
fugue512_full( &ctx.fugue, hash0, in0, size );
|
if ( i == 0 )
|
||||||
fugue512_full( &ctx.fugue, hash1, in1, size );
|
{
|
||||||
fugue512_full( &ctx.fugue, hash2, in2, size );
|
fugue512_update( &ctx.fugue, in0 + 76, 4 );
|
||||||
fugue512_full( &ctx.fugue, hash3, in3, size );
|
fugue512_final( &ctx.fugue, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in1 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash1 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in2 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash2 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in3 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash3 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fugue512_full( &ctx.fugue, hash0, hash0, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash1, hash1, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash2, hash2, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash3, hash3, size );
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
@@ -1055,7 +1103,6 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t edata[20];
|
uint32_t edata[20];
|
||||||
uint32_t bedata1[2];
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@@ -1068,17 +1115,15 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if ( bench ) ptarget[7] = 0x0fff;
|
if ( bench ) ptarget[7] = 0x0fff;
|
||||||
|
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
if ( work->height != saved_height )
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
const uint32_t ntime = bswap_32(pdata[17]);
|
|
||||||
if ( s_ntime != ntime )
|
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
s_ntime = ntime;
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
if ( opt_debug && !thr_id )
|
saved_height = work->height;
|
||||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||||
@@ -1101,7 +1146,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
skein512_4way_prehash64( &x16rv2_ctx.skein, vdata );
|
||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
v128_bswap32_80( edata, pdata );
|
v128_bswap32_80( edata, pdata );
|
||||||
@@ -1112,7 +1157,13 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
case HAMSI:
|
case HAMSI:
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
hamsi512_4way_init( &x16rv2_ctx.hamsi );
|
hamsi512_4way_init( &x16rv2_ctx.hamsi );
|
||||||
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 64 );
|
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 72 );
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
fugue512_init( &x16rv2_ctx.fugue );
|
||||||
|
fugue512_update( &x16rv2_ctx.fugue, edata, 76 );
|
||||||
|
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
v128_bswap32_intrlv80_4x32( vdata32, pdata );
|
v128_bswap32_intrlv80_4x32( vdata32, pdata );
|
||||||
@@ -1151,4 +1202,450 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined (X16RV2_2WAY)
|
||||||
|
|
||||||
|
union _x16rv2_2x64_context_overlay
|
||||||
|
{
|
||||||
|
blake512_2x64_context blake;
|
||||||
|
bmw512_2x64_context bmw;
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_groestl groestl;
|
||||||
|
#else
|
||||||
|
sph_groestl512_context groestl;
|
||||||
|
#endif
|
||||||
|
skein512_2x64_context skein;
|
||||||
|
jh512_2x64_context jh;
|
||||||
|
keccak512_2x64_context keccak;
|
||||||
|
hashState_luffa luffa;
|
||||||
|
cubehashParam cube;
|
||||||
|
shavite512_context shavite;
|
||||||
|
simd512_context simd;
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_echo echo;
|
||||||
|
#else
|
||||||
|
sph_echo512_context echo;
|
||||||
|
#endif
|
||||||
|
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||||
|
hamsi_2x64_context hamsi;
|
||||||
|
#else
|
||||||
|
sph_hamsi512_context hamsi;
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_fugue fugue;
|
||||||
|
#else
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
#endif
|
||||||
|
sph_shabal512_context shabal;
|
||||||
|
sph_whirlpool_context whirlpool;
|
||||||
|
sha512_2x64_context sha512;
|
||||||
|
sph_tiger_context tiger;
|
||||||
|
} __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
typedef union _x16rv2_2x64_context_overlay x16rv2_2x64_context_overlay;
|
||||||
|
|
||||||
|
static __thread x16rv2_2x64_context_overlay x16rv2_ctx;
|
||||||
|
|
||||||
|
// Pad the 24 bytes tiger hash to 64 bytes
|
||||||
|
static inline void padtiger512( uint32_t* hash )
|
||||||
|
{
|
||||||
|
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int x16rv2_2x64_hash( void* output, const void* input, int thrid )
|
||||||
|
{
|
||||||
|
uint32_t vhash[20*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash0[20] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash1[20] __attribute__ ((aligned (32)));
|
||||||
|
x16rv2_2x64_context_overlay ctx;
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||||
|
void *in0 = (void*) hash0;
|
||||||
|
void *in1 = (void*) hash1;
|
||||||
|
int size = 80;
|
||||||
|
|
||||||
|
dintrlv_2x64( hash0, hash1, input, 640 );
|
||||||
|
|
||||||
|
for ( int i = 0; i < 16; i++ )
|
||||||
|
{
|
||||||
|
const char elem = x16r_hash_order[i];
|
||||||
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
|
switch ( algo )
|
||||||
|
{
|
||||||
|
case BLAKE:
|
||||||
|
if ( i == 0 )
|
||||||
|
blake512_2x64_full( &ctx.blake, vhash, input, size );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
blake512_2x64_full( &ctx.blake, vhash, vhash, size );
|
||||||
|
}
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case BMW:
|
||||||
|
bmw512_2x64_init( &ctx.bmw );
|
||||||
|
if ( i == 0 )
|
||||||
|
bmw512_2x64_update( &ctx.bmw, input, size );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
bmw512_2x64_update( &ctx.bmw, vhash, size );
|
||||||
|
}
|
||||||
|
bmw512_2x64_close( &ctx.bmw, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case GROESTL:
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
groestl512_full( &ctx.groestl, hash0, in0, size<<3 );
|
||||||
|
groestl512_full( &ctx.groestl, hash1, in1, size<<3 );
|
||||||
|
#else
|
||||||
|
sph_groestl512_init( &ctx.groestl );
|
||||||
|
sph_groestl512( &ctx.groestl, in0, size );
|
||||||
|
sph_groestl512_close( &ctx.groestl, hash0 );
|
||||||
|
sph_groestl512_init( &ctx.groestl );
|
||||||
|
sph_groestl512( &ctx.groestl, in1, size );
|
||||||
|
sph_groestl512_close( &ctx.groestl, hash1 );
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case JH:
|
||||||
|
if ( i == 0 )
|
||||||
|
jh512_2x64_update( &ctx.jh, input + (64<<1), 16 );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
jh512_2x64_init( &ctx.jh );
|
||||||
|
jh512_2x64_update( &ctx.jh, vhash, size );
|
||||||
|
}
|
||||||
|
jh512_2x64_close( &ctx.jh, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case KECCAK:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_tiger( &ctx.tiger, in0 + 64, 16 );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||||
|
sph_tiger( &ctx.tiger, in1 + 64, 16 );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger( &ctx.tiger, in0, size );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash0 );
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger( &ctx.tiger, in1, size );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash1 );
|
||||||
|
}
|
||||||
|
for ( int i = (24/4); i < (64/4); i++ )
|
||||||
|
hash0[i] = hash1[i] = 0;
|
||||||
|
|
||||||
|
intrlv_2x64( vhash, hash0, hash1, 512 );
|
||||||
|
keccak512_2x64_init( &ctx.keccak );
|
||||||
|
keccak512_2x64_update( &ctx.keccak, vhash, 64 );
|
||||||
|
keccak512_2x64_close( &ctx.keccak, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case SKEIN:
|
||||||
|
if ( i == 0 )
|
||||||
|
skein512_2x64_final16( &ctx.skein, vhash, input + (64*2) );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, in0, in1, size<<3 );
|
||||||
|
skein512_2x64_full( &ctx.skein, vhash, vhash, size );
|
||||||
|
}
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
case LUFFA:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_tiger( &ctx.tiger, in0 + 64, 16 );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||||
|
sph_tiger( &ctx.tiger, in1 + 64, 16 );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger( &ctx.tiger, in0, size );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash0 );
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger( &ctx.tiger, in1, size );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash1 );
|
||||||
|
}
|
||||||
|
for ( int i = (24/4); i < (64/4); i++ )
|
||||||
|
hash0[i] = hash1[i] = 0;
|
||||||
|
luffa_full( &ctx.luffa, hash0, 512, hash0, 64 );
|
||||||
|
luffa_full( &ctx.luffa, hash1, 512, hash1, 64 );
|
||||||
|
break;
|
||||||
|
case CUBEHASH:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
cubehashUpdateDigest( &ctx.cube, hash0, in0 + 64, 16 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, hash1, in1 + 64, 16 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cubehash_full( &ctx.cube, hash0, 512, hash0, size );
|
||||||
|
cubehash_full( &ctx.cube, hash1, 512, hash1, size );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SHAVITE:
|
||||||
|
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||||
|
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||||
|
break;
|
||||||
|
case SIMD:
|
||||||
|
simd512_ctx( &ctx.simd, hash0, in0, size );
|
||||||
|
simd512_ctx( &ctx.simd, hash1, in1, size );
|
||||||
|
break;
|
||||||
|
case ECHO:
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
echo_full( &ctx.echo, hash0, 512, in0, size );
|
||||||
|
echo_full( &ctx.echo, hash1, 512, in1, size );
|
||||||
|
#else
|
||||||
|
sph_echo512_init( &ctx.echo );
|
||||||
|
sph_echo512( &ctx.echo, in0, size );
|
||||||
|
sph_echo512_close( &ctx.echo, hash0 );
|
||||||
|
sph_echo512_init( &ctx.echo );
|
||||||
|
sph_echo512( &ctx.echo, in1, size );
|
||||||
|
sph_echo512_close( &ctx.echo, hash1 );
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case HAMSI:
|
||||||
|
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||||
|
if ( i == 0 )
|
||||||
|
hamsi512_2x64_update( &ctx.hamsi, input + (72*2), 8 );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
intrlv_2x64( vhash, hash0, hash1, size<<3 );
|
||||||
|
hamsi512_2x64_init( &ctx.hamsi );
|
||||||
|
hamsi512_2x64_update( &ctx.hamsi, vhash, size );
|
||||||
|
}
|
||||||
|
hamsi512_2x64_close( &ctx.hamsi, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
#else
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_hamsi512( &ctx.hamsi, in0 + 72, 8 );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||||
|
sph_hamsi512( &ctx.hamsi, in1 + 72, 8 );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_hamsi512_init( &ctx.hamsi );
|
||||||
|
sph_hamsi512( &ctx.hamsi, hash0, size );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash0 );
|
||||||
|
sph_hamsi512_init( &ctx.hamsi );
|
||||||
|
sph_hamsi512( &ctx.hamsi, hash1, size );
|
||||||
|
sph_hamsi512_close( &ctx.hamsi, hash1 );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
fugue512_update( &ctx.fugue, in0 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in1 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fugue512_full( &ctx.fugue, hash0, hash0, size );
|
||||||
|
fugue512_full( &ctx.fugue, hash1, hash1, size );
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_fugue512( &ctx.fugue, in0 + 76, 4 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(sph_fugue512_context) );
|
||||||
|
sph_fugue512( &ctx.fugue, in1 + 76, 4 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_fugue512_full( &ctx.fugue, hash0, hash0, size );
|
||||||
|
sph_fugue512_full( &ctx.fugue, hash1, hash1, size );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case SHABAL:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_shabal512( &ctx.shabal, in0 + 64, 16 );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||||
|
sph_shabal512( &ctx.shabal, in1 + 64, 16 );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_shabal512_init( &ctx.shabal );
|
||||||
|
sph_shabal512( &ctx.shabal, hash0, size );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash0 );
|
||||||
|
sph_shabal512_init( &ctx.shabal );
|
||||||
|
sph_shabal512( &ctx.shabal, hash1, size );
|
||||||
|
sph_shabal512_close( &ctx.shabal, hash1 );
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case WHIRLPOOL:
|
||||||
|
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||||
|
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||||
|
break;
|
||||||
|
case SHA_512:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
sph_tiger( &ctx.tiger, in0 + 64, 16 );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash0 );
|
||||||
|
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
|
||||||
|
sph_tiger( &ctx.tiger, in1 + 64, 16 );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash1 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger( &ctx.tiger, in0, size );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash0 );
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger( &ctx.tiger, in1, size );
|
||||||
|
sph_tiger_close( &ctx.tiger, hash1 );
|
||||||
|
}
|
||||||
|
for ( int i = (24/4); i < (64/4); i++ )
|
||||||
|
hash0[i] = hash1[i] = 0;
|
||||||
|
|
||||||
|
intrlv_2x64( vhash, hash0, hash1, 512 );
|
||||||
|
sha512_2x64_init( &ctx.sha512 );
|
||||||
|
sha512_2x64_update( &ctx.sha512, vhash, 64 );
|
||||||
|
sha512_2x64_close( &ctx.sha512, vhash );
|
||||||
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
|
size = 64;
|
||||||
|
}
|
||||||
|
memcpy( output, hash0, 32 );
|
||||||
|
memcpy( output+32, hash1, 32 );
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
|
{
|
||||||
|
uint32_t hash[2*16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t edata[20];
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0fff;
|
||||||
|
|
||||||
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
|
{
|
||||||
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
|
saved_height = work->height;
|
||||||
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||||
|
const char elem = x16r_hash_order[0];
|
||||||
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
switch ( algo )
|
||||||
|
{
|
||||||
|
case JH:
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
jh512_2x64_init( &x16rv2_ctx.jh );
|
||||||
|
jh512_2x64_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||||
|
break;
|
||||||
|
case KECCAK:
|
||||||
|
case LUFFA:
|
||||||
|
case SHA_512:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
sph_tiger_init( &x16rv2_ctx.tiger );
|
||||||
|
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
break;
|
||||||
|
case SKEIN:
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
skein512_2x64_prehash64( &x16rv2_ctx.skein, vdata );
|
||||||
|
break;
|
||||||
|
case CUBEHASH:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdate( &x16rv2_ctx.cube, edata, 64 );
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
break;
|
||||||
|
case HAMSI:
|
||||||
|
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
hamsi512_2x64_init( &x16rv2_ctx.hamsi );
|
||||||
|
hamsi512_2x64_update( &x16rv2_ctx.hamsi, vdata, 72 );
|
||||||
|
#else
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
sph_hamsi512_init( &x16rv2_ctx.hamsi );
|
||||||
|
sph_hamsi512( &x16rv2_ctx.hamsi, edata, 72 );
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
fugue512_init( &x16rv2_ctx.fugue );
|
||||||
|
fugue512_update( &x16rv2_ctx.fugue, edata, 76 );
|
||||||
|
#else
|
||||||
|
sph_fugue512_init( &x16rv2_ctx.fugue );
|
||||||
|
sph_fugue512( &x16rv2_ctx.fugue, edata, 76 );
|
||||||
|
#endif
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
break;
|
||||||
|
case SHABAL:
|
||||||
|
v128_bswap32_80( edata, pdata );
|
||||||
|
sph_shabal512_init( &x16rv2_ctx.shabal );
|
||||||
|
sph_shabal512( &x16rv2_ctx.shabal, edata, 64);
|
||||||
|
intrlv_2x64( vdata, edata, edata, 640 );
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
v128_bswap32_intrlv80_2x64( vdata, pdata );
|
||||||
|
}
|
||||||
|
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ( x16rv2_2x64_hash( hash, vdata, thr_id ) )
|
||||||
|
for ( int i = 0; i < 2; i++ )
|
||||||
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+i );
|
||||||
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
|
}
|
||||||
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
||||||
|
n += 2;
|
||||||
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -6,21 +6,15 @@
|
|||||||
*/
|
*/
|
||||||
#include "x16r-gate.h"
|
#include "x16r-gate.h"
|
||||||
|
|
||||||
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
|
#if !defined(X16RV2_8WAY) && !defined(X16RV2_4WAY) && !defined(X16RV2_2WAY)
|
||||||
|
|
||||||
#include "algo/tiger/sph_tiger.h"
|
#include "algo/tiger/sph_tiger.h"
|
||||||
|
|
||||||
union _x16rv2_context_overlay
|
union _x16rv2_context_overlay
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
|
||||||
hashState_echo echo;
|
|
||||||
hashState_groestl groestl;
|
|
||||||
hashState_fugue fugue;
|
|
||||||
#else
|
|
||||||
sph_groestl512_context groestl;
|
sph_groestl512_context groestl;
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
#endif
|
|
||||||
blake512_context blake;
|
blake512_context blake;
|
||||||
sph_bmw512_context bmw;
|
sph_bmw512_context bmw;
|
||||||
sph_skein512_context skein;
|
sph_skein512_context skein;
|
||||||
@@ -29,11 +23,7 @@ union _x16rv2_context_overlay
|
|||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
#if defined(__aarch64__)
|
|
||||||
sph_simd512_context simd;
|
sph_simd512_context simd;
|
||||||
#else
|
|
||||||
hashState_sd simd;
|
|
||||||
#endif
|
|
||||||
sph_hamsi512_context hamsi;
|
sph_hamsi512_context hamsi;
|
||||||
sph_shabal512_context shabal;
|
sph_shabal512_context shabal;
|
||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
@@ -72,15 +62,9 @@ int x16rv2_hash( void* output, const void* input, int thrid )
|
|||||||
sph_bmw512_close(&ctx.bmw, hash);
|
sph_bmw512_close(&ctx.bmw, hash);
|
||||||
break;
|
break;
|
||||||
case GROESTL:
|
case GROESTL:
|
||||||
#if defined(__AES__)
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
|
||||||
(const char*)in, size<<3 );
|
|
||||||
#else
|
|
||||||
sph_groestl512_init( &ctx.groestl );
|
sph_groestl512_init( &ctx.groestl );
|
||||||
sph_groestl512( &ctx.groestl, in, size );
|
sph_groestl512( &ctx.groestl, in, size );
|
||||||
sph_groestl512_close(&ctx.groestl, hash);
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
sph_skein512_init( &ctx.skein );
|
sph_skein512_init( &ctx.skein );
|
||||||
@@ -117,25 +101,14 @@ int x16rv2_hash( void* output, const void* input, int thrid )
|
|||||||
shavite512_full( &ctx.shavite, hash, in, size );
|
shavite512_full( &ctx.shavite, hash, in, size );
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
#if defined(__aarch64__)
|
|
||||||
sph_simd512_init( &ctx.simd );
|
sph_simd512_init( &ctx.simd );
|
||||||
sph_simd512(&ctx.simd, (const void*) hash, 64);
|
sph_simd512(&ctx.simd, hash, 64);
|
||||||
sph_simd512_close(&ctx.simd, hash);
|
sph_simd512_close(&ctx.simd, hash);
|
||||||
#else
|
|
||||||
simd_full( &ctx.simd, (BitSequence *)hash,
|
|
||||||
(const BitSequence*)in, size<<3 );
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case ECHO:
|
case ECHO:
|
||||||
#if defined(__AES__)
|
|
||||||
init_echo( &ctx.echo, 512 );
|
|
||||||
update_final_echo ( &ctx.echo, (BitSequence *)hash,
|
|
||||||
(const BitSequence*)in, size<<3 );
|
|
||||||
#else
|
|
||||||
sph_echo512_init( &ctx.echo );
|
sph_echo512_init( &ctx.echo );
|
||||||
sph_echo512( &ctx.echo, in, size );
|
sph_echo512( &ctx.echo, in, size );
|
||||||
sph_echo512_close( &ctx.echo, hash );
|
sph_echo512_close( &ctx.echo, hash );
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
sph_hamsi512_init( &ctx.hamsi );
|
sph_hamsi512_init( &ctx.hamsi );
|
||||||
@@ -143,11 +116,7 @@ int x16rv2_hash( void* output, const void* input, int thrid )
|
|||||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
#if defined(__AES__)
|
|
||||||
fugue512_full( &ctx.fugue, hash, in, size );
|
|
||||||
#else
|
|
||||||
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
sph_shabal512_init( &ctx.shabal );
|
sph_shabal512_init( &ctx.shabal );
|
||||||
|
362
algo/x16/x20r.c
Normal file
362
algo/x16/x20r.c
Normal file
@@ -0,0 +1,362 @@
|
|||||||
|
#include "miner.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "algo/blake/sph_blake.h"
|
||||||
|
#include "algo/bmw/sph_bmw.h"
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
|
#include "algo/jh/sph_jh.h"
|
||||||
|
#include "algo/keccak/sph_keccak.h"
|
||||||
|
#include "algo/skein/sph_skein.h"
|
||||||
|
#include "algo/luffa/sph_luffa.h"
|
||||||
|
#include "algo/cubehash/sph_cubehash.h"
|
||||||
|
#include "algo/shavite/sph_shavite.h"
|
||||||
|
#include "algo/simd/sph_simd.h"
|
||||||
|
#include "algo/echo/sph_echo.h"
|
||||||
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
|
#include "algo/fugue/sph_fugue.h"
|
||||||
|
#include "algo/shabal/sph_shabal.h"
|
||||||
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
|
#include "algo/sha/sph_sha2.h"
|
||||||
|
#include "x16r-gate.h"
|
||||||
|
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
#define X20R_8WAY 1
|
||||||
|
#elif defined(__AVX2__) && defined(__AES__)
|
||||||
|
#define X20R_4WAY 1
|
||||||
|
#elif defined(__SSE2__) || defined(__ARM_NEON)
|
||||||
|
#define X20R_2WAY 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// X20R is not what it seems. It does not permute 20 functions over 20 rounds,
|
||||||
|
// it only permutes 16 of them. The last 4 functions are victims of trying to
|
||||||
|
// fit 20 elements in the space for only 16. Arithmetic overflow recycles the
|
||||||
|
// first 4 functions. Otherwise it's identical to X16R.
|
||||||
|
// Welcome to the real X20R.
|
||||||
|
|
||||||
|
#define X20R_HASH_FUNC_COUNT 20
|
||||||
|
/*
|
||||||
|
enum x20r_algo
|
||||||
|
{
|
||||||
|
BLAKE = 0,
|
||||||
|
BMW,
|
||||||
|
GROESTL,
|
||||||
|
JH,
|
||||||
|
KECCAK,
|
||||||
|
SKEIN,
|
||||||
|
LUFFA,
|
||||||
|
CUBEHASH,
|
||||||
|
SHAVITE,
|
||||||
|
SIMD,
|
||||||
|
ECHO,
|
||||||
|
HAMSI,
|
||||||
|
FUGUE,
|
||||||
|
SHABAL,
|
||||||
|
WHIRLPOOL,
|
||||||
|
SHA512,
|
||||||
|
HAVAL, // Last 4 names are meaningless and not used
|
||||||
|
GOST,
|
||||||
|
RADIOGATUN,
|
||||||
|
PANAMA,
|
||||||
|
X20R_HASH_FUNC_COUNT
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
static __thread char x20r_hash_order[ X20R_HASH_FUNC_COUNT + 1 ] = {0};
|
||||||
|
|
||||||
|
static void x20r_getAlgoString(const uint8_t* prevblock, char *output)
|
||||||
|
{
|
||||||
|
char *sptr = output;
|
||||||
|
|
||||||
|
for (int j = 0; j < X20R_HASH_FUNC_COUNT; j++) {
|
||||||
|
uint8_t b = (19 - j) >> 1; // 16 ascii hex chars, reversed
|
||||||
|
uint8_t algoDigit = (j & 1) ? prevblock[b] & 0xF : prevblock[b] >> 4;
|
||||||
|
if (algoDigit >= 10)
|
||||||
|
sprintf(sptr, "%c", 'A' + (algoDigit - 10));
|
||||||
|
else
|
||||||
|
sprintf(sptr, "%u", (uint32_t) algoDigit);
|
||||||
|
sptr++;
|
||||||
|
}
|
||||||
|
*sptr = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(X20R_8WAY)
|
||||||
|
|
||||||
|
int x20r_8x64_hash( void* output, const void* input, int thrid )
|
||||||
|
{
|
||||||
|
uint8_t hash[64*8] __attribute__ ((aligned (128)));
|
||||||
|
if ( !x16r_8x64_hash_generic( hash, input, thrid, x20r_hash_order,
|
||||||
|
X20R_HASH_FUNC_COUNT ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
memcpy( output, hash, 32 );
|
||||||
|
memcpy( output+32, hash+64, 32 );
|
||||||
|
memcpy( output+64, hash+128, 32 );
|
||||||
|
memcpy( output+96, hash+192, 32 );
|
||||||
|
memcpy( output+128, hash+256, 32 );
|
||||||
|
memcpy( output+160, hash+320, 32 );
|
||||||
|
memcpy( output+192, hash+384, 32 );
|
||||||
|
memcpy( output+224, hash+448, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x20r_8x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
|
{
|
||||||
|
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||||
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
|
{
|
||||||
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
|
vdata[3] = bswap_32( pdata[3] );
|
||||||
|
saved_height = work->height;
|
||||||
|
x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
||||||
|
}
|
||||||
|
|
||||||
|
x16r_8x64_prehash( vdata, pdata, x20r_hash_order );
|
||||||
|
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||||
|
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if( x20r_8x64_hash( hash, vdata, thr_id ) );
|
||||||
|
for ( int i = 0; i < 8; i++ )
|
||||||
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+i );
|
||||||
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
|
}
|
||||||
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
|
_mm512_set1_epi64( 0x0000000800000000 ) );
|
||||||
|
n += 8;
|
||||||
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#elif defined(X20R_4WAY)
|
||||||
|
|
||||||
|
int x20r_4x64_hash( void* output, const void* input, int thrid )
|
||||||
|
{
|
||||||
|
uint8_t hash[64*4] __attribute__ ((aligned (64)));
|
||||||
|
if ( !x16r_4x64_hash_generic( hash, input, thrid, x20r_hash_order,
|
||||||
|
X20R_HASH_FUNC_COUNT ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
memcpy( output, hash, 32 );
|
||||||
|
memcpy( output+32, hash+64, 32 );
|
||||||
|
memcpy( output+64, hash+128, 32 );
|
||||||
|
memcpy( output+96, hash+192, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x20r_4x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
|
{
|
||||||
|
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
|
{
|
||||||
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
|
vdata[3] = bswap_32( pdata[3] );
|
||||||
|
saved_height = work->height;
|
||||||
|
x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
||||||
|
}
|
||||||
|
|
||||||
|
x16r_4x64_prehash( vdata, pdata, x20r_hash_order );
|
||||||
|
*noncev = mm256_intrlv_blend_32(
|
||||||
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ( x20r_4x64_hash( hash, vdata, thr_id ) );
|
||||||
|
for ( int i = 0; i < 4; i++ )
|
||||||
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+i );
|
||||||
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
|
}
|
||||||
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
|
_mm256_set1_epi64x( 0x0000000400000000 ) );
|
||||||
|
n += 4;
|
||||||
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(X20R_2WAY)
|
||||||
|
|
||||||
|
int x20r_2x64_hash( void* output, const void* input, int thrid )
|
||||||
|
{
|
||||||
|
uint8_t hash[64*2] __attribute__ ((aligned (64)));
|
||||||
|
if ( !x16r_2x64_hash_generic( hash, input, thrid, x20r_hash_order,
|
||||||
|
X20R_HASH_FUNC_COUNT ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
memcpy( output, hash, 32 );
|
||||||
|
memcpy( output+32, hash+64, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x20r_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
|
{
|
||||||
|
uint32_t hash[16*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
|
{
|
||||||
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
|
vdata[3] = bswap_32( pdata[3] );
|
||||||
|
saved_height = work->height;
|
||||||
|
x20r_getAlgoString( (const uint8_t*)(&vdata[1]), x20r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
||||||
|
}
|
||||||
|
|
||||||
|
x16r_2x64_prehash( vdata, pdata, x20r_hash_order );
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ( x20r_2x64_hash( hash, vdata, thr_id ) );
|
||||||
|
for ( int i = 0; i < 2; i++ )
|
||||||
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+i );
|
||||||
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
|
}
|
||||||
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
||||||
|
n += 2;
|
||||||
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
int x20r_hash( void* output, const void* input, int thrid )
|
||||||
|
{
|
||||||
|
uint8_t hash[64] __attribute__ ((aligned (64)));
|
||||||
|
if ( !x16r_hash_generic( hash, input, thrid, x20r_hash_order,
|
||||||
|
X20R_HASH_FUNC_COUNT ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
memcpy( output, hash, 32 );
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x20r( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t _ALIGN(32) hash32[8];
|
||||||
|
uint32_t _ALIGN(32) edata[20];
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
uint32_t nonce = first_nonce;
|
||||||
|
volatile uint8_t *restart = &( work_restart[thr_id].restart );
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
|
{
|
||||||
|
edata[1] = bswap_32( pdata[1] );
|
||||||
|
edata[2] = bswap_32( pdata[2] );
|
||||||
|
edata[3] = bswap_32( pdata[3] );
|
||||||
|
saved_height = work->height;
|
||||||
|
x20r_getAlgoString( (const uint8_t*)(&edata[1]), x20r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x20r_hash_order );
|
||||||
|
}
|
||||||
|
|
||||||
|
x16r_prehash( edata, pdata, x20r_hash_order );
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
edata[19] = nonce;
|
||||||
|
if ( x20r_hash( hash32, edata, thr_id ) )
|
||||||
|
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( nonce );
|
||||||
|
submit_solution( work, hash32, mythr );
|
||||||
|
}
|
||||||
|
nonce++;
|
||||||
|
} while ( nonce < max_nonce && !(*restart) );
|
||||||
|
pdata[19] = nonce;
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
bool register_x20r_algo( algo_gate_t* gate )
|
||||||
|
{
|
||||||
|
#if defined (X20R_8WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x20r_8x64;
|
||||||
|
#elif defined (X20R_4WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x20r_4x64;
|
||||||
|
#elif defined (X20R_2WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_x20r_2x64;
|
||||||
|
#else
|
||||||
|
gate->scanhash = (void*)&scanhash_x20r;
|
||||||
|
#endif
|
||||||
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
|
||||||
|
| NEON_OPT;
|
||||||
|
opt_target_factor = 256.0;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
@@ -9,6 +9,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "algo/haval/haval-hash-4way.h"
|
#include "algo/haval/haval-hash-4way.h"
|
||||||
|
#include "algo/haval/sph-haval.h"
|
||||||
#include "algo/tiger/sph_tiger.h"
|
#include "algo/tiger/sph_tiger.h"
|
||||||
#include "algo/gost/sph_gost.h"
|
#include "algo/gost/sph_gost.h"
|
||||||
#include "algo/lyra2/lyra2.h"
|
#include "algo/lyra2/lyra2.h"
|
||||||
@@ -42,7 +43,8 @@ int x21s_8way_hash( void* output, const void* input, int thrid )
|
|||||||
uint32_t *hash7 = (uint32_t*)( shash+448 );
|
uint32_t *hash7 = (uint32_t*)( shash+448 );
|
||||||
x21s_8way_context_overlay ctx;
|
x21s_8way_context_overlay ctx;
|
||||||
|
|
||||||
if ( !x16r_8way_hash_generic( shash, input, thrid ) )
|
if ( !x16r_8way_hash_generic( shash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||||
@@ -134,7 +136,6 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &hash[7<<3];
|
uint32_t *hash7 = &hash[7<<3];
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -148,20 +149,18 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
if ( work->height != saved_height )
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
uint32_t ntime = bswap_32( pdata[17] );
|
|
||||||
if ( s_ntime != ntime )
|
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
s_ntime = ntime;
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
if ( opt_debug && !thr_id )
|
saved_height = work->height;
|
||||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_8way_prehash( vdata, pdata );
|
x16r_8way_prehash( vdata, pdata, x16r_hash_order );
|
||||||
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||||
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
@@ -223,7 +222,8 @@ int x21s_4way_hash( void* output, const void* input, int thrid )
|
|||||||
uint32_t *hash2 = (uint32_t*)( shash+128 );
|
uint32_t *hash2 = (uint32_t*)( shash+128 );
|
||||||
uint32_t *hash3 = (uint32_t*)( shash+192 );
|
uint32_t *hash3 = (uint32_t*)( shash+192 );
|
||||||
|
|
||||||
if ( !x16r_4way_hash_generic( shash, input, thrid ) )
|
if ( !x16r_4way_hash_generic( shash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
@@ -294,7 +294,6 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@@ -307,20 +306,18 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
if ( work->height != saved_height )
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
uint32_t ntime = bswap_32( pdata[17] );
|
|
||||||
if ( s_ntime != ntime )
|
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
s_ntime = ntime;
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
if ( opt_debug && !thr_id )
|
saved_height = work->height;
|
||||||
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_4way_prehash( vdata, pdata );
|
x16r_4way_prehash( vdata, pdata, x16r_hash_order );
|
||||||
*noncev = mm256_intrlv_blend_32(
|
*noncev = mm256_intrlv_blend_32(
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
@@ -351,4 +348,117 @@ bool x21s_4way_thread_init()
|
|||||||
return x21s_4way_matrix;
|
return x21s_4way_matrix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined (X21S_2WAY)
|
||||||
|
|
||||||
|
static __thread uint64_t* x21s_2x64_matrix;
|
||||||
|
|
||||||
|
union _x21s_2x64_context_overlay
|
||||||
|
{
|
||||||
|
sph_haval256_5_context haval;
|
||||||
|
sph_tiger_context tiger;
|
||||||
|
sph_gost512_context gost;
|
||||||
|
} __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
typedef union _x21s_2x64_context_overlay x21s_2x64_context_overlay;
|
||||||
|
|
||||||
|
int x21s_2x64_hash( void* output, const void* input, int thrid )
|
||||||
|
{
|
||||||
|
uint8_t shash[64*2] __attribute__ ((aligned (64)));
|
||||||
|
x21s_2x64_context_overlay ctx;
|
||||||
|
uint32_t *hash0 = (uint32_t*) shash;
|
||||||
|
uint32_t *hash1 = (uint32_t*)( shash+64 );
|
||||||
|
|
||||||
|
if ( !x16r_2x64_hash_generic( shash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
sph_haval256_5_init( &ctx.haval );
|
||||||
|
sph_haval256_5( &ctx.haval, hash0, 64 );
|
||||||
|
sph_haval256_5_close( &ctx.haval, hash0 );
|
||||||
|
sph_haval256_5_init( &ctx.haval );
|
||||||
|
sph_haval256_5( &ctx.haval, hash1, 64 );
|
||||||
|
sph_haval256_5_close( &ctx.haval, hash1 );
|
||||||
|
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger ( &ctx.tiger, (const void*) hash0, 64 );
|
||||||
|
sph_tiger_close( &ctx.tiger, (void*) hash0 );
|
||||||
|
sph_tiger_init( &ctx.tiger );
|
||||||
|
sph_tiger ( &ctx.tiger, (const void*) hash1, 64 );
|
||||||
|
sph_tiger_close( &ctx.tiger, (void*) hash1 );
|
||||||
|
|
||||||
|
LYRA2REV2( x21s_2x64_matrix, (void*) hash0, 32, (const void*) hash0, 32,
|
||||||
|
(const void*) hash0, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV2( x21s_2x64_matrix, (void*) hash1, 32, (const void*) hash1, 32,
|
||||||
|
(const void*) hash1, 32, 1, 4, 4 );
|
||||||
|
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512 ( &ctx.gost, (const void*) hash0, 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, (void*) hash0 );
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512 ( &ctx.gost, (const void*) hash1, 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, (void*) hash1 );
|
||||||
|
|
||||||
|
sha256_full( output, hash0, 64 );
|
||||||
|
sha256_full( output+32, hash1, 64 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
|
{
|
||||||
|
uint32_t hash[16*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 2;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
v128_t *noncev = (v128_t*)vdata + 9;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
|
static __thread uint32_t saved_height = UINT32_MAX;
|
||||||
|
if ( work->height != saved_height )
|
||||||
|
{
|
||||||
|
vdata[1] = bswap_32( pdata[1] );
|
||||||
|
vdata[2] = bswap_32( pdata[2] );
|
||||||
|
saved_height = work->height;
|
||||||
|
x16_r_s_getAlgoString( (const uint8_t*)(&vdata[1]), x16r_hash_order );
|
||||||
|
if ( !opt_quiet && !thr_id )
|
||||||
|
applog( LOG_INFO, "hash order %s", x16r_hash_order );
|
||||||
|
}
|
||||||
|
|
||||||
|
x16r_2x64_prehash( vdata, pdata, x16r_hash_order );
|
||||||
|
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ( x21s_2x64_hash( hash, vdata, thr_id ) )
|
||||||
|
for ( int i = 0; i < 2; i++ )
|
||||||
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n+i );
|
||||||
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
|
}
|
||||||
|
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
|
||||||
|
n += 2;
|
||||||
|
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool x21s_2x64_thread_init()
|
||||||
|
{
|
||||||
|
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||||
|
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||||
|
|
||||||
|
const int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||||
|
x21s_2x64_matrix = mm_malloc( size, 64 );
|
||||||
|
return x21s_2x64_matrix;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -15,7 +15,7 @@
|
|||||||
#include "algo/gost/sph_gost.h"
|
#include "algo/gost/sph_gost.h"
|
||||||
#include "algo/lyra2/lyra2.h"
|
#include "algo/lyra2/lyra2.h"
|
||||||
|
|
||||||
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
|
#if !defined(X21S_8WAY) && !defined(X21S_4WAY)
|
||||||
|
|
||||||
static __thread uint64_t* x21s_matrix;
|
static __thread uint64_t* x21s_matrix;
|
||||||
|
|
||||||
@@ -33,7 +33,8 @@ int x21s_hash( void* output, const void* input, int thrid )
|
|||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(128) hash[16];
|
||||||
x21s_context_overlay ctx;
|
x21s_context_overlay ctx;
|
||||||
|
|
||||||
if ( !x16r_hash_generic( hash, input, thrid ) )
|
if ( !x16r_hash_generic( hash, input, thrid, x16r_hash_order,
|
||||||
|
X16R_HASH_FUNC_COUNT ) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
sph_haval256_5_init( &ctx.haval );
|
sph_haval256_5_init( &ctx.haval );
|
||||||
@@ -84,7 +85,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
|||||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_prehash( edata, pdata );
|
x16r_prehash( edata, pdata, x16r_hash_order );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@@ -928,29 +928,24 @@ int scanhash_x17_4x64( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
#elif defined(X17_2X64)
|
#elif defined(X17_2X64)
|
||||||
|
|
||||||
// Need sph in some cases
|
|
||||||
#include "algo/luffa/luffa_for_sse2.h"
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
//#include "algo/simd/sph_simd.h"
|
|
||||||
//#include "algo/simd/nist.h"
|
|
||||||
#if !( defined(__SSE4_2__) || defined(__ARM_NEON) )
|
#if !( defined(__SSE4_2__) || defined(__ARM_NEON) )
|
||||||
#include "algo/hamsi/sph_hamsi.h"
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
#endif
|
#endif
|
||||||
#include "algo/shabal/sph_shabal.h"
|
#include "algo/shabal/sph_shabal.h"
|
||||||
#include "algo/haval/sph-haval.h"
|
#include "algo/haval/sph-haval.h"
|
||||||
#if !( defined(__AES__) ) //|| defined(__ARM_FEATURE_AES) )
|
|
||||||
#include "algo/groestl/sph_groestl.h"
|
|
||||||
#endif
|
|
||||||
#if !( defined(__AES__) || defined(__ARM_FEATURE_AES) )
|
#if !( defined(__AES__) || defined(__ARM_FEATURE_AES) )
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
#endif
|
|
||||||
#include "algo/fugue/sph_fugue.h"
|
#include "algo/fugue/sph_fugue.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
union _x17_context_overlay
|
union _x17_context_overlay
|
||||||
{
|
{
|
||||||
blake512_2x64_context blake;
|
blake512_2x64_context blake;
|
||||||
bmw512_2x64_context bmw;
|
bmw512_2x64_context bmw;
|
||||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
#else
|
#else
|
||||||
sph_groestl512_context groestl;
|
sph_groestl512_context groestl;
|
||||||
@@ -960,7 +955,7 @@ union _x17_context_overlay
|
|||||||
#else
|
#else
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
#endif
|
#endif
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
hashState_fugue fugue;
|
hashState_fugue fugue;
|
||||||
#else
|
#else
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
@@ -1000,7 +995,7 @@ int x17_2x64_hash( void *output, const void *input, int thr_id )
|
|||||||
|
|
||||||
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
dintrlv_2x64( hash0, hash1, vhash, 512 );
|
||||||
|
|
||||||
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
groestl512_full( &ctx.groestl, hash0, hash0, 512 );
|
groestl512_full( &ctx.groestl, hash0, hash0, 512 );
|
||||||
groestl512_full( &ctx.groestl, hash1, hash1, 512 );
|
groestl512_full( &ctx.groestl, hash1, hash1, 512 );
|
||||||
#else
|
#else
|
||||||
@@ -1061,7 +1056,7 @@ int x17_2x64_hash( void *output, const void *input, int thr_id )
|
|||||||
sph_hamsi512_close( &ctx.hamsi, hash1 );
|
sph_hamsi512_close( &ctx.hamsi, hash1 );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
fugue512_full( &ctx.fugue, hash0, hash0, 64 );
|
||||||
fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
fugue512_full( &ctx.fugue, hash1, hash1, 64 );
|
||||||
#else
|
#else
|
||||||
@@ -1133,14 +1128,12 @@ int scanhash_x17_2x64( struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
applog(LOG_INFO,"Submitted Thread %d, lane %d",thr_id,0);
|
|
||||||
pdata[19] = bswap_32( n );
|
pdata[19] = bswap_32( n );
|
||||||
// pdata[19] = n;
|
// pdata[19] = n;
|
||||||
submit_solution( work, hash, mythr );
|
submit_solution( work, hash, mythr );
|
||||||
}
|
}
|
||||||
if ( unlikely( valid_hash( hash+8, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash+8, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
applog(LOG_INFO,"Submitted Thread %d, lane %d",thr_id,1);
|
|
||||||
pdata[19] = bswap_32( n+1 );
|
pdata[19] = bswap_32( n+1 );
|
||||||
submit_solution( work, hash+8, mythr );
|
submit_solution( work, hash+8, mythr );
|
||||||
}
|
}
|
||||||
|
@@ -4,25 +4,24 @@
|
|||||||
|
|
||||||
#include "algo/blake/blake512-hash.h"
|
#include "algo/blake/blake512-hash.h"
|
||||||
#include "algo/bmw/sph_bmw.h"
|
#include "algo/bmw/sph_bmw.h"
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
|
||||||
#include "algo/fugue/fugue-aesni.h"
|
#include "algo/fugue/fugue-aesni.h"
|
||||||
|
#else
|
||||||
|
#include "algo/fugue/sph_fugue.h"
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#else
|
#else
|
||||||
#include "algo/groestl/sph_groestl.h"
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
#include "algo/fugue/sph_fugue.h"
|
|
||||||
#endif
|
#endif
|
||||||
#include "algo/skein/sph_skein.h"
|
#include "algo/skein/sph_skein.h"
|
||||||
#include "algo/jh/sph_jh.h"
|
#include "algo/jh/sph_jh.h"
|
||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
#include "algo/shavite/sph_shavite.h"
|
#include "algo/shavite/sph_shavite.h"
|
||||||
#if defined(__aarch64__)
|
#include "algo/simd/simd-hash-2way.h"
|
||||||
#include "algo/simd/sph_simd.h"
|
|
||||||
#else
|
|
||||||
#include "algo/simd/nist.h"
|
|
||||||
#endif
|
|
||||||
#include "algo/hamsi/sph_hamsi.h"
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
#include "algo/shabal/sph_shabal.h"
|
#include "algo/shabal/sph_shabal.h"
|
||||||
#include "algo/whirlpool/sph_whirlpool.h"
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
@@ -39,14 +38,17 @@ union _x22i_context_overlay
|
|||||||
{
|
{
|
||||||
blake512_context blake;
|
blake512_context blake;
|
||||||
sph_bmw512_context bmw;
|
sph_bmw512_context bmw;
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_fugue fugue;
|
||||||
|
#else
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
hashState_fugue fugue;
|
|
||||||
#else
|
#else
|
||||||
sph_groestl512_context groestl;
|
sph_groestl512_context groestl;
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
sph_fugue512_context fugue;
|
|
||||||
#endif
|
#endif
|
||||||
sph_jh512_context jh;
|
sph_jh512_context jh;
|
||||||
sph_keccak512_context keccak;
|
sph_keccak512_context keccak;
|
||||||
@@ -54,11 +56,7 @@ union _x22i_context_overlay
|
|||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite;
|
sph_shavite512_context shavite;
|
||||||
#if defined(__aarch64__)
|
simd512_context simd;
|
||||||
sph_simd512_context simd;
|
|
||||||
#else
|
|
||||||
hashState_sd simd;
|
|
||||||
#endif
|
|
||||||
sph_hamsi512_context hamsi;
|
sph_hamsi512_context hamsi;
|
||||||
sph_shabal512_context shabal;
|
sph_shabal512_context shabal;
|
||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
@@ -83,10 +81,8 @@ int x22i_hash( void *output, const void *input, int thrid )
|
|||||||
sph_bmw512(&ctx.bmw, (const void*) hash, 64);
|
sph_bmw512(&ctx.bmw, (const void*) hash, 64);
|
||||||
sph_bmw512_close(&ctx.bmw, hash);
|
sph_bmw512_close(&ctx.bmw, hash);
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
init_groestl( &ctx.groestl, 64 );
|
groestl512_full( &ctx.groestl, hash, hash, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
|
||||||
(const char*)hash, 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_groestl512_init( &ctx.groestl );
|
sph_groestl512_init( &ctx.groestl );
|
||||||
sph_groestl512( &ctx.groestl, hash, 64 );
|
sph_groestl512( &ctx.groestl, hash, 64 );
|
||||||
@@ -109,26 +105,16 @@ int x22i_hash( void *output, const void *input, int thrid )
|
|||||||
|
|
||||||
luffa_full( &ctx.luffa, hash, 512, hash, 64 );
|
luffa_full( &ctx.luffa, hash, 512, hash, 64 );
|
||||||
|
|
||||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
cubehash_full( &ctx.cube, hash, 512, hash, 64 );
|
||||||
cubehashUpdateDigest( &ctx.cube, hash, hash, 64 );
|
|
||||||
|
|
||||||
sph_shavite512_init(&ctx.shavite);
|
sph_shavite512_init(&ctx.shavite);
|
||||||
sph_shavite512(&ctx.shavite, (const void*) hash, 64);
|
sph_shavite512(&ctx.shavite, (const void*) hash, 64);
|
||||||
sph_shavite512_close(&ctx.shavite, hash);
|
sph_shavite512_close(&ctx.shavite, hash);
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
simd512_ctx( &ctx.simd, hash, hash, 64 );
|
||||||
sph_simd512_init(&ctx.simd );
|
|
||||||
sph_simd512(&ctx.simd, (const void*) hash, 64);
|
|
||||||
sph_simd512_close(&ctx.simd, hash);
|
|
||||||
#else
|
|
||||||
simd_full( &ctx.simd, (BitSequence *)hash,
|
|
||||||
(const BitSequence *)hash, 512 );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
init_echo( &ctx.echo, 512 );
|
echo_full( &ctx.echo, hash, 512, hash, 64 );
|
||||||
update_final_echo ( &ctx.echo, (BitSequence*)hash,
|
|
||||||
(const BitSequence*)hash, 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_echo512_init( &ctx.echo );
|
sph_echo512_init( &ctx.echo );
|
||||||
sph_echo512( &ctx.echo, hash, 64 );
|
sph_echo512( &ctx.echo, hash, 64 );
|
||||||
@@ -141,7 +127,7 @@ int x22i_hash( void *output, const void *input, int thrid )
|
|||||||
sph_hamsi512(&ctx.hamsi, (const void*) hash, 64);
|
sph_hamsi512(&ctx.hamsi, (const void*) hash, 64);
|
||||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
fugue512_full( &ctx.fugue, hash, hash, 64 );
|
fugue512_full( &ctx.fugue, hash, hash, 64 );
|
||||||
#else
|
#else
|
||||||
sph_fugue512_init(&ctx.fugue);
|
sph_fugue512_init(&ctx.fugue);
|
||||||
@@ -192,8 +178,8 @@ int x22i_hash( void *output, const void *input, int thrid )
|
|||||||
int scanhash_x22i( struct work *work, uint32_t max_nonce,
|
int scanhash_x22i( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr)
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
{
|
{
|
||||||
uint32_t edata[20] __attribute__((aligned(64)));
|
uint32_t edata[20] __attribute__((aligned(32)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
|
@@ -4,25 +4,24 @@
|
|||||||
|
|
||||||
#include "algo/blake/blake512-hash.h"
|
#include "algo/blake/blake512-hash.h"
|
||||||
#include "algo/bmw/sph_bmw.h"
|
#include "algo/bmw/sph_bmw.h"
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
|
||||||
#include "algo/fugue/fugue-aesni.h"
|
#include "algo/fugue/fugue-aesni.h"
|
||||||
|
#else
|
||||||
|
#include "algo/fugue/sph_fugue.h"
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#else
|
#else
|
||||||
#include "algo/groestl/sph_groestl.h"
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
#include "algo/fugue/sph_fugue.h"
|
|
||||||
#endif
|
#endif
|
||||||
#include "algo/skein/sph_skein.h"
|
#include "algo/skein/sph_skein.h"
|
||||||
#include "algo/jh/sph_jh.h"
|
#include "algo/jh/sph_jh.h"
|
||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
#include "algo/shavite/sph_shavite.h"
|
#include "algo/shavite/sph_shavite.h"
|
||||||
#if defined(__aarch64__)
|
#include "algo/simd/simd-hash-2way.h"
|
||||||
#include "algo/simd/sph_simd.h"
|
|
||||||
#else
|
|
||||||
#include "algo/simd/nist.h"
|
|
||||||
#endif
|
|
||||||
#include "algo/hamsi/sph_hamsi.h"
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
#include "algo/shabal/sph_shabal.h"
|
#include "algo/shabal/sph_shabal.h"
|
||||||
#include "algo/whirlpool/sph_whirlpool.h"
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
@@ -42,14 +41,17 @@ union _x25x_context_overlay
|
|||||||
{
|
{
|
||||||
blake512_context blake;
|
blake512_context blake;
|
||||||
sph_bmw512_context bmw;
|
sph_bmw512_context bmw;
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
|
hashState_fugue fugue;
|
||||||
|
#else
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
#endif
|
||||||
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
hashState_fugue fugue;
|
|
||||||
#else
|
#else
|
||||||
sph_groestl512_context groestl;
|
sph_groestl512_context groestl;
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
sph_fugue512_context fugue;
|
|
||||||
#endif
|
#endif
|
||||||
sph_jh512_context jh;
|
sph_jh512_context jh;
|
||||||
sph_keccak512_context keccak;
|
sph_keccak512_context keccak;
|
||||||
@@ -57,11 +59,7 @@ union _x25x_context_overlay
|
|||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite;
|
sph_shavite512_context shavite;
|
||||||
#if defined(__aarch64__)
|
simd512_context simd;
|
||||||
sph_simd512_context simd;
|
|
||||||
#else
|
|
||||||
hashState_sd simd;
|
|
||||||
#endif
|
|
||||||
sph_hamsi512_context hamsi;
|
sph_hamsi512_context hamsi;
|
||||||
sph_shabal512_context shabal;
|
sph_shabal512_context shabal;
|
||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
@@ -88,10 +86,8 @@ int x25x_hash( void *output, const void *input, int thrid )
|
|||||||
sph_bmw512(&ctx.bmw, (const void*) &hash[0], 64);
|
sph_bmw512(&ctx.bmw, (const void*) &hash[0], 64);
|
||||||
sph_bmw512_close(&ctx.bmw, &hash[1]);
|
sph_bmw512_close(&ctx.bmw, &hash[1]);
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
init_groestl( &ctx.groestl, 64 );
|
groestl512_full( &ctx.groestl, (void*)&hash[2], (const void*)&hash[1], 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)&hash[2],
|
|
||||||
(const char*)&hash[1], 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_groestl512_init( &ctx.groestl );
|
sph_groestl512_init( &ctx.groestl );
|
||||||
sph_groestl512( &ctx.groestl, &hash[1], 64 );
|
sph_groestl512( &ctx.groestl, &hash[1], 64 );
|
||||||
@@ -112,28 +108,18 @@ int x25x_hash( void *output, const void *input, int thrid )
|
|||||||
|
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
init_luffa( &ctx.luffa, 512 );
|
luffa_full( &ctx.luffa, (void*)&hash[6], 512, (const void*)&hash[5], 64 );
|
||||||
luffa_full( &ctx.luffa, &hash[6], 512, &hash[5], 64 );
|
|
||||||
|
|
||||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
cubehash_full( &ctx.cube, (void*)&hash[7], 512, (const void*)&hash[6], 64 );
|
||||||
cubehashUpdateDigest( &ctx.cube, &hash[7], &hash[6], 64 );
|
|
||||||
|
|
||||||
sph_shavite512_init(&ctx.shavite);
|
sph_shavite512_init(&ctx.shavite);
|
||||||
sph_shavite512(&ctx.shavite, (const void*) &hash[7], 64);
|
sph_shavite512(&ctx.shavite, (const void*) &hash[7], 64);
|
||||||
sph_shavite512_close(&ctx.shavite, &hash[8]);
|
sph_shavite512_close(&ctx.shavite, &hash[8]);
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
simd512_ctx( &ctx.simd, (void*)&hash[9], (const void*)&hash[8], 64 );
|
||||||
sph_simd512(&ctx.simd, (const void*) &hash[8], 64);
|
|
||||||
sph_simd512_close(&ctx.simd, &hash[9] );
|
|
||||||
#else
|
|
||||||
update_final_sd( &ctx.simd, (BitSequence *)&hash[9],
|
|
||||||
(const BitSequence *)&hash[8], 512 );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
init_echo( &ctx.echo, 512 );
|
echo_full( &ctx.echo, (void*)&hash[10], 512, (const void*)&hash[9], 64 );
|
||||||
update_final_echo ( &ctx.echo, (BitSequence*)&hash[10],
|
|
||||||
(const BitSequence*)&hash[9], 512 );
|
|
||||||
#else
|
#else
|
||||||
sph_echo512_init( &ctx.echo );
|
sph_echo512_init( &ctx.echo );
|
||||||
sph_echo512( &ctx.echo, &hash[9], 64 );
|
sph_echo512( &ctx.echo, &hash[9], 64 );
|
||||||
@@ -146,7 +132,7 @@ int x25x_hash( void *output, const void *input, int thrid )
|
|||||||
sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64);
|
sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64);
|
||||||
sph_hamsi512_close(&ctx.hamsi, &hash[11]);
|
sph_hamsi512_close(&ctx.hamsi, &hash[11]);
|
||||||
|
|
||||||
#if defined(__AES__)
|
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
|
||||||
fugue512_full( &ctx.fugue, &hash[12], &hash[11], 64 );
|
fugue512_full( &ctx.fugue, &hash[12], &hash[11], 64 );
|
||||||
#else
|
#else
|
||||||
sph_fugue512_init(&ctx.fugue);
|
sph_fugue512_init(&ctx.fugue);
|
||||||
@@ -227,8 +213,8 @@ int x25x_hash( void *output, const void *input, int thrid )
|
|||||||
int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr)
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
{
|
{
|
||||||
uint32_t edata[20] __attribute__((aligned(64)));
|
uint32_t edata[20] __attribute__((aligned(32)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
@@ -245,7 +231,7 @@ int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
edata[19] = n;
|
edata[19] = n;
|
||||||
if ( x25x_hash( hash64, edata, thr_id ) )
|
if ( x25x_hash( hash64, edata, thr_id ) );
|
||||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n );
|
pdata[19] = bswap_32( n );
|
||||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.10.
|
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.15.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
||||||
@@ -608,8 +608,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='23.10'
|
PACKAGE_VERSION='23.15'
|
||||||
PACKAGE_STRING='cpuminer-opt 23.10'
|
PACKAGE_STRING='cpuminer-opt 23.15'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 23.10 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 23.15 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1432,7 +1432,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 23.10:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 23.15:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1538,7 +1538,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 23.10
|
cpuminer-opt configure 23.15
|
||||||
generated by GNU Autoconf 2.71
|
generated by GNU Autoconf 2.71
|
||||||
|
|
||||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 23.10, which was
|
It was created by cpuminer-opt $as_me 23.15, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
$ $0$ac_configure_args_raw
|
$ $0$ac_configure_args_raw
|
||||||
@@ -3593,7 +3593,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='23.10'
|
VERSION='23.15'
|
||||||
|
|
||||||
|
|
||||||
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
||||||
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 23.10, which was
|
This file was extended by cpuminer-opt $as_me 23.15, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config='$ac_cs_config_escaped'
|
ac_cs_config='$ac_cs_config_escaped'
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 23.10
|
cpuminer-opt config.status 23.15
|
||||||
configured by $0, generated by GNU Autoconf 2.71,
|
configured by $0, generated by GNU Autoconf 2.71,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [23.10])
|
AC_INIT([cpuminer-opt], [23.15])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
20
configure~
20
configure~
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.9.
|
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.14.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
||||||
@@ -608,8 +608,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='23.9'
|
PACKAGE_VERSION='23.14'
|
||||||
PACKAGE_STRING='cpuminer-opt 23.9'
|
PACKAGE_STRING='cpuminer-opt 23.14'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 23.9 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 23.14 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1432,7 +1432,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 23.9:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 23.14:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1538,7 +1538,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 23.9
|
cpuminer-opt configure 23.14
|
||||||
generated by GNU Autoconf 2.71
|
generated by GNU Autoconf 2.71
|
||||||
|
|
||||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 23.9, which was
|
It was created by cpuminer-opt $as_me 23.14, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
$ $0$ac_configure_args_raw
|
$ $0$ac_configure_args_raw
|
||||||
@@ -3593,7 +3593,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='23.9'
|
VERSION='23.14'
|
||||||
|
|
||||||
|
|
||||||
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
||||||
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 23.9, which was
|
This file was extended by cpuminer-opt $as_me 23.14, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config='$ac_cs_config_escaped'
|
ac_cs_config='$ac_cs_config_escaped'
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 23.9
|
cpuminer-opt config.status 23.14
|
||||||
configured by $0, generated by GNU Autoconf 2.71,
|
configured by $0, generated by GNU Autoconf 2.71,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
14
cpu-miner.c
14
cpu-miner.c
@@ -2837,15 +2837,6 @@ static void show_credits()
|
|||||||
#define check_cpu_capability() cpu_capability( false )
|
#define check_cpu_capability() cpu_capability( false )
|
||||||
#define display_cpu_capability() cpu_capability( true )
|
#define display_cpu_capability() cpu_capability( true )
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
|
||||||
|
|
||||||
#define XSTR(x) STR(x)
|
|
||||||
#define STR(x) #x
|
|
||||||
|
|
||||||
//#pragma message "Building for armv" XSTR(__ARM_ARCH)
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static bool cpu_capability( bool display_only )
|
static bool cpu_capability( bool display_only )
|
||||||
{
|
{
|
||||||
char cpu_brand[0x40];
|
char cpu_brand[0x40];
|
||||||
@@ -3675,11 +3666,6 @@ static int thread_create(struct thr_info *thr, void* func)
|
|||||||
|
|
||||||
void get_defconfig_path(char *out, size_t bufsize, char *argv0);
|
void get_defconfig_path(char *out, size_t bufsize, char *argv0);
|
||||||
|
|
||||||
|
|
||||||
#include "simd-utils.h"
|
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
|
||||||
#include "compat/aes_helper.c"
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
struct thr_info *thr;
|
struct thr_info *thr;
|
||||||
|
3
miner.h
3
miner.h
@@ -672,6 +672,7 @@ enum algos {
|
|||||||
ALGO_X16RT_VEIL,
|
ALGO_X16RT_VEIL,
|
||||||
ALGO_X16S,
|
ALGO_X16S,
|
||||||
ALGO_X17,
|
ALGO_X17,
|
||||||
|
ALGO_X20R,
|
||||||
ALGO_X21S,
|
ALGO_X21S,
|
||||||
ALGO_X22I,
|
ALGO_X22I,
|
||||||
ALGO_X25X,
|
ALGO_X25X,
|
||||||
@@ -767,6 +768,7 @@ static const char* const algo_names[] = {
|
|||||||
"x16rt-veil",
|
"x16rt-veil",
|
||||||
"x16s",
|
"x16s",
|
||||||
"x17",
|
"x17",
|
||||||
|
"x20r",
|
||||||
"x21s",
|
"x21s",
|
||||||
"x22i",
|
"x22i",
|
||||||
"x25x",
|
"x25x",
|
||||||
@@ -930,6 +932,7 @@ Options:\n\
|
|||||||
x16rt-veil Veil (VEIL)\n\
|
x16rt-veil Veil (VEIL)\n\
|
||||||
x16s\n\
|
x16s\n\
|
||||||
x17\n\
|
x17\n\
|
||||||
|
x20r\n\
|
||||||
x21s\n\
|
x21s\n\
|
||||||
x22i\n\
|
x22i\n\
|
||||||
x25x\n\
|
x25x\n\
|
||||||
|
@@ -381,7 +381,7 @@ static inline void dintrlv_4x32_512( void *dst0, void *dst1, void *dst2,
|
|||||||
d0[15] = s[ 60]; d1[15] = s[ 61]; d2[15] = s[ 62]; d3[15] = s[ 63];
|
d0[15] = s[ 60]; d1[15] = s[ 61]; d2[15] = s[ 62]; d3[15] = s[ 63];
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // SSE4_1 else SSE2 or NEON
|
#endif // SSE4_1 or NEON else SSE2
|
||||||
|
|
||||||
static inline void extr_lane_4x32( void *d, const void *s,
|
static inline void extr_lane_4x32( void *d, const void *s,
|
||||||
const int lane, const int bit_len )
|
const int lane, const int bit_len )
|
||||||
|
@@ -207,7 +207,7 @@ static inline __m128i mm128_mov32_128( const uint32_t n )
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// broadcast lane l to all lanes
|
// broadcast (replicate) lane l to all lanes
|
||||||
#define v128_replane64( v, l ) \
|
#define v128_replane64( v, l ) \
|
||||||
( (l) == 0 ) ? _mm_shuffle_epi32( v, 0x44 ) \
|
( (l) == 0 ) ? _mm_shuffle_epi32( v, 0x44 ) \
|
||||||
: _mm_shuffle_epi32( v, 0xee )
|
: _mm_shuffle_epi32( v, 0xee )
|
||||||
@@ -319,7 +319,7 @@ static inline __m128i v128_neg1_fn()
|
|||||||
// c[7:6] source element selector
|
// c[7:6] source element selector
|
||||||
|
|
||||||
// Convert type and abbreviate name: eXtract Insert Mask = XIM
|
// Convert type and abbreviate name: eXtract Insert Mask = XIM
|
||||||
#define mm128_xim_32( v1, v0, c ) \
|
#define v128_xim32( v1, v0, c ) \
|
||||||
_mm_castps_si128( _mm_insert_ps( _mm_castsi128_ps( v1 ), \
|
_mm_castps_si128( _mm_insert_ps( _mm_castsi128_ps( v1 ), \
|
||||||
_mm_castsi128_ps( v0 ), c ) )
|
_mm_castsi128_ps( v0 ), c ) )
|
||||||
|
|
||||||
@@ -327,20 +327,19 @@ static inline __m128i v128_neg1_fn()
|
|||||||
/*
|
/*
|
||||||
// Copy i32 to element c of dest and copy remaining elemnts from v.
|
// Copy i32 to element c of dest and copy remaining elemnts from v.
|
||||||
#define v128_put32( v, i32, c ) \
|
#define v128_put32( v, i32, c ) \
|
||||||
mm128_xim_32( v, mm128_mov32_128( i32 ), (c)<<4 )
|
v128_xim_32( v, mm128_mov32_128( i32 ), (c)<<4 )
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#define mm128_mask_32( v, m ) mm128_xim_32( v, v, m )
|
#define v128_mask32( v, m ) v128_xim32( v, v, m & 0xf )
|
||||||
|
|
||||||
// Zero 32 bit elements when corresponding bit in 4 bit mask is set.
|
// Zero 32 bit elements when corresponding bit in 4 bit mask is set.
|
||||||
//static inline __m128i mm128_mask_32( const __m128i v, const int m )
|
//static inline __m128i v128_mask32( const __m128i v, const int m )
|
||||||
//{ return mm128_xim_32( v, v, m ); }
|
//{ return v128_xim32( v, v, m ); }
|
||||||
#define v128_mask32 mm128_mask_32
|
|
||||||
|
|
||||||
// Copy element i2 of v2 to element i1 of dest and copy remaining elements from v1.
|
// Copy element l0 of v0 to element l1 of dest and copy remaining elements from v1.
|
||||||
#define v128_movlane32( v1, l1, v0, l0 ) \
|
#define v128_movlane32( v1, l1, v0, l0 ) \
|
||||||
mm128_xim_32( v1, v0, ( (l1)<<4 ) | ( (l0)<<6 ) )
|
v128_xim32( v1, v0, ( (l1)<<4 ) | ( (l0)<<6 ) )
|
||||||
|
|
||||||
#endif // SSE4_1
|
#endif // SSE4_1
|
||||||
|
|
||||||
@@ -451,7 +450,7 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
|
|
||||||
#define v128_orand( a, b, c ) _mm_or_si128( a, _mm_and_si128( b, c ) )
|
#define v128_orand( a, b, c ) _mm_or_si128( a, _mm_and_si128( b, c ) )
|
||||||
|
|
||||||
#define v128_xnor( a, b ) mm128_not( _mm_xor_si128( a, b ) )
|
#define v128_xnor( a, b ) v128_not( _mm_xor_si128( a, b ) )
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -482,7 +481,7 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
#define v128_qrev16(v) v128_shuffle16( v, 0x1b )
|
#define v128_qrev16(v) v128_shuffle16( v, 0x1b )
|
||||||
#define v128_lrev16(v) v128_shuffle16( v, 0xb1 )
|
#define v128_lrev16(v) v128_shuffle16( v, 0xb1 )
|
||||||
|
|
||||||
// These should never be callled from application code, use rol/ror.
|
// Internal use only, should never be callled from application code.
|
||||||
#define v128_ror64_sse2( v, c ) \
|
#define v128_ror64_sse2( v, c ) \
|
||||||
_mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) )
|
_mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) )
|
||||||
|
|
||||||
@@ -497,14 +496,14 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
|
|
||||||
#if defined(__AVX512VL__)
|
#if defined(__AVX512VL__)
|
||||||
|
|
||||||
// AVX512 fastest all rotations.
|
// AVX512 fastest for all rotations.
|
||||||
#define v128_ror64 _mm_ror_epi64
|
#define v128_ror64 _mm_ror_epi64
|
||||||
#define v128_rol64 _mm_rol_epi64
|
#define v128_rol64 _mm_rol_epi64
|
||||||
#define v128_ror32 _mm_ror_epi32
|
#define v128_ror32 _mm_ror_epi32
|
||||||
#define v128_rol32 _mm_rol_epi32
|
#define v128_rol32 _mm_rol_epi32
|
||||||
|
|
||||||
// ror/rol will always find the fastest but these names may fit better with
|
// ror/rol will always find the fastest but these names may fit better with
|
||||||
// application code performing shuffles rather than bit rotations.
|
// application code performing byte operations rather than bit rotations.
|
||||||
#define v128_shuflr64_8( v) _mm_ror_epi64( v, 8 )
|
#define v128_shuflr64_8( v) _mm_ror_epi64( v, 8 )
|
||||||
#define v128_shufll64_8( v) _mm_rol_epi64( v, 8 )
|
#define v128_shufll64_8( v) _mm_rol_epi64( v, 8 )
|
||||||
#define v128_shuflr64_16(v) _mm_ror_epi64( v, 16 )
|
#define v128_shuflr64_16(v) _mm_ror_epi64( v, 16 )
|
||||||
@@ -576,7 +575,7 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
: v128_rol32_sse2( v, c )
|
: v128_rol32_sse2( v, c )
|
||||||
|
|
||||||
#elif defined(__SSE2__)
|
#elif defined(__SSE2__)
|
||||||
// SSE2: fastest 32 bit, very fast 16
|
// SSE2: fastest 32 bit, very fast 16, all else slow
|
||||||
|
|
||||||
#define v128_ror64( v, c ) \
|
#define v128_ror64( v, c ) \
|
||||||
( (c) == 16 ) ? v128_shuffle16( v, 0x39 ) \
|
( (c) == 16 ) ? v128_shuffle16( v, 0x39 ) \
|
||||||
@@ -607,9 +606,7 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//#define v128_ror64 mm128_ror_64
|
// deprecated
|
||||||
//#define v128_rol64 mm128_rol_64
|
|
||||||
//#define v128_ror32 mm128_ror_32
|
|
||||||
#define mm128_rol_32 v128_rol32
|
#define mm128_rol_32 v128_rol32
|
||||||
|
|
||||||
/* not used
|
/* not used
|
||||||
@@ -632,7 +629,7 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
_mm_ror_epi32( v0, c ); \
|
_mm_ror_epi32( v0, c ); \
|
||||||
_mm_ror_epi32( v1, c )
|
_mm_ror_epi32( v1, c )
|
||||||
|
|
||||||
#define mm128_2rol32( v1, v0, c ) \
|
#define v128_2rol32( v1, v0, c ) \
|
||||||
_mm_rol_epi32( v0, c ); \
|
_mm_rol_epi32( v0, c ); \
|
||||||
_mm_rol_epi32( v1, c )
|
_mm_rol_epi32( v1, c )
|
||||||
|
|
||||||
@@ -683,11 +680,13 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
|
|
||||||
// Cross lane shuffles
|
// Cross lane shuffles
|
||||||
|
|
||||||
|
// No NEON version
|
||||||
#define v128_shuffle32 _mm_shuffle_epi32
|
#define v128_shuffle32 _mm_shuffle_epi32
|
||||||
|
|
||||||
// shuffle using vector mask, for compatibility with NEON
|
/* Not used, exists only for compatibility with NEON if ever needed.
|
||||||
#define v128_shufflev32( v, vmask ) \
|
#define v128_shufflev32( v, vmask ) \
|
||||||
v128_shuffle32( v, mm128_movmask_32( vmask ) )
|
v128_shuffle32( v, mm128_movmask_32( vmask ) )
|
||||||
|
*/
|
||||||
|
|
||||||
#define v128_shuffle8 _mm_shuffle_epi8
|
#define v128_shuffle8 _mm_shuffle_epi8
|
||||||
|
|
||||||
@@ -696,12 +695,10 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
#define v128_shuffle2_64( v1, v2, c ) \
|
#define v128_shuffle2_64( v1, v2, c ) \
|
||||||
_mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( v1 ), \
|
_mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( v1 ), \
|
||||||
_mm_castsi128_pd( v2 ), c ) );
|
_mm_castsi128_pd( v2 ), c ) );
|
||||||
#define mm128_shuffle2_64 v128_shuffle2_64
|
|
||||||
|
|
||||||
#define v128_shuffle2_32( v1, v2, c ) \
|
#define v128_shuffle2_32( v1, v2, c ) \
|
||||||
_mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps( v1 ), \
|
_mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps( v1 ), \
|
||||||
_mm_castsi128_ps( v2 ), c ) );
|
_mm_castsi128_ps( v2 ), c ) );
|
||||||
#define mm128_shuffle2_32 v128_shuffle2_32
|
|
||||||
|
|
||||||
// Rotate vector elements accross all lanes
|
// Rotate vector elements accross all lanes
|
||||||
|
|
||||||
@@ -733,6 +730,7 @@ static inline void v128_memcpy( v128_t *dst, const v128_t *src, const int n )
|
|||||||
#define v128_bswap32( v ) \
|
#define v128_bswap32( v ) \
|
||||||
_mm_shuffle_epi8( v, _mm_set_epi64x( 0x0c0d0e0f08090a0b, \
|
_mm_shuffle_epi8( v, _mm_set_epi64x( 0x0c0d0e0f08090a0b, \
|
||||||
0x0405060700010203 ) )
|
0x0405060700010203 ) )
|
||||||
|
// deprecated
|
||||||
#define mm128_bswap_32 v128_bswap32
|
#define mm128_bswap_32 v128_bswap32
|
||||||
|
|
||||||
#define v128_bswap16( v ) \
|
#define v128_bswap16( v ) \
|
||||||
|
@@ -40,7 +40,7 @@
|
|||||||
#define v128u8_load( p ) vld1q_u16( (uint8_t*)(p) )
|
#define v128u8_load( p ) vld1q_u16( (uint8_t*)(p) )
|
||||||
#define v128u8_store( p, v ) vst1q_u16( (uint8_t*)(p), v )
|
#define v128u8_store( p, v ) vst1q_u16( (uint8_t*)(p), v )
|
||||||
|
|
||||||
// load & set1 combined
|
// load & set1 combined, doesn't work
|
||||||
#define v128_load1_64(p) vld1q_dup_u64( (uint64_t*)(p) )
|
#define v128_load1_64(p) vld1q_dup_u64( (uint64_t*)(p) )
|
||||||
#define v128_load1_32(p) vld1q_dup_u32( (uint32_t*)(p) )
|
#define v128_load1_32(p) vld1q_dup_u32( (uint32_t*)(p) )
|
||||||
#define v128_load1_16(p) vld1q_dup_u16( (uint16_t*)(p) )
|
#define v128_load1_16(p) vld1q_dup_u16( (uint16_t*)(p) )
|
||||||
@@ -68,7 +68,7 @@
|
|||||||
#define v128_mul32 vmulq_u32
|
#define v128_mul32 vmulq_u32
|
||||||
#define v128_mul16 vmulq_u16
|
#define v128_mul16 vmulq_u16
|
||||||
|
|
||||||
// slow, tested with argon2d
|
// Widening multiply, align source elements with Intel
|
||||||
static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 )
|
static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 )
|
||||||
{
|
{
|
||||||
return vmull_u32( vget_low_u32( vcopyq_laneq_u32( v1, 1, v1, 2 ) ),
|
return vmull_u32( vget_low_u32( vcopyq_laneq_u32( v1, 1, v1, 2 ) ),
|
||||||
@@ -86,7 +86,7 @@ static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 )
|
|||||||
|
|
||||||
// Not yet needed
|
// Not yet needed
|
||||||
//#define v128_cmpeq1
|
//#define v128_cmpeq1
|
||||||
|
// Signed
|
||||||
#define v128_cmpgt64( v1, v0 ) vcgtq_s64( (int64x2_t)v1, (int64x2_t)v0 )
|
#define v128_cmpgt64( v1, v0 ) vcgtq_s64( (int64x2_t)v1, (int64x2_t)v0 )
|
||||||
#define v128_cmpgt32( v1, v0 ) vcgtq_s32( (int32x4_t)v1, (int32x4_t)v0 )
|
#define v128_cmpgt32( v1, v0 ) vcgtq_s32( (int32x4_t)v1, (int32x4_t)v0 )
|
||||||
#define v128_cmpgt16( v1, v0 ) vcgtq_s16( (int16x8_t)v1, (int16x8_t)v0 )
|
#define v128_cmpgt16( v1, v0 ) vcgtq_s16( (int16x8_t)v1, (int16x8_t)v0 )
|
||||||
@@ -97,7 +97,7 @@ static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 )
|
|||||||
#define v128_cmplt16( v1, v0 ) vcltq_s16( (int16x8_t)v1, (int16x8_t)v0 )
|
#define v128_cmplt16( v1, v0 ) vcltq_s16( (int16x8_t)v1, (int16x8_t)v0 )
|
||||||
#define v128_cmplt8( v1, v0 ) vcltq_s8( (int8x16_t)v1, (int8x16_t)v0 )
|
#define v128_cmplt8( v1, v0 ) vcltq_s8( (int8x16_t)v1, (int8x16_t)v0 )
|
||||||
|
|
||||||
// bit shift
|
// Logical bit shift
|
||||||
#define v128_sl64 vshlq_n_u64
|
#define v128_sl64 vshlq_n_u64
|
||||||
#define v128_sl32 vshlq_n_u32
|
#define v128_sl32 vshlq_n_u32
|
||||||
#define v128_sl16 vshlq_n_u16
|
#define v128_sl16 vshlq_n_u16
|
||||||
@@ -108,7 +108,7 @@ static inline uint64x2_t v128_mulw32( uint32x4_t v1, uint32x4_t v0 )
|
|||||||
#define v128_sr16 vshrq_n_u16
|
#define v128_sr16 vshrq_n_u16
|
||||||
#define v128_sr8 vshrq_n_u8
|
#define v128_sr8 vshrq_n_u8
|
||||||
|
|
||||||
// Unit tested, working.
|
// Arithmetic shift.
|
||||||
#define v128_sra64( v, c ) vshrq_n_s64( (int64x2_t)v, c )
|
#define v128_sra64( v, c ) vshrq_n_s64( (int64x2_t)v, c )
|
||||||
#define v128_sra32( v, c ) vshrq_n_s32( (int32x4_t)v, c )
|
#define v128_sra32( v, c ) vshrq_n_s32( (int32x4_t)v, c )
|
||||||
#define v128_sra16( v, c ) vshrq_n_s16( (int16x8_t)v, c )
|
#define v128_sra16( v, c ) vshrq_n_s16( (int16x8_t)v, c )
|
||||||
@@ -406,34 +406,17 @@ static inline void v128_memcpy( void *dst, const void *src, const int n )
|
|||||||
v1 = vorrq_u32( v1, t1 ); \
|
v1 = vorrq_u32( v1, t1 ); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cross lane shuffles, no programmable shuffle in NEON
|
/* not used anywhere and hopefully never will
|
||||||
|
// vector mask, use as last resort. prefer tbl, rev, alignr, etc
|
||||||
// vector mask, use as last resort. prefer rev, alignr, etc
|
|
||||||
#define v128_shufflev32( v, vmask ) \
|
#define v128_shufflev32( v, vmask ) \
|
||||||
v128_set32( ((uint32_t*)&v)[ ((uint32_t*)(&vmask))[3] ], \
|
v128_set32( ((uint32_t*)&v)[ ((uint32_t*)(&vmask))[3] ], \
|
||||||
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[2] ], \
|
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[2] ], \
|
||||||
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[1] ], \
|
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[1] ], \
|
||||||
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[0] ] ) \
|
((uint32_t*)&v)[ ((uint32_t*)(&vmask))[0] ] ) \
|
||||||
|
*/
|
||||||
|
|
||||||
// compatible with x86_64, but very slow, avoid
|
|
||||||
#define v128_shuffle8( v, vmask ) \
|
#define v128_shuffle8( v, vmask ) \
|
||||||
v128_set8( ((uint8_t*)&v)[ ((uint8_t*)(&vmask))[15] ], \
|
vqtbl1q_u8( (uint8x16_t)v, (uint8x16_t)vmask )
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[14] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[13] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[12] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[11] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[10] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 9] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 8] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 7] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 6] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 5] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 4] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 3] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 2] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 1] ], \
|
|
||||||
((uint8_t*)&v)[ ((uint8_t*)(&vmask))[ 0] ] )
|
|
||||||
|
|
||||||
|
|
||||||
// sub-vector shuffles sometimes mirror bit rotation. Shuffle is faster.
|
// sub-vector shuffles sometimes mirror bit rotation. Shuffle is faster.
|
||||||
// Bit rotation already promotes faster widths. Usage is context sensitive.
|
// Bit rotation already promotes faster widths. Usage is context sensitive.
|
||||||
@@ -551,20 +534,6 @@ static inline uint16x8_t v128_shufll16( uint16x8_t v )
|
|||||||
casti_v128u64( dst,15 ) = v128_bswap64( casti_v128u64( src,15 ) ); \
|
casti_v128u64( dst,15 ) = v128_bswap64( casti_v128u64( src,15 ) ); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prograsmmable shuffles
|
|
||||||
// no compatible shuffles with x86_64, will require targeted user code.
|
|
||||||
|
|
||||||
#define v128_extractmask8( df, de, dd, dc, db, da, d9, d8, \
|
|
||||||
d7, d6, d5, d4, d3, d2, d1, d0, vmask ) \
|
|
||||||
d0 = ((uint8_t*)(&vmask))[0]; d1 = ((uint8_t*)(&vmask))[1]; \
|
|
||||||
d2 = ((uint8_t*)(&vmask))[2]; d3 = ((uint8_t*)(&vmask))[3]; \
|
|
||||||
d4 = ((uint8_t*)(&vmask))[0]; d5 = ((uint8_t*)(&vmask))[1]; \
|
|
||||||
d6 = ((uint8_t*)(&vmask))[2]; d7 = ((uint8_t*)(&vmask))[3]; \
|
|
||||||
d8 = ((uint8_t*)(&vmask))[0]; d9 = ((uint8_t*)(&vmask))[1]; \
|
|
||||||
da = ((uint8_t*)(&vmask))[2]; db = ((uint8_t*)(&vmask))[3]; \
|
|
||||||
dc = ((uint8_t*)(&vmask))[0]; dd = ((uint8_t*)(&vmask))[1]; \
|
|
||||||
de = ((uint8_t*)(&vmask))[2]; df = ((uint8_t*)(&vmask))[3];
|
|
||||||
|
|
||||||
// Blendv
|
// Blendv
|
||||||
#define v128_blendv( v1, v0, mask ) \
|
#define v128_blendv( v1, v0, mask ) \
|
||||||
v128_or( v128_andnot( mask, v1 ), v128_and( mask, v0 ) )
|
v128_or( v128_andnot( mask, v1 ), v128_and( mask, v0 ) )
|
||||||
|
@@ -930,7 +930,9 @@ static inline void cpu_brand_string( char* s )
|
|||||||
|
|
||||||
#elif defined(__arm__) || defined(__aarch64__)
|
#elif defined(__arm__) || defined(__aarch64__)
|
||||||
|
|
||||||
sprintf( s, "ARM 64 bit CPU" );
|
unsigned int cpu_info[4] = { 0 };
|
||||||
|
cpuid( 0, 0, cpu_info );
|
||||||
|
sprintf( s, "ARM 64 bit CPU, HWCAP %08x", cpu_info[0] );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user