This commit is contained in:
Jay D Dee
2023-11-20 11:51:57 -05:00
parent f3fde95f27
commit fc696dbbe5
36 changed files with 3597 additions and 3152 deletions

View File

@@ -27,17 +27,19 @@ See INSTALL_LINUX or INSTALL_WINDOWS for compile instructions
Requirements
------------
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
supported.
- A x86_64 architecture CPU with a minimum of SSE2 support. This includes Intel Core2 and newer and AMD equivalents.
- Arm CPU supporting AArch64 and NEON.
64 bit Linux or Windows operating system. Apple, Android and Raspberry Pi
are not supported. FreeBSD YMMV.
32 bit CPUs are not supported.
ARM requirements (Beta):
Older CPUs are supported by open source cpuminer-multi by TPruvot but at reduced performance.
CPU: Armv8 and NEON, SHA2 & AES are optional
OS: Linux distribution built for AArch64.
Packages: source code only.
Mining on mobile devices that meet the requirements is not recommended due to the risk of
overheating and damaging the battery. Mining has unlimited demand; it will push any device
to or beyond its limits. There is also a fire risk with overheated lithium batteries.
Beware of apps claiming "mobile only mining". There is no such thing, they aren't miners.
If a mobile CPU can mine it, any CPU can.
See wiki for details.
@@ -73,12 +75,22 @@ If not what makes it happen or not happen?
Change Log
----------
v23.12
Several bug fixes and speed improvements for the x16r family for all CPU architectures.
v23.11
This is a release candidate for full AArch64 support, marking the end of the Beta phase.
Fixed hmq1725 & x25x algos, SSE2 & NEON, broken in v3.23.4.
Most CPU-mineable SHA3 algos (X*) upgraded to 2-way SSE2 & NEON.
v23.10
x86_64: Fixed scrypt, scryptn2 algos SSE2.
Fixed sha512d256d algo AVX2, SSE2, NEON.
Fixed sha512256d algo AVX2, SSE2, NEON.
Fixed a bug in Skein N-way that reduced performance.
ARM: Skein algo optimized for NEON & SHA2.
ARM: Skein optimized for NEON, SHA2 & SSE2.
Skein2 algo 2-way optimized for NEON & SSE2.
v23.9

View File

@@ -99,7 +99,7 @@ typedef uint32_t set_t;
#define AES_OPT 1 << 7 // Intel Westmere, AArch64
#define VAES_OPT 1 << 8 // Icelake, Zen3
#define SHA_OPT 1 << 9 // Zen1, Icelake, AArch64
#define SHA512_OPT 1 << 10 // AArch64
#define SHA512_OPT 1 << 10 // Intel Arrow Lake, AArch64
#define NEON_OPT 1 << 11 // AArch64
// AVX10 does not have explicit algo features:

View File

@@ -2,12 +2,11 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
//#include "sph_keccak.h"
#include "bmw-hash-4way.h"
#if defined(BMW512_8WAY)
void bmw512hash_8way(void *state, const void *input)
void bmw512hash_8way( void *state, const void *input )
{
bmw512_8way_context ctx;
bmw512_8way_init( &ctx );
@@ -27,9 +26,9 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
__m512i *noncev = (__m512i*)vdata + 9;
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id;
const int thr_id = mythr->id;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
do {
@@ -43,7 +42,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
{
extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
@@ -59,9 +58,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
#elif defined(BMW512_4WAY)
//#ifdef BMW512_4WAY
void bmw512hash_4way(void *state, const void *input)
void bmw512hash_4way( void *state, const void *input )
{
bmw512_4way_context ctx;
bmw512_4way_init( &ctx );
@@ -80,10 +77,10 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
do {
@@ -96,7 +93,7 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
@@ -110,4 +107,55 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined(BMW512_2WAY)
/* Hash one 80-byte block header for two lanes at once using the
 * 2x64-bit (SSE2 / NEON) BMW-512 implementation.
 * input:  two interleaved 80-byte headers (2x64 lane layout)
 * state:  receives the two interleaved 512-bit digests            */
void bmw512hash_2x64( void *state, const void *input )
{
   bmw512_2x64_context c;

   bmw512_2x64_init( &c );
   bmw512_2x64_update( &c, input, 80 );
   bmw512_2x64_close( &c, state );
}
/* Scan a nonce range for bmw512, two nonces per iteration using the
 * 2x64-bit SIMD path (SSE2 / NEON).  Candidate solutions are reported
 * via submit_solution(); *hashes_done receives the number of nonces
 * tried.  Always returns 0.                                          */
int scanhash_bmw512_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[24*2] __attribute__ ((aligned (64)));   // 80-byte header, 2-way 64-bit interleaved
uint32_t hash[16*2] __attribute__ ((aligned (32)));    // two interleaved 512-bit hashes
uint32_t lane_hash[8] __attribute__ ((aligned (32)));  // one lane's hash, de-interleaved
// 32-bit word 7 of each lane's hash = high half of interleaved 64-bit word 3,
// hence index 3*4+1 for lane 0 (lane 1 is 2 words further: hash7[lane<<1]).
uint32_t *hash7 = &(hash[13]); // 3*4+1
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];                    // nonce lives in header word 19
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 2; // headroom: 2 nonces consumed per pass
v128_t *noncev = (v128_t*)vdata + 9;       // vector slot holding both lanes' nonces
const uint32_t Htarg = ptarget[7];         // cheap pre-filter on top hash word
const int thr_id = mythr->id;

v128_bswap32_intrlv80_2x64( vdata, pdata ); // byte-swap header & interleave both lanes

do {
// Insert byte-swapped nonces n and n+1 into the interleaved header.
*noncev = v128_intrlv_blend_32( v128_bswap32(
v128_set32( n+1, 0, n, 0 ) ), *noncev );

bmw512hash_2x64( hash, vdata );

for ( int lane = 0; lane < 2; lane++ )
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )    // quick reject before full test
{
extr_lane_2x64( lane_hash, hash, lane, 256 );   // de-interleave this lane
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
}
n += 2;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );

*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -2,7 +2,7 @@
bool register_bmw512_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT | AVX512_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
opt_target_factor = 256.0;
#if defined (BMW512_8WAY)
gate->scanhash = (void*)&scanhash_bmw512_8way;
@@ -10,6 +10,9 @@ bool register_bmw512_algo( algo_gate_t* gate )
#elif defined (BMW512_4WAY)
gate->scanhash = (void*)&scanhash_bmw512_4way;
gate->hash = (void*)&bmw512hash_4way;
#elif defined (BMW512_2WAY)
gate->scanhash = (void*)&scanhash_bmw512_2x64;
gate->hash = (void*)&bmw512hash_2x64;
#else
gate->scanhash = (void*)&scanhash_bmw512;
gate->hash = (void*)&bmw512hash;

View File

@@ -8,19 +8,27 @@
#define BMW512_8WAY 1
#elif defined(__AVX2__)
#define BMW512_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define BMW512_2WAY 1
#endif
#if defined(BMW512_8WAY)
void bmw512hash_8way( void *state, const void *input );
int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(BMW512_4WAY)
void bmw512hash_4way( void *state, const void *input );
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(BMW512_2WAY)
void bmw512hash_2x64( void *state, const void *input );
int scanhash_bmw512_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else

View File

@@ -236,9 +236,7 @@ void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBloc
}
HashReturn init_echo(hashState_echo *ctx, int nHashSize)
HashReturn init_echo( hashState_echo *ctx, int nHashSize )
{
int i, j;
@@ -280,7 +278,8 @@ HashReturn init_echo(hashState_echo *ctx, int nHashSize)
return SUCCESS;
}
HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen)
HashReturn update_echo( hashState_echo *state, const void *data,
uint32_t databitlen )
{
unsigned int uByteLength, uBlockCount, uRemainingBytes;
@@ -330,7 +329,7 @@ HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLengt
return SUCCESS;
}
HashReturn final_echo(hashState_echo *state, BitSequence *hashval)
HashReturn final_echo( hashState_echo *state, void *hashval)
{
v128_t remainingbits;
@@ -407,8 +406,8 @@ HashReturn final_echo(hashState_echo *state, BitSequence *hashval)
return SUCCESS;
}
HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen )
HashReturn update_final_echo( hashState_echo *state, void *hashval,
const void *data, uint32_t databitlen )
{
unsigned int uByteLength, uBlockCount, uRemainingBytes;
@@ -530,8 +529,8 @@ HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
return SUCCESS;
}
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
int nHashSize, const BitSequence *data, DataLength datalen )
HashReturn echo_full( hashState_echo *state, void *hashval,
int nHashSize, const void *data, uint32_t datalen )
{
int i, j;
@@ -578,7 +577,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
{
// Fill the buffer
memcpy( state->buffer + state->uBufferBytes,
(void*)data, state->uBlockLength - state->uBufferBytes );
data, state->uBlockLength - state->uBufferBytes );
// Process buffer
Compress( state, state->buffer, 1 );
@@ -601,7 +600,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
}
if( uRemainingBytes > 0 )
memcpy(state->buffer, (void*)data, uRemainingBytes);
memcpy(state->buffer, data, uRemainingBytes);
state->uBufferBytes = uRemainingBytes;
}
@@ -689,7 +688,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
}
#if 0
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
{
HashReturn hRet;
@@ -746,5 +745,6 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit
return SUCCESS;
}
#endif
#endif

View File

@@ -47,16 +47,16 @@ HashReturn init_echo(hashState_echo *state, int hashbitlen);
HashReturn reinit_echo(hashState_echo *state);
HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen);
HashReturn update_echo(hashState_echo *state, const void *data, uint32_t databitlen);
HashReturn final_echo(hashState_echo *state, BitSequence *hashval);
HashReturn final_echo(hashState_echo *state, void *hashval);
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
HashReturn hash_echo(int hashbitlen, const void *data, uint32_t databitlen, void *hashval);
HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen );
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
int nHashSize, const BitSequence *data, DataLength databitlen );
HashReturn update_final_echo( hashState_echo *state, void *hashval,
const void *data, uint32_t databitlen );
HashReturn echo_full( hashState_echo *state, void *hashval,
int nHashSize, const void *data, uint32_t databitlen );
#endif // HASH_API_H

View File

@@ -36,7 +36,6 @@
#include "sph_echo.h"
#if !defined(__AES__)
#ifdef __cplusplus
extern "C"{
@@ -1031,4 +1030,3 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
#ifdef __cplusplus
}
#endif
#endif // !AES

View File

@@ -36,8 +36,6 @@
#ifndef SPH_ECHO_H__
#define SPH_ECHO_H__
#if !defined(__AES__)
#ifdef __cplusplus
extern "C"{
#endif
@@ -318,5 +316,4 @@ void sph_echo512_addbits_and_close(
#ifdef __cplusplus
}
#endif
#endif // !AES
#endif

View File

@@ -35,8 +35,6 @@
#include "sph_groestl.h"
#if !defined(__AES__)
#ifdef __cplusplus
extern "C"{
#endif
@@ -3119,5 +3117,4 @@ sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
#ifdef __cplusplus
}
#endif // !AES
#endif

View File

@@ -42,7 +42,6 @@ extern "C"{
#include <stddef.h>
#include "compat/sph_types.h"
#if !defined(__AES__)
/**
* Output size (in bits) for Groestl-224.
*/
@@ -327,5 +326,4 @@ void sph_groestl512_addbits_and_close(
}
#endif
#endif // !AES
#endif

View File

@@ -78,7 +78,7 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
@@ -108,4 +108,53 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined(KECCAK_2WAY)
/* Keccak-256 of one 80-byte block header for two lanes at once using
 * the 2x64-bit (SSE2 / NEON) implementation.
 * input:  two interleaved 80-byte headers (2x64 lane layout)
 * state:  receives the two interleaved 256-bit digests             */
void keccakhash_2x64( void *state, const void *input )
{
   keccak256_2x64_context c;

   keccak256_2x64_init( &c );
   keccak256_2x64_update( &c, input, 80 );
   keccak256_2x64_close( &c, state );
}
/* Scan a nonce range for keccak, two nonces per iteration using the
 * 2x64-bit SIMD path (SSE2 / NEON).  Candidate solutions are reported
 * via submit_solution(); *hashes_done receives the number of nonces
 * tried.  Always returns 0.                                          */
int scanhash_keccak_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[24*2] __attribute__ ((aligned (64)));   // 80-byte header, 2-way 64-bit interleaved
uint32_t hash[16*2] __attribute__ ((aligned (32)));    // two interleaved hashes
uint32_t lane_hash[8] __attribute__ ((aligned (32)));  // one lane's hash, de-interleaved
// 32-bit word 7 of each lane's hash = high half of interleaved 64-bit word 3,
// hence index 3*4+1 for lane 0 (lane 1 is 2 words further: hash7[lane<<1]).
uint32_t *hash7 = &(hash[13]); // 3*4+1
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];                 // nonce lives in header word 19
const uint32_t first_nonce = pdata[19];
v128_t *noncev = (v128_t*)vdata + 9;    // vector slot holding both lanes' nonces
const uint32_t Htarg = ptarget[7];      // cheap pre-filter on top hash word
const int thr_id = mythr->id;
const bool bench = opt_benchmark;

v128_bswap32_intrlv80_2x64( vdata, pdata ); // byte-swap header & interleave both lanes
// NOTE(review): unlike scanhash_bmw512_2x64 the nonces are inserted WITHOUT
// v128_bswap32 and the submitted nonce is bswap_32'd below — presumably the
// keccak algo convention; confirm against the 4-way/8-way variants.
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do {
keccakhash_2x64( hash, vdata );
for ( int lane = 0; lane < 2; lane++ )
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )  // quick reject before full test
{
extr_lane_2x64( lane_hash, hash, lane, 256 );       // de-interleave this lane
if ( valid_hash( lane_hash, ptarget ))
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Advance both lane nonces by 2 (upper 32 bits of each 64-bit lane word).
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
n += 2;
} while ( (n < max_nonce-2) && !work_restart[thr_id].restart);
pdata[19] = n;
// NOTE(review): the "+ 1" differs from the sibling 2x64 scanners which
// report n - first_nonce — verify which count the caller expects.
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif

View File

@@ -17,6 +17,9 @@ bool register_keccak_algo( algo_gate_t* gate )
#elif defined (KECCAK_4WAY)
gate->scanhash = (void*)&scanhash_keccak_4way;
gate->hash = (void*)&keccakhash_4way;
#elif defined (KECCAK_2WAY)
gate->scanhash = (void*)&scanhash_keccak_2x64;
gate->hash = (void*)&keccakhash_2x64;
#else
gate->scanhash = (void*)&scanhash_keccak;
gate->hash = (void*)&keccakhash;
@@ -37,6 +40,9 @@ bool register_keccakc_algo( algo_gate_t* gate )
#elif defined (KECCAK_4WAY)
gate->scanhash = (void*)&scanhash_keccak_4way;
gate->hash = (void*)&keccakhash_4way;
#elif defined (KECCAK_2WAY)
gate->scanhash = (void*)&scanhash_keccak_2x64;
gate->hash = (void*)&keccakhash_2x64;
#else
gate->scanhash = (void*)&scanhash_keccak;
gate->hash = (void*)&keccakhash;
@@ -75,15 +81,17 @@ void sha3d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
bool register_sha3d_algo( algo_gate_t* gate )
{
hard_coded_eb = 6;
// opt_extranonce = false;
gate->optimizations = AVX2_OPT | AVX512_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
gate->gen_merkle_root = (void*)&sha3d_gen_merkle_root;
#if defined (KECCAK_8WAY)
#if defined (SHA3D_8WAY)
gate->scanhash = (void*)&scanhash_sha3d_8way;
gate->hash = (void*)&sha3d_hash_8way;
#elif defined (KECCAK_4WAY)
#elif defined (SHA3D_4WAY)
gate->scanhash = (void*)&scanhash_sha3d_4way;
gate->hash = (void*)&sha3d_hash_4way;
#elif defined (SHA3D_2WAY)
gate->scanhash = (void*)&scanhash_sha3d_2x64;
gate->hash = (void*)&sha3d_hash_2x64;
#else
gate->scanhash = (void*)&scanhash_sha3d;
gate->hash = (void*)&sha3d_hash;

View File

@@ -8,6 +8,16 @@
#define KECCAK_8WAY 1
#elif defined(__AVX2__)
#define KECCAK_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define KECCAK_2WAY 1
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define SHA3D_8WAY 1
#elif defined(__AVX2__)
#define SHA3D_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define SHA3D_2WAY 1
#endif
extern int hard_coded_eb;
@@ -16,27 +26,47 @@ extern int hard_coded_eb;
void keccakhash_8way( void *state, const void *input );
int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void sha3d_hash_8way( void *state, const void *input );
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(KECCAK_4WAY)
void keccakhash_4way( void *state, const void *input );
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
uint64_t *hashes_done, struct thr_info *mythr );
void sha3d_hash_4way( void *state, const void *input );
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(KECCAK_2WAY)
void keccakhash_2x64( void *state, const void *input );
int scanhash_keccak_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else
void keccakhash( void *state, const void *input );
int scanhash_keccak( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
uint64_t *hashes_done, struct thr_info *mythr );
#endif
#if defined(SHA3D_8WAY)
void sha3d_hash_8way( void *state, const void *input );
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(SHA3D_4WAY)
void sha3d_hash_4way( void *state, const void *input );
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(SHA3D_2WAY)
void sha3d_hash_2x64( void *state, const void *input );
int scanhash_sha3d_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else
void sha3d_hash( void *state, const void *input );
int scanhash_sha3d( struct work *work, uint32_t max_nonce,

View File

@@ -4,7 +4,7 @@
#include <stdint.h>
#include "keccak-hash-4way.h"
#if defined(KECCAK_8WAY)
#if defined(SHA3D_8WAY)
void sha3d_hash_8way(void *state, const void *input)
{
@@ -64,7 +64,7 @@ int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined(KECCAK_4WAY)
#elif defined(SHA3D_4WAY)
void sha3d_hash_4way(void *state, const void *input)
{
@@ -122,4 +122,60 @@ int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined(SHA3D_2WAY)
/* SHA3d = double Keccak-256, computed for two lanes at once with the
 * 2x64-bit (SSE2 / NEON) implementation: keccak256(keccak256(header)).
 * input:  two interleaved 80-byte headers (2x64 lane layout)
 * state:  receives the two interleaved 256-bit final digests
 * NOTE(review): midhash is 16*4 words although the interleaved first-pass
 * digests need only 16 — presumably just headroom; confirm.            */
void sha3d_hash_2x64( void *state, const void *input )
{
   uint32_t midhash[16*4] __attribute__ ((aligned (64)));
   keccak256_2x64_context c;

   // First pass: hash the 80-byte headers.
   keccak256_2x64_init( &c );
   keccak256_2x64_update( &c, input, 80 );
   keccak256_2x64_close( &c, midhash );

   // Second pass: hash the 32-byte first-pass digests.
   keccak256_2x64_init( &c );
   keccak256_2x64_update( &c, midhash, 32 );
   keccak256_2x64_close( &c, state );
}
/* Scan a nonce range for sha3d, two nonces per iteration using the
 * 2x64-bit SIMD path (SSE2 / NEON).  Candidate solutions are reported
 * via submit_solution(); *hashes_done receives the number of nonces
 * tried.  Always returns 0.                                          */
int scanhash_sha3d_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[24*2] __attribute__ ((aligned (64)));   // 80-byte header, 2-way 64-bit interleaved
uint32_t hash[16*2] __attribute__ ((aligned (32)));    // two interleaved hashes
uint32_t lane_hash[8] __attribute__ ((aligned (32)));  // one lane's hash, de-interleaved
// 32-bit word 7 of each lane's hash = high half of interleaved 64-bit word 3,
// hence index 3*4+1 for lane 0 (lane 1 is 2 words further: hash7[lane<<1]).
uint32_t *hash7 = &(hash[13]); // 3*4+1
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];                    // nonce lives in header word 19
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 2; // headroom: 2 nonces consumed per pass
v128_t *noncev = (v128_t*)vdata + 9;       // vector slot holding both lanes' nonces
const uint32_t Htarg = ptarget[7];         // cheap pre-filter on top hash word
const int thr_id = mythr->id;
const bool bench = opt_benchmark;

v128_bswap32_intrlv80_2x64( vdata, pdata ); // byte-swap header & interleave both lanes
// NOTE(review): nonces inserted without v128_bswap32 and submitted via
// bswap_32 below, matching scanhash_keccak_2x64 — confirm this is the
// intended convention for keccak-family algos.
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do {
sha3d_hash_2x64( hash, vdata );
for ( int lane = 0; lane < 2; lane++ )
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )  // quick reject
{
extr_lane_2x64( lane_hash, hash, lane, 256 );           // de-interleave this lane
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = bswap_32( n + lane );
submit_solution( work, lane_hash, mythr );
}
}
// Advance both lane nonces by 2 (upper 32 bits of each 64-bit lane word).
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
n += 2;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -9,11 +9,11 @@ bool register_hmq1725_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_hmq1725_4way;
gate->hash = (void*)&hmq1725_4way_hash;
#else
init_hmq1725_ctx();
gate->scanhash = (void*)&scanhash_hmq1725;
gate->hash = (void*)&hmq1725hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
| NEON_OPT;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -29,7 +29,6 @@ int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
void hmq1725hash( void *state, const void *input );
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_hmq1725_ctx();
#endif

View File

@@ -4,346 +4,267 @@
#include <string.h>
#include <stdint.h>
#include "algo/blake/sph_blake.h"
#include "algo/blake/blake512-hash.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/fugue/fugue-aesni.h"
#else
#include "algo/groestl/sph_groestl.h"
#include "algo/fugue/sph_fugue.h"
#endif
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
#include "algo/echo/aes_ni/hash_api.h"
#else
#include "algo/echo/sph_echo.h"
#endif
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/haval/sph-haval.h"
#include "algo/sha/sph_sha2.h"
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/fugue/fugue-aesni.h"
#else
#include "algo/groestl/sph_groestl.h"
#include "algo/echo/sph_echo.h"
#include "algo/fugue/sph_fugue.h"
#endif
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
typedef struct {
sph_blake512_context blake1, blake2;
sph_bmw512_context bmw1, bmw2, bmw3;
sph_skein512_context skein1, skein2;
sph_jh512_context jh1, jh2;
sph_keccak512_context keccak1, keccak2;
hashState_luffa luffa1, luffa2;
cubehashParam cube;
sph_shavite512_context shavite1, shavite2;
#if defined(__aarch64__)
sph_simd512_context simd1, simd2;
#else
hashState_sd simd1, simd2;
#endif
sph_hamsi512_context hamsi1;
sph_shabal512_context shabal1;
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
sph_sha512_context sha1, sha2;
sph_haval256_5_context haval1, haval2;
#if defined(__AES__)
hashState_echo echo1, echo2;
hashState_groestl groestl1, groestl2;
hashState_fugue fugue1, fugue2;
#else
sph_groestl512_context groestl1, groestl2;
sph_echo512_context echo1, echo2;
sph_fugue512_context fugue1, fugue2;
#endif
} hmq1725_ctx_holder;
static hmq1725_ctx_holder hmq1725_ctx __attribute__ ((aligned (64)));
static __thread sph_bmw512_context hmq_bmw_mid __attribute__ ((aligned (64)));
void init_hmq1725_ctx()
union _hmq1725_ctx_holder
{
sph_blake512_init(&hmq1725_ctx.blake1);
sph_blake512_init(&hmq1725_ctx.blake2);
sph_bmw512_init(&hmq1725_ctx.bmw1);
sph_bmw512_init(&hmq1725_ctx.bmw2);
sph_bmw512_init(&hmq1725_ctx.bmw3);
sph_skein512_init(&hmq1725_ctx.skein1);
sph_skein512_init(&hmq1725_ctx.skein2);
sph_jh512_init(&hmq1725_ctx.jh1);
sph_jh512_init(&hmq1725_ctx.jh2);
sph_keccak512_init(&hmq1725_ctx.keccak1);
sph_keccak512_init(&hmq1725_ctx.keccak2);
init_luffa( &hmq1725_ctx.luffa1, 512 );
init_luffa( &hmq1725_ctx.luffa2, 512 );
cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 );
sph_shavite512_init(&hmq1725_ctx.shavite1);
sph_shavite512_init(&hmq1725_ctx.shavite2);
#if defined(__aarch64__)
sph_simd512_init(&hmq1725_ctx.simd1);
sph_simd512_init(&hmq1725_ctx.simd2);
#else
init_sd( &hmq1725_ctx.simd1, 512 );
init_sd( &hmq1725_ctx.simd2, 512 );
#endif
sph_hamsi512_init(&hmq1725_ctx.hamsi1);
blake512_context blake;
sph_bmw512_context bmw;
#if defined(__AES__)
fugue512_Init( &hmq1725_ctx.fugue1, 512 );
fugue512_Init( &hmq1725_ctx.fugue2, 512 );
hashState_groestl groestl;
hashState_fugue fugue;
#else
sph_fugue512_init(&hmq1725_ctx.fugue1);
sph_fugue512_init(&hmq1725_ctx.fugue2);
sph_groestl512_context groestl;
sph_fugue512_context fugue;
#endif
sph_shabal512_init(&hmq1725_ctx.shabal1);
sph_whirlpool_init(&hmq1725_ctx.whirlpool1);
sph_whirlpool_init(&hmq1725_ctx.whirlpool2);
sph_whirlpool_init(&hmq1725_ctx.whirlpool3);
sph_whirlpool_init(&hmq1725_ctx.whirlpool4);
sph_sha512_init( &hmq1725_ctx.sha1 );
sph_sha512_init( &hmq1725_ctx.sha2 );
sph_haval256_5_init(&hmq1725_ctx.haval1);
sph_haval256_5_init(&hmq1725_ctx.haval2);
#if defined(__AES__)
init_echo( &hmq1725_ctx.echo1, 512 );
init_echo( &hmq1725_ctx.echo2, 512 );
init_groestl( &hmq1725_ctx.groestl1, 64 );
init_groestl( &hmq1725_ctx.groestl2, 64 );
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
hashState_echo echo;
#else
sph_groestl512_init( &hmq1725_ctx.groestl1 );
sph_groestl512_init( &hmq1725_ctx.groestl2 );
sph_echo512_init( &hmq1725_ctx.echo1 );
sph_echo512_init( &hmq1725_ctx.echo2 );
sph_echo512_context echo;
#endif
}
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
sph_sha512_context sha;
sph_haval256_5_context haval;
};
typedef union _hmq1725_ctx_holder hmq1725_ctx_holder;
void hmq_bmw512_midstate( const void* input )
{
memcpy( &hmq_bmw_mid, &hmq1725_ctx.bmw1, sizeof hmq_bmw_mid );
sph_bmw512( &hmq_bmw_mid, input, 64 );
}
__thread hmq1725_ctx_holder h_ctx __attribute__ ((aligned (64)));
//static hmq1725_ctx_holder hmq1725_ctx __attribute__ ((aligned (64)));
//static __thread sph_bmw512_context hmq_bmw_mid __attribute__ ((aligned (64)));
extern void hmq1725hash(void *state, const void *input)
{
const uint32_t mask = 24;
uint32_t hashA[32] __attribute__((aligned(64)));
uint32_t hashB[32] __attribute__((aligned(64)));
const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16
uint32_t hashA[32] __attribute__((aligned(32)));
uint32_t hashB[32] __attribute__((aligned(32)));
hmq1725_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy(&h_ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
sph_bmw512_init( &ctx.bmw );
sph_bmw512( &ctx.bmw, input, 80 );
sph_bmw512_close( &ctx.bmw, hashA ); //1
memcpy( &h_ctx.bmw1, &hmq_bmw_mid, sizeof hmq_bmw_mid );
sph_bmw512( &h_ctx.bmw1, input + midlen, tail );
sph_bmw512_close(&h_ctx.bmw1, hashA); //1
sph_whirlpool (&h_ctx.whirlpool1, hashA, 64); //0
sph_whirlpool_close(&h_ctx.whirlpool1, hashB); //1
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //0
sph_whirlpool_close( &ctx.whirlpool, hashB ); //1
if ( hashB[0] & mask ) //1
{
#if defined(__AES__)
update_and_final_groestl( &h_ctx.groestl1, (char*)hashA,
(const char*)hashB, 512 );
groestl512_full( &ctx.groestl, hashA, hashB, 512 );
#else
sph_groestl512 (&h_ctx.groestl1, hashB, 64); //1
sph_groestl512_close(&h_ctx.groestl1, hashA); //2
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, hashB, 64 ); //1
sph_groestl512_close( &ctx.groestl, hashA ); //2
#endif
}
else
{
sph_skein512 (&h_ctx.skein1, hashB, 64); //1
sph_skein512_close(&h_ctx.skein1, hashA); //2
sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, hashB, 64 ); //1
sph_skein512_close( &ctx.skein, hashA ); //2
}
sph_jh512 (&h_ctx.jh1, hashA, 64); //3
sph_jh512_close(&h_ctx.jh1, hashB); //4
sph_jh512_init( &ctx.jh );
sph_jh512( &ctx.jh, hashA, 64 ); //3
sph_jh512_close( &ctx.jh, hashB ); //4
sph_keccak512 (&h_ctx.keccak1, hashB, 64); //2
sph_keccak512_close(&h_ctx.keccak1, hashA); //3
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, hashB, 64 ); //2
sph_keccak512_close( &ctx.keccak, hashA ); //3
if ( hashA[0] & mask ) //4
{
sph_blake512 (&h_ctx.blake1, hashA, 64); //
sph_blake512_close(&h_ctx.blake1, hashB); //5
blake512_init( &ctx.blake );
blake512_update( &ctx.blake, hashA, 64 );
blake512_close( &ctx.blake, hashB );
}
else
{
sph_bmw512 (&h_ctx.bmw2, hashA, 64); //4
sph_bmw512_close(&h_ctx.bmw2, hashB); //5
sph_bmw512_init( &ctx.bmw );
sph_bmw512( &ctx.bmw, hashA, 64 ); //4
sph_bmw512_close( &ctx.bmw, hashB ); //5
}
update_and_final_luffa( &h_ctx.luffa1, hashA, hashB, 64 );
luffa_full( &ctx.luffa, hashA, 512, hashB, 64 );
cubehashUpdateDigest( &h_ctx.cube, hashB, hashA, 64 );
cubehash_full( &ctx.cube, hashB, 512, hashA, 64 );
if ( hashB[0] & mask ) //7
{
sph_keccak512 (&h_ctx.keccak2, hashB, 64); //
sph_keccak512_close(&h_ctx.keccak2, hashA); //8
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, hashB, 64 ); //
sph_keccak512_close( &ctx.keccak, hashA ); //8
}
else
{
sph_jh512 (&h_ctx.jh2, hashB, 64); //7
sph_jh512_close(&h_ctx.jh2, hashA); //8
sph_jh512_init( &ctx.jh );
sph_jh512( &ctx.jh, hashB, 64 ); //7
sph_jh512_close( &ctx.jh, hashA ); //8
}
sph_shavite512 (&h_ctx.shavite1, hashA, 64); //3
sph_shavite512_close(&h_ctx.shavite1, hashB); //4
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hashA, 64 ); //3
sph_shavite512_close( &ctx.shavite, hashB ); //4
#if defined(__aarch64__)
sph_simd512 (&h_ctx.simd1, hashB, 64); //3
sph_simd512_close(&h_ctx.simd1, hashA); //4
#else
update_final_sd( &h_ctx.simd1, (BitSequence *)hashA,
(const BitSequence *)hashB, 512 );
#endif
simd512_ctx( &ctx.simd, hashA, hashB, 64 );
if ( hashA[0] & mask ) //4
{
sph_whirlpool (&h_ctx.whirlpool2, hashA, 64); //
sph_whirlpool_close(&h_ctx.whirlpool2, hashB); //5
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //
sph_whirlpool_close( &ctx.whirlpool, hashB ); //5
}
else
{
sph_haval256_5 (&h_ctx.haval1, hashA, 64); //4
sph_haval256_5_close(&h_ctx.haval1, hashB); //5
sph_haval256_5_init( &ctx.haval );
sph_haval256_5( &ctx.haval, hashA, 64 ); //4
sph_haval256_5_close( &ctx.haval, hashB ); //5
memset(&hashB[8], 0, 32);
}
#if defined(__AES__)
update_final_echo ( &h_ctx.echo1, (BitSequence *)hashA,
(const BitSequence *)hashB, 512 );
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
echo_full( &ctx.echo, hashA, 512, hashB, 64 );
#else
sph_echo512 (&h_ctx.echo1, hashB, 64); //5
sph_echo512_close(&h_ctx.echo1, hashA); //6
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, hashB, 64 ); //5
sph_echo512_close( &ctx.echo, hashA ); //6
#endif
sph_blake512 (&h_ctx.blake2, hashA, 64); //6
sph_blake512_close(&h_ctx.blake2, hashB); //7
blake512_init( &ctx.blake );
blake512_update( &ctx.blake, hashA, 64 );
blake512_close( &ctx.blake, hashB );
if ( hashB[0] & mask ) //7
{
sph_shavite512 (&h_ctx.shavite2, hashB, 64); //
sph_shavite512_close(&h_ctx.shavite2, hashA); //8
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hashB, 64 ); //
sph_shavite512_close( &ctx.shavite, hashA ); //8
}
else
{
update_and_final_luffa( &h_ctx.luffa2, hashA, hashB, 64 );
}
luffa_full( &ctx.luffa, hashA, 512, hashB, 64 );
sph_hamsi512 (&h_ctx.hamsi1, hashA, 64); //3
sph_hamsi512_close(&h_ctx.hamsi1, hashB); //4
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, hashA, 64 ); //3
sph_hamsi512_close( &ctx.hamsi, hashB ); //4
#if defined(__AES__)
fugue512_Update( &h_ctx.fugue1, hashB, 512 ); //2 ////
fugue512_Final( &h_ctx.fugue1, hashA ); //3
fugue512_full( &ctx.fugue, hashA, hashB, 64 );
#else
sph_fugue512 (&h_ctx.fugue1, hashB, 64); //2 ////
sph_fugue512_close(&h_ctx.fugue1, hashA); //3
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hashB, 64 ); //2 ////
sph_fugue512_close( &ctx.fugue, hashA ); //3
#endif
if ( hashA[0] & mask ) //4
{
#if defined(__AES__)
update_final_echo ( &h_ctx.echo2, (BitSequence *)hashB,
(const BitSequence *)hashA, 512 );
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
echo_full( &ctx.echo, hashB, 512, hashA, 64 );
#else
sph_echo512 (&h_ctx.echo2, hashA, 64); //
sph_echo512_close(&h_ctx.echo2, hashB); //5
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, hashA, 64 ); //
sph_echo512_close( &ctx.echo, hashB ); //5
#endif
}
else
{
#if defined(__aarch64__)
sph_simd512(&h_ctx.simd2, hashA, 64); //6
sph_simd512_close(&h_ctx.simd2, hashB); //7
#else
update_final_sd( &h_ctx.simd2, (BitSequence *)hashB,
(const BitSequence *)hashA, 512 );
#endif
}
simd512_ctx( &ctx.simd, hashB, hashA, 64 );
sph_shabal512 (&h_ctx.shabal1, hashB, 64); //5
sph_shabal512_close(&h_ctx.shabal1, hashA); //6
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, hashB, 64 ); //5
sph_shabal512_close( &ctx.shabal, hashA ); //6
sph_whirlpool (&h_ctx.whirlpool3, hashA, 64); //6
sph_whirlpool_close(&h_ctx.whirlpool3, hashB); //7
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //6
sph_whirlpool_close( &ctx.whirlpool, hashB ); //7
if ( hashB[0] & mask ) //7
{
#if defined(__AES__)
fugue512_Update( &h_ctx.fugue2, hashB, 512 ); //
fugue512_Final( &h_ctx.fugue2, hashA ); //8
fugue512_full( &ctx.fugue, hashA, hashB, 64 );
#else
sph_fugue512 (&h_ctx.fugue2, hashB, 64); //
sph_fugue512_close(&h_ctx.fugue2, hashA); //8
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hashB, 64 ); //
sph_fugue512_close( &ctx.fugue, hashA ); //8
#endif
}
else
{
sph_sha512( &h_ctx.sha1, hashB, 64 );
sph_sha512_close( &h_ctx.sha1, hashA );
sph_sha512_init( &ctx.sha );
sph_sha512( &ctx.sha, hashB, 64 );
sph_sha512_close( &ctx.sha, hashA );
}
#if defined(__AES__)
update_and_final_groestl( &h_ctx.groestl2, (char*)hashB,
(const char*)hashA, 512 );
groestl512_full( &ctx.groestl, hashB, hashA, 512 );
#else
sph_groestl512 (&h_ctx.groestl2, hashA, 64); //3
sph_groestl512_close(&h_ctx.groestl2, hashB); //4
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, hashA, 64 ); //3
sph_groestl512_close( &ctx.groestl, hashB ); //4
#endif
sph_sha512( &h_ctx.sha2, hashB, 64 );
sph_sha512_close( &h_ctx.sha2, hashA );
sph_sha512_init( &ctx.sha );
sph_sha512( &ctx.sha, hashB, 64 );
sph_sha512_close( &ctx.sha, hashA );
if ( hashA[0] & mask ) //4
{
sph_haval256_5 (&h_ctx.haval2, hashA, 64); //
sph_haval256_5_close(&h_ctx.haval2, hashB); //5
memset(&hashB[8], 0, 32);
sph_haval256_5_init( &ctx.haval );
sph_haval256_5( &ctx.haval, hashA, 64 ); //
sph_haval256_5_close( &ctx.haval, hashB ); //5
memset( &hashB[8], 0, 32 );
}
else
{
sph_whirlpool (&h_ctx.whirlpool4, hashA, 64); //4
sph_whirlpool_close(&h_ctx.whirlpool4, hashB); //5
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, hashA, 64 ); //4
sph_whirlpool_close( &ctx.whirlpool, hashB ); //5
}
sph_bmw512 (&h_ctx.bmw3, hashB, 64); //5
sph_bmw512_close(&h_ctx.bmw3, hashA); //6
sph_bmw512_init( &ctx.bmw );
sph_bmw512( &ctx.bmw, hashB, 64 ); //5
sph_bmw512_close( &ctx.bmw, hashA ); //6
memcpy(state, hashA, 32);
memcpy( state, hashA, 32 );
}
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
// uint32_t endiandata[32] __attribute__((aligned(64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t endiandata[20] __attribute__((aligned(32)));
uint32_t hash64[8] __attribute__((aligned(32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1;
@@ -356,7 +277,7 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
for (int k = 0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
hmq_bmw512_midstate( endiandata );
// hmq_bmw512_midstate( endiandata );
// if (opt_debug)
// {

View File

@@ -23,13 +23,12 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
*sptr = '\0';
}
static __thread x16r_context_overlay hex_ctx;
int hex_hash( void* output, const void* input, int thrid )
{
uint32_t _ALIGN(128) hash[16];
x16r_context_overlay ctx;
memcpy( &ctx, &hex_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) );
void *in = (void*) input;
int size = 80;
@@ -52,7 +51,7 @@ int hex_hash( void* output, const void* input, int thrid )
break;
case GROESTL:
#if defined(__AES__)
groestl512_full( &ctx.groestl, (char*)hash, (char*)in, size<<3 );
groestl512_full( &ctx.groestl, hash, in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
@@ -87,7 +86,7 @@ int hex_hash( void* output, const void* input, int thrid )
case LUFFA:
if ( i == 0 )
{
update_and_final_luffa( &ctx.luffa, hash, (const void*)in+64, 16 );
update_and_final_luffa( &ctx.luffa, hash, in+64, 16 );
}
else
{
@@ -97,7 +96,7 @@ int hex_hash( void* output, const void* input, int thrid )
break;
case CUBEHASH:
if ( i == 0 )
cubehashUpdateDigest( &ctx.cube, hash, (const void*)in+64, 16 );
cubehashUpdateDigest( &ctx.cube, hash, in+64, 16 );
else
{
cubehashInit( &ctx.cube, 512, 16, 32 );
@@ -108,26 +107,15 @@ int hex_hash( void* output, const void* input, int thrid )
shavite512_full( &ctx.shavite, hash, in, size );
break;
case SIMD:
#if defined(__aarch64__)
sph_simd512_init( &ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#endif
simd512_ctx( &ctx.simd, hash, in, size<<3 );
break;
case ECHO:
#if defined(__AES__)
echo_full( &ctx.echo, (BitSequence *)hash, 512,
(const BitSequence *)in, size );
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
echo_full( &ctx.echo, hash, 512, in, size );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
sph_echo512_close( &ctx.echo, hash );
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
sph_echo512_close( &ctx.echo, hash );
#endif
break;
case HAMSI:
@@ -216,32 +204,32 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
switch ( algo )
{
case JH:
sph_jh512_init( &hex_ctx.jh );
sph_jh512( &hex_ctx.jh, edata, 64 );
sph_jh512_init( &x16r_ref_ctx.jh );
sph_jh512( &x16r_ref_ctx.jh, edata, 64 );
break;
case SKEIN:
sph_skein512_init( &hex_ctx.skein );
sph_skein512( &hex_ctx.skein, edata, 64 );
sph_skein512_init( &x16r_ref_ctx.skein );
sph_skein512( &x16r_ref_ctx.skein, edata, 64 );
break;
case LUFFA:
init_luffa( &hex_ctx.luffa, 512 );
update_luffa( &hex_ctx.luffa, edata, 64 );
init_luffa( &x16r_ref_ctx.luffa, 512 );
update_luffa( &x16r_ref_ctx.luffa, edata, 64 );
break;
case CUBEHASH:
cubehashInit( &hex_ctx.cube, 512, 16, 32 );
cubehashUpdate( &hex_ctx.cube, edata, 64 );
cubehashInit( &x16r_ref_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16r_ref_ctx.cube, edata, 64 );
break;
case HAMSI:
sph_hamsi512_init( &hex_ctx.hamsi );
sph_hamsi512( &hex_ctx.hamsi, edata, 64 );
sph_hamsi512_init( &x16r_ref_ctx.hamsi );
sph_hamsi512( &x16r_ref_ctx.hamsi, edata, 64 );
break;
case SHABAL:
sph_shabal512_init( &hex_ctx.shabal );
sph_shabal512( &hex_ctx.shabal, edata, 64 );
sph_shabal512_init( &x16r_ref_ctx.shabal );
sph_shabal512( &x16r_ref_ctx.shabal, edata, 64 );
break;
case WHIRLPOOL:
sph_whirlpool_init( &hex_ctx.whirlpool );
sph_whirlpool( &hex_ctx.whirlpool, edata, 64 );
sph_whirlpool_init( &x16r_ref_ctx.whirlpool );
sph_whirlpool( &x16r_ref_ctx.whirlpool, edata, 64 );
break;
}

View File

@@ -14,9 +14,9 @@
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/simd-hash-2way.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#endif
//#if defined(__aarch64__)
// #include "algo/simd/sph_simd.h"
//#endif
#include "algo/hamsi/sph_hamsi.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
@@ -24,10 +24,14 @@
#include "algo/yespower/yespower.h"
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#else
#include "algo/echo/sph_echo.h"
#endif
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
#include "algo/groestl/aes_ni/hash-groestl.h"
#else
#include "algo/groestl/sph_groestl.h"
#endif
#include "algo/echo/sph_echo.h"
#include "algo/groestl/sph_groestl.h"
#if defined(__AES__)
#include "algo/fugue/fugue-aesni.h"
#else

View File

@@ -971,4 +971,405 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined (X16R_2WAY)
// Pre-hash the constant leading bytes of the 80-byte block header for the
// FIRST algo in the current x16r hash order, saving the midstate in the
// thread-local x16r_ctx. The per-nonce scan loop then only has to process
// the remaining tail bytes (which include the nonce) for step 0.
// vdata: output, 2x64-bit interleaved header data (2 lanes).
// pdata: input, one 80-byte little-endian header (byte-swapped here).
// NOTE(review): the prehash length per algo (64/72/76) must match the tail
// length consumed in x16r_2x64_hash_generic for the same algo — verify both
// sides when changing either.
void x16r_2x64_prehash( void *vdata, void *pdata )
{
uint32_t edata[20] __attribute__ ((aligned (64)));
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
// 2-way parallel: interleave first, prehash 64 bytes.
v128_bswap32_intrlv80_2x64( vdata, pdata );
jh512_2x64_init( &x16r_ctx.jh );
jh512_2x64_update( &x16r_ctx.jh, vdata, 64 );
break;
case KECCAK:
// Keccak-512 block size is 72 bytes, so 72 can be prehashed.
v128_bswap32_intrlv80_2x64( vdata, pdata );
keccak512_2x64_init( &x16r_ctx.keccak );
keccak512_2x64_update( &x16r_ctx.keccak, vdata, 72 );
break;
case SKEIN:
v128_bswap32_intrlv80_2x64( vdata, pdata );
skein512_2x64_prehash64( &x16r_ctx.skein, vdata );
break;
case LUFFA:
{
// 1-way implementation: prehash on the linear header, then
// interleave the same data into both lanes for the scan loop.
v128_bswap32_80( edata, pdata );
init_luffa( &x16r_ctx.luffa, 512 );
update_luffa( &x16r_ctx.luffa, edata, 64 );
intrlv_2x64( vdata, edata, edata, 640 );
}
break;
case CUBEHASH:
{
v128_bswap32_80( edata, pdata );
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16r_ctx.cube, edata, 64 );
intrlv_2x64( vdata, edata, edata, 640 );
}
break;
case HAMSI:
#if defined(__SSE4_2__) || defined(__ARM_NEON)
// 2-way Hamsi available: prehash 72 bytes in interleaved form.
v128_bswap32_intrlv80_2x64( vdata, pdata );
hamsi512_2x64_init( &x16r_ctx.hamsi );
hamsi512_2x64_update( &x16r_ctx.hamsi, vdata, 72 );
#else
v128_bswap32_80( edata, pdata );
sph_hamsi512_init( &x16r_ctx.hamsi );
sph_hamsi512( &x16r_ctx.hamsi, edata, 72 );
intrlv_2x64( vdata, edata, edata, 640 );
#endif
break;
case FUGUE:
// Fugue can absorb 76 bytes, leaving only the 4-byte nonce.
v128_bswap32_80( edata, pdata );
#if defined(__AES__)
fugue512_init( &x16r_ctx.fugue );
fugue512_update( &x16r_ctx.fugue, edata, 76 );
#else
sph_fugue512_init( &x16r_ctx.fugue );
sph_fugue512( &x16r_ctx.fugue, edata, 76 );
#endif
intrlv_2x64( vdata, edata, edata, 640 );
break;
case SHABAL:
v128_bswap32_80( edata, pdata );
sph_shabal512_init( &x16r_ctx.shabal );
sph_shabal512( &x16r_ctx.shabal, edata, 64);
intrlv_2x64( vdata, edata, edata, 640 );
break;
case WHIRLPOOL:
v128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16r_ctx.whirlpool );
sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 );
intrlv_2x64( vdata, edata, edata, 640 );
break;
default:
// No midstate optimization for this algo: just byte-swap and
// interleave the header for the 2-way scan loop.
v128_bswap32_intrlv80_2x64( vdata, pdata );
}
}
// Run the full 16-step x16r hash chain on two interleaved lanes.
// input: 2x64-bit interleaved data for two 80-byte headers (640 bytes).
// output: two concatenated 64-byte digests (lane 0 at offset 0, lane 1 at 64).
// Returns 0 if a work restart was requested between steps, 1 on completion.
// Step 0 (i == 0) resumes from the midstate saved by x16r_2x64_prehash via
// the thread-local x16r_ctx; later steps reinitialize their contexts.
int x16r_2x64_hash_generic( void* output, const void* input, int thrid )
{
uint32_t vhash[20*2] __attribute__ ((aligned (64)));
uint32_t hash0[20] __attribute__ ((aligned (32)));
uint32_t hash1[20] __attribute__ ((aligned (32)));
x16r_2x64_context_overlay ctx;
// Copy the prehashed midstate; also re-copied below whenever a 1-way
// context is consumed by lane 0 and needed again for lane 1.
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
int size = 80;   // 80 bytes for step 0, 64 for all later steps (set below)
// De-interleave into per-lane buffers for the 1-way (sph/scalar) algos.
dintrlv_2x64( hash0, hash1, input, 640 );
for ( int i = 0; i < 16; i++ )
{
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
if ( i == 0 )
blake512_2x64_full( &ctx.blake, vhash, input, size );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
blake512_2x64_full( &ctx.blake, vhash, vhash, size );
}
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case BMW:
bmw512_2x64_init( &ctx.bmw );
if ( i == 0 )
bmw512_2x64_update( &ctx.bmw, input, size );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
bmw512_2x64_update( &ctx.bmw, vhash, size );
}
bmw512_2x64_close( &ctx.bmw, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case GROESTL:
// 1-way Groestl: each lane hashed independently (full init inside).
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
groestl512_full( &ctx.groestl, hash0, in0, size<<3 );
groestl512_full( &ctx.groestl, hash1, in1, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in0, size );
sph_groestl512_close( &ctx.groestl, hash0 );
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in1, size );
sph_groestl512_close( &ctx.groestl, hash1 );
#endif
break;
case JH:
if ( i == 0 )
// Resume midstate: 64 bytes prehashed, feed the last 16 bytes.
// Offset 64*2 because the lanes are 64-bit interleaved.
jh512_2x64_update( &ctx.jh, input + (64*2), 16 );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
jh512_2x64_init( &ctx.jh );
jh512_2x64_update( &ctx.jh, vhash, size );
}
jh512_2x64_close( &ctx.jh, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case KECCAK:
if ( i == 0 )
// 72 bytes prehashed, feed the last 8 bytes.
keccak512_2x64_update( &ctx.keccak, input + (72*2), 8 );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
keccak512_2x64_init( &ctx.keccak );
keccak512_2x64_update( &ctx.keccak, vhash, size );
}
keccak512_2x64_close( &ctx.keccak, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case SKEIN:
if ( i == 0 )
skein512_2x64_final16( &ctx.skein, vhash, input + (64*2) );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
skein512_2x64_full( &ctx.skein, vhash, vhash, size );
}
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case LUFFA:
if ( i == 0 )
{
// Finish from midstate: feed the last 16 bytes of each lane.
// The 1-way context is consumed, so restore it before lane 1.
update_and_final_luffa( &ctx.luffa, hash0, in0 + 64, 16 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
update_and_final_luffa( &ctx.luffa, hash1, in1 + 64, 16 );
}
else
{
luffa_full( &ctx.luffa, hash0, 512, hash0, size );
luffa_full( &ctx.luffa, hash1, 512, hash1, size );
}
break;
case CUBEHASH:
if ( i == 0 )
{
cubehashUpdateDigest( &ctx.cube, hash0, in0 + 64, 16 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, hash1, in1 + 64, 16 );
}
else
{
cubehash_full( &ctx.cube, hash0, 512, hash0, size );
cubehash_full( &ctx.cube, hash1, 512, hash1, size );
}
break;
case SHAVITE:
shavite512_full( &ctx.shavite, hash0, in0, size );
shavite512_full( &ctx.shavite, hash1, in1, size );
break;
case SIMD:
simd512_ctx( &ctx.simd, hash0, in0, size );
simd512_ctx( &ctx.simd, hash1, in1, size );
break;
case ECHO:
#if defined(__AES__)
echo_full( &ctx.echo, hash0, 512, in0, size );
echo_full( &ctx.echo, hash1, 512, in1, size );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in0, size );
sph_echo512_close( &ctx.echo, hash0 );
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in1, size );
sph_echo512_close( &ctx.echo, hash1 );
#endif
break;
case HAMSI:
#if defined(__SSE4_2__) || defined(__ARM_NEON)
if ( i == 0 )
// 72 bytes prehashed 2-way, feed the last 8 bytes.
hamsi512_2x64_update( &ctx.hamsi, input + (72*2), 8 );
else
{
intrlv_2x64( vhash, hash0, hash1, size<<3 );
hamsi512_2x64_init( &ctx.hamsi );
hamsi512_2x64_update( &ctx.hamsi, vhash, size );
}
hamsi512_2x64_close( &ctx.hamsi, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
#else
if ( i == 0 )
{
sph_hamsi512( &ctx.hamsi, in0 + 72, 8 );
sph_hamsi512_close( &ctx.hamsi, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
sph_hamsi512( &ctx.hamsi, in1 + 72, 8 );
sph_hamsi512_close( &ctx.hamsi, hash1 );
}
else
{
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, hash0, size );
sph_hamsi512_close( &ctx.hamsi, hash0 );
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, hash1, size );
sph_hamsi512_close( &ctx.hamsi, hash1 );
}
#endif
break;
case FUGUE:
#if defined(__AES__)
if ( i == 0 )
{
// 76 bytes prehashed, feed only the 4-byte nonce.
// Note only the fugue member is restored here, not the whole ctx.
fugue512_update( &ctx.fugue, in0 + 76, 4 );
fugue512_final( &ctx.fugue, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in1 + 76, 4 );
fugue512_final( &ctx.fugue, hash1 );
}
else
{
fugue512_full( &ctx.fugue, hash0, hash0, size );
fugue512_full( &ctx.fugue, hash1, hash1, size );
}
#else
if ( i == 0 )
{
sph_fugue512( &ctx.fugue, in0 + 76, 4 );
sph_fugue512_close( &ctx.fugue, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, in1 + 76, 4 );
sph_fugue512_close( &ctx.fugue, hash1 );
}
else
{
sph_fugue512_full( &ctx.fugue, hash0, hash0, size );
sph_fugue512_full( &ctx.fugue, hash1, hash1, size );
}
#endif
break;
case SHABAL:
if ( i == 0 )
{
sph_shabal512( &ctx.shabal, in0 + 64, 16 );
sph_shabal512_close( &ctx.shabal, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
sph_shabal512( &ctx.shabal, in1 + 64, 16 );
sph_shabal512_close( &ctx.shabal, hash1 );
}
else
{
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, hash0, size );
sph_shabal512_close( &ctx.shabal, hash0 );
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, hash1, size );
sph_shabal512_close( &ctx.shabal, hash1 );
}
break;
case WHIRLPOOL:
if ( i == 0 )
{
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
// NOTE(review): this second restore of ctx looks redundant — both
// lane digests are already produced and step 1 reinitializes its
// own context. Harmless but worth confirming/removing.
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
}
else
{
sph_whirlpool512_full( &ctx.whirlpool, hash0, hash0, size );
sph_whirlpool512_full( &ctx.whirlpool, hash1, hash1, size );
}
break;
case SHA_512:
// NOTE(review): init is called again in the else branch below,
// making this first init redundant on that path (harmless).
sha512_2x64_init( &ctx.sha512 );
if ( i == 0 )
sha512_2x64_update( &ctx.sha512, input, size );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
sha512_2x64_init( &ctx.sha512 );
sha512_2x64_update( &ctx.sha512, vhash, size );
}
sha512_2x64_close( &ctx.sha512, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
}
// Abort between steps if new work arrived.
if ( work_restart[thrid].restart ) return 0;
size = 64;
}
memcpy( output, hash0, 64 );
memcpy( output+64, hash1, 64 );
return 1;
}
// Thin wrapper over the generic 2-lane chain: compute both 64-byte lane
// digests, then copy the first 32 bytes of each into the caller's buffer
// (lane 0 -> output[0..31], lane 1 -> output[32..63]).
// Returns 0 if the underlying hash aborted on a work restart, else 1.
int x16r_2x64_hash( void* output, const void* input, int thrid )
{
uint8_t lane_digests[2*64] __attribute__ ((aligned (64)));
uint8_t *out = (uint8_t*) output;

if ( !x16r_2x64_hash_generic( lane_digests, input, thrid ) )
   return 0;

memcpy( out,      &lane_digests[ 0], 32 );
memcpy( out + 32, &lane_digests[64], 32 );
return 1;
}
// Scan nonces two at a time (2x64 SIMD lanes) for an x16r share.
// Recomputes the hash order and prehash midstate whenever ntime changes.
// On a valid hash, submits the solution with the matching per-lane nonce.
// Returns 0; progress is reported via pdata[19] and *hashes_done.
int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*2] __attribute__ ((aligned (64)));
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
uint32_t bedata1[2];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 2;
uint32_t n = first_nonce;
v128_t *noncev = (v128_t*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
volatile uint8_t *restart = &(work_restart[thr_id].restart);

if ( bench ) ptarget[7] = 0x0cff;

// The hash order is derived from the previous block hash (words 1-2).
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );

static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
   x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
   s_ntime = ntime;
   if ( opt_debug && !thr_id )
       applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
}

x16r_2x64_prehash( vdata, pdata );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{
   // Bug fix: the original had a stray ';' after this 'if', turning the
   // success check into a no-op so stale hash data from an aborted
   // (restarted) hash could still be tested and submitted.
   if ( x16r_2x64_hash( hash, vdata, thr_id ) )
   {
      for ( int i = 0; i < 2; i++ )
         if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
         {
            pdata[19] = bswap_32( n+i );
            submit_solution( work, hash+(i<<3), mythr );
         }
   }
   // Advance both lane nonces by 2 (each lane holds nonce in its high word).
   *noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
   n += 2;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -13,10 +13,13 @@ __thread x16r_8way_context_overlay x16r_ctx;
__thread x16r_4way_context_overlay x16r_ctx;
#elif defined (X16R_2WAY)
__thread x16r_2x64_context_overlay x16r_ctx;
#endif
__thread x16r_context_overlay x16_ctx;
__thread x16r_context_overlay x16r_ref_ctx;
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
{
@@ -58,11 +61,15 @@ bool register_x16r_algo( algo_gate_t* gate )
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16R_2WAY)
gate->scanhash = (void*)&scanhash_x16r_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
gate->scanhash = (void*)&scanhash_x16r;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
| NEON_OPT;
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
opt_target_factor = 256.0;
return true;
@@ -76,11 +83,15 @@ bool register_x16rv2_algo( algo_gate_t* gate )
#elif defined (X16RV2_4WAY)
gate->scanhash = (void*)&scanhash_x16rv2_4way;
gate->hash = (void*)&x16rv2_4way_hash;
#elif defined (X16RV2_2WAY)
gate->scanhash = (void*)&scanhash_x16rv2_2x64;
gate->hash = (void*)&x16rv2_2x64_hash;
#else
gate->scanhash = (void*)&scanhash_x16rv2;
gate->hash = (void*)&x16rv2_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
| NEON_OPT;
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
opt_target_factor = 256.0;
return true;
@@ -94,11 +105,15 @@ bool register_x16s_algo( algo_gate_t* gate )
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16r_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16R_2WAY)
gate->scanhash = (void*)&scanhash_x16r_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
gate->scanhash = (void*)&scanhash_x16r;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
| NEON_OPT;
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
opt_target_factor = 256.0;
return true;
@@ -108,7 +123,6 @@ bool register_x16s_algo( algo_gate_t* gate )
//
// X16RT
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash )
{
int32_t maskedTime = timeStamp & 0xffffff80;
@@ -221,34 +235,42 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
bool register_x16rt_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
#if defined (X16RT_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
#elif defined (X16RT_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16RT_2WAY)
gate->scanhash = (void*)&scanhash_x16rt_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
gate->scanhash = (void*)&scanhash_x16rt;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
| NEON_OPT;
opt_target_factor = 256.0;
return true;
};
bool register_x16rt_veil_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
#if defined (X16RT_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
#elif defined (X16RT_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16r_4way_hash;
#elif defined (X16RT_2WAY)
gate->scanhash = (void*)&scanhash_x16rt_2x64;
gate->hash = (void*)&x16r_2x64_hash;
#else
gate->scanhash = (void*)&scanhash_x16rt;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
| NEON_OPT;
gate->build_extraheader = (void*)&veil_build_extraheader;
opt_target_factor = 256.0;
return true;
@@ -262,7 +284,7 @@ bool register_hex_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_hex;
gate->hash = (void*)&x16r_hash;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
gate->gen_merkle_root = (void*)&sha256_gen_merkle_root;
opt_target_factor = 128.0;
return true;
@@ -274,20 +296,25 @@ bool register_hex_algo( algo_gate_t* gate )
bool register_x21s_algo( algo_gate_t* gate )
{
#if defined (X16R_8WAY)
#if defined (X21S_8WAY)
gate->scanhash = (void*)&scanhash_x21s_8way;
gate->hash = (void*)&x21s_8way_hash;
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
#elif defined (X16R_4WAY)
#elif defined (X21S_4WAY)
gate->scanhash = (void*)&scanhash_x21s_4way;
gate->hash = (void*)&x21s_4way_hash;
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
#elif defined (X21S_2WAY)
gate->scanhash = (void*)&scanhash_x21s_2x64;
gate->hash = (void*)&x21s_2x64_hash;
gate->miner_thread_init = (void*)&x21s_2x64_thread_init;
#else
gate->scanhash = (void*)&scanhash_x21s;
gate->hash = (void*)&x21s_hash;
gate->miner_thread_init = (void*)&x21s_thread_init;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT
| NEON_OPT;
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
opt_target_factor = 256.0;
return true;

View File

@@ -7,13 +7,15 @@
#include <unistd.h>
#include "algo/blake/blake512-hash.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/sph_simd.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
@@ -21,13 +23,13 @@
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sha512-hash.h"
#if defined(__AES__)
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/fugue/fugue-aesni.h"
#endif
#if defined (__AVX2__)
//#if defined (__AVX2__)
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
@@ -39,7 +41,7 @@
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/shabal/shabal-hash-4way.h"
#endif
//#endif
#if defined(__VAES__)
#include "algo/groestl/groestl512-hash-4way.h"
@@ -48,28 +50,41 @@
#include "algo/echo/echo-hash-4way.h"
#endif
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
// X16R, X16S
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X16R_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16R_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define X16R_2WAY 1
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X16R_8WAY 1
#define X16RV2_8WAY 1
#define X16RT_8WAY 1
#define X21S_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16RV2_4WAY 1
#define X16RT_4WAY 1
#define X21S_4WAY 1
#define X16R_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define X16RV2_2WAY 1
#endif
// X16RT, VEIL
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X16RT_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16RT_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define X16RT_2WAY 1
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X21S_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X21S_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define X21S_2WAY 1
#endif
enum x16r_Algo {
BLAKE = 0,
BMW,
@@ -167,7 +182,6 @@ union _x16r_4way_context_overlay
keccak512_4way_context keccak;
luffa_2way_context luffa;
cube_2way_context cube;
hashState_luffa luffa1;
simd_2way_context simd;
hamsi512_4way_context hamsi;
hashState_fugue fugue;
@@ -187,34 +201,84 @@ int scanhash_x16r_4way( struct work *, uint32_t,
uint64_t *, struct thr_info * );
extern __thread x16r_4way_context_overlay x16r_ctx;
#elif defined(X16R_2WAY)
// Union overlay of every hash context used by the 2x64 x16r chain.
// Only one algo's context is live at a time within a chain step, so a
// union keeps the per-thread state (and the per-hash copy in
// x16r_2x64_hash_generic) to the size of the largest member.
// Conditional members select the accelerated implementation when the
// required ISA extension is available, falling back to the sph_* versions.
union _x16r_2x64_context_overlay
{
blake512_2x64_context blake;
bmw512_2x64_context bmw;
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
#endif
skein512_2x64_context skein;
jh512_2x64_context jh;
keccak512_2x64_context keccak;
hashState_luffa luffa;
cubehashParam cube;
shavite512_context shavite;
simd512_context simd;
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
hashState_echo echo;
#else
sph_echo512_context echo;
#endif
#if defined(__SSE4_2__) || defined(__ARM_NEON)
hamsi_2x64_context hamsi;
#else
sph_hamsi512_context hamsi;
#endif
#if defined(__AES__)
hashState_fugue fugue;
#else
sph_fugue512_context fugue;
#endif
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
sha512_2x64_context sha512;
} __attribute__ ((aligned (64)));
typedef union _x16r_2x64_context_overlay x16r_2x64_context_overlay;
void x16r_2x64_prehash( void *, void * );
int x16r_2x64_hash_generic( void *, const void *, int );
int x16r_2x64_hash( void *, const void *, int );
int scanhash_x16r_2x64( struct work *, uint32_t,
uint64_t *, struct thr_info * );
extern __thread x16r_2x64_context_overlay x16r_ctx;
#endif
// need a reference, add hooks for SSE2.
// needed for hex
union _x16r_context_overlay
{
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
hashState_fugue fugue;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
sph_fugue512_context fugue;
#endif
blake512_context blake;
sph_bmw512_context bmw;
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
#endif
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
simd512_context simd;
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
hashState_echo echo;
#else
hashState_sd simd;
sph_echo512_context echo;
#endif
sph_hamsi512_context hamsi;
#if defined(__AES__)
hashState_fugue fugue;
#else
sph_fugue512_context fugue;
#endif
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
sph_sha512_context sha512;
@@ -222,7 +286,7 @@ union _x16r_context_overlay
typedef union _x16r_context_overlay x16r_context_overlay;
extern __thread x16r_context_overlay x16_ctx;
extern __thread x16r_context_overlay x16r_ref_ctx;
void x16r_prehash( void *, void * );
int x16r_hash_generic( void *, const void *, int );
@@ -242,6 +306,12 @@ int x16rv2_4way_hash( void *state, const void *input, int thrid );
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(X16RV2_2WAY)
int x16rv2_2x64_hash( void *state, const void *input, int thrid );
int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else
int x16rv2_hash( void *state, const void *input, int thr_id );
@@ -251,18 +321,24 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
#endif
// x16rt, veil
#if defined(X16R_8WAY)
#if defined(X16RT_8WAY)
//void x16rt_8way_hash( void *state, const void *input );
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(X16R_4WAY)
#elif defined(X16RT_4WAY)
//void x16rt_4way_hash( void *state, const void *input );
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(X16RT_2WAY)
//void x16rt_4way_hash( void *state, const void *input );
int scanhash_x16rt_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else
//void x16rt_hash( void *state, const void *input );
@@ -272,20 +348,27 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
#endif
// x21s
#if defined(X16R_8WAY)
#if defined(X21S_8WAY)
int x21s_8way_hash( void *state, const void *input, int thrid );
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool x21s_8way_thread_init();
#elif defined(X16R_4WAY)
#elif defined(X21S_4WAY)
int x21s_4way_hash( void *state, const void *input, int thrid );
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool x21s_4way_thread_init();
#elif defined(X21S_2WAY)
int x21s_2x64_hash( void *state, const void *input, int thrid );
int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool x21s_2x64_thread_init();
#else
int x21s_hash( void *state, const void *input, int thr_id );

View File

@@ -18,32 +18,36 @@ void x16r_prehash( void *edata, void *pdata )
switch ( algo )
{
case JH:
sph_jh512_init( &x16_ctx.jh );
sph_jh512( &x16_ctx.jh, edata, 64 );
sph_jh512_init( &x16r_ref_ctx.jh );
sph_jh512( &x16r_ref_ctx.jh, edata, 64 );
break;
case SKEIN:
sph_skein512_init( &x16_ctx.skein );
sph_skein512( &x16_ctx.skein, edata, 64 );
sph_skein512_init( &x16r_ref_ctx.skein );
sph_skein512( &x16r_ref_ctx.skein, edata, 64 );
break;
case KECCAK:
sph_keccak512_init( &x16r_ref_ctx.keccak );
sph_keccak512( &x16r_ref_ctx.keccak, edata, 72 );
break;
case LUFFA:
init_luffa( &x16_ctx.luffa, 512 );
update_luffa( &x16_ctx.luffa, edata, 64 );
init_luffa( &x16r_ref_ctx.luffa, 512 );
update_luffa( &x16r_ref_ctx.luffa, edata, 64 );
break;
case CUBEHASH:
cubehashInit( &x16_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16_ctx.cube, edata, 64 );
cubehashInit( &x16r_ref_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16r_ref_ctx.cube, edata, 64 );
break;
case HAMSI:
sph_hamsi512_init( &x16_ctx.hamsi );
sph_hamsi512( &x16_ctx.hamsi, edata, 64 );
break;
sph_hamsi512_init( &x16r_ref_ctx.hamsi );
sph_hamsi512( &x16r_ref_ctx.hamsi, edata, 72 );
break;
case SHABAL:
sph_shabal512_init( &x16_ctx.shabal );
sph_shabal512( &x16_ctx.shabal, edata, 64 );
sph_shabal512_init( &x16r_ref_ctx.shabal );
sph_shabal512( &x16r_ref_ctx.shabal, edata, 64 );
break;
case WHIRLPOOL:
sph_whirlpool_init( &x16_ctx.whirlpool );
sph_whirlpool( &x16_ctx.whirlpool, edata, 64 );
sph_whirlpool_init( &x16r_ref_ctx.whirlpool );
sph_whirlpool( &x16r_ref_ctx.whirlpool, edata, 64 );
break;
}
}
@@ -52,7 +56,7 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
{
uint32_t _ALIGN(128) hash[16];
x16r_context_overlay ctx;
memcpy( &ctx, &x16_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ref_ctx, sizeof(ctx) );
void *in = (void*) input;
int size = 80;
@@ -70,36 +74,41 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
break;
case BMW:
sph_bmw512_init( &ctx.bmw );
sph_bmw512(&ctx.bmw, in, size);
sph_bmw512_close(&ctx.bmw, hash);
sph_bmw512( &ctx.bmw, in, size );
sph_bmw512_close( &ctx.bmw, hash );
break;
case GROESTL:
#if defined(__AES__)
groestl512_full( &ctx.groestl, (char*)hash, (char*)in, size<<3 );
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
groestl512_full( &ctx.groestl, hash, in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512_close( &ctx.groestl, hash );
#endif
break;
case JH:
if ( i == 0 )
sph_jh512(&ctx.jh, in+64, 16 );
sph_jh512( &ctx.jh, in+64, 16 );
else
{
sph_jh512_init( &ctx.jh );
sph_jh512(&ctx.jh, in, size );
sph_jh512( &ctx.jh, in, size );
}
sph_jh512_close(&ctx.jh, hash );
sph_jh512_close( &ctx.jh, hash );
break;
case KECCAK:
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, in, size );
if ( i == 0 )
sph_keccak512( &ctx.keccak, in+72, 8 );
else
{
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, in, size );
}
sph_keccak512_close( &ctx.keccak, hash );
break;
case SKEIN:
if ( i == 0 )
sph_skein512(&ctx.skein, in+64, 16 );
sph_skein512( &ctx.skein, in+64, 16 );
else
{
sph_skein512_init( &ctx.skein );
@@ -109,13 +118,13 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
break;
case LUFFA:
if ( i == 0 )
update_and_final_luffa( &ctx.luffa, hash, (const void*)in+64, 16 );
update_and_final_luffa( &ctx.luffa, hash, in+64, 16 );
else
luffa_full( &ctx.luffa, hash, 512, in, size );
break;
case CUBEHASH:
if ( i == 0 )
cubehashUpdateDigest( &ctx.cube, hash, (const void*)in+64, 16 );
cubehashUpdateDigest( &ctx.cube, hash, in+64, 16 );
else
cubehash_full( &ctx.cube, hash, 512, in, size );
break;
@@ -123,19 +132,13 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
shavite512_full( &ctx.shavite, hash, in, size );
break;
case SIMD:
#if defined(__aarch64__)
sph_simd512_init( &ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#endif
sph_simd512( &ctx.simd, hash, size );
sph_simd512_close( &ctx.simd, hash );
break;
case ECHO:
#if defined(__AES__)
echo_full( &ctx.echo, (BitSequence*)hash, 512,
(const BitSequence*)in, size );
echo_full( &ctx.echo, hash, 512, in, size );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
@@ -144,7 +147,7 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
break;
case HAMSI:
if ( i == 0 )
sph_hamsi512( &ctx.hamsi, in+64, 16 );
sph_hamsi512( &ctx.hamsi, in+72, 8 );
else
{
sph_hamsi512_init( &ctx.hamsi );
@@ -153,12 +156,8 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
#if defined(__AES__)
fugue512_full( &ctx.fugue, hash, in, size );
#else
sph_fugue512_full( &ctx.fugue, hash, in, size );
#endif
break;
sph_fugue512_full( &ctx.fugue, hash, in, size );
break;
case SHABAL:
if ( i == 0 )
sph_shabal512( &ctx.shabal, in+64, 16 );

View File

@@ -3,7 +3,7 @@
#include <stdlib.h>
#include <string.h>
#if defined (X16R_8WAY)
#if defined (X16RT_8WAY)
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
@@ -57,7 +57,7 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined (X16R_4WAY)
#elif defined (X16RT_4WAY)
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
@@ -110,4 +110,55 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined (X16RT_2WAY)
// Scan nonces for x16rt (veil) using 2-way 64-bit SIMD lanes (SSE2/NEON).
// The hash-function order is derived from a time hash of ntime masked to a
// 128-second window, so it is recomputed only when the masked ntime changes.
// Returns 0; solutions are reported via submit_solution().
int scanhash_x16rt_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[2*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) timeHash[4*8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 2;   // 2 nonces tested per pass
uint32_t n = first_nonce;
const int thr_id = mythr->id;
// Nonce words live in the interleaved data at 128-bit vector offset 9.
v128_t *noncev = (v128_t*)vdata + 9;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;   // easy target for benchmarking
// Hash order is per-thread cached; recompute on a new 128 s ntime window.
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
if ( s_ntime != masked_ntime )
{
x16rt_getTimeHash( masked_ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = masked_ntime;
if ( !thr_id )
// NOTE(review): timeHash is an array passed to %08x; presumably only
// its first word is intended — confirm against other x16rt scanners.
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
}
// Midstate prehash for the first function in the order, then seed the
// two lane nonces (n and n+1) into the interleaved data.
x16r_2x64_prehash( vdata, pdata );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{
if ( x16r_2x64_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 2; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
// Bump both lane nonces by 2 (one 32-bit nonce per 64-bit lane).
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
n += 2;
} while ( ( n < last_nonce ) && !(*restart) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -1,6 +1,6 @@
#include "x16r-gate.h"
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
#if !defined(X16RT_8WAY) && !defined(X16RT_4WAY)
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )

View File

@@ -395,7 +395,7 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
break;
case HAMSI:
if ( i == 0 )
hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 );
hamsi512_8way_update( &ctx.hamsi, input + (72<<3), 8 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -409,14 +409,43 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
hash7, vhash );
break;
case FUGUE:
fugue512_full( &ctx.fugue, hash0, in0, size );
fugue512_full( &ctx.fugue, hash1, in1, size );
fugue512_full( &ctx.fugue, hash2, in2, size );
fugue512_full( &ctx.fugue, hash3, in3, size );
fugue512_full( &ctx.fugue, hash4, in4, size );
fugue512_full( &ctx.fugue, hash5, in5, size );
fugue512_full( &ctx.fugue, hash6, in6, size );
fugue512_full( &ctx.fugue, hash7, in7, size );
if ( i == 0 )
{
fugue512_update( &ctx.fugue, in0 + 76, 4 );
fugue512_final( &ctx.fugue, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in1 + 76, 4 );
fugue512_final( &ctx.fugue, hash1 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in2 + 76, 4 );
fugue512_final( &ctx.fugue, hash2 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in3 + 76, 4 );
fugue512_final( &ctx.fugue, hash3 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in4 + 76, 4 );
fugue512_final( &ctx.fugue, hash4 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in5 + 76, 4 );
fugue512_final( &ctx.fugue, hash5 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in6 + 76, 4 );
fugue512_final( &ctx.fugue, hash6 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in7 + 76, 4 );
fugue512_final( &ctx.fugue, hash7 );
}
else
{
fugue512_full( &ctx.fugue, hash0, hash0, size );
fugue512_full( &ctx.fugue, hash1, hash1, size );
fugue512_full( &ctx.fugue, hash2, hash2, size );
fugue512_full( &ctx.fugue, hash3, hash3, size );
fugue512_full( &ctx.fugue, hash4, hash4, size );
fugue512_full( &ctx.fugue, hash5, hash5, size );
fugue512_full( &ctx.fugue, hash6, hash6, size );
fugue512_full( &ctx.fugue, hash7, hash7, size );
}
break;
case SHABAL:
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -588,7 +617,7 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
@@ -626,7 +655,14 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x16rv2_ctx.hamsi );
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 64 );
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 72 );
break;
case FUGUE:
v128_bswap32_80( edata, pdata );
fugue512_init( &x16rv2_ctx.fugue );
fugue512_update( &x16rv2_ctx.fugue, edata, 76 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
@@ -824,8 +860,8 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, size );
skein512_4way_close( &ctx.skein, vhash );
}
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case LUFFA:
@@ -945,7 +981,7 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
break;
case HAMSI:
if ( i == 0 )
hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
hamsi512_4way_update( &ctx.hamsi, input + (72<<2), 8 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
@@ -956,10 +992,27 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case FUGUE:
fugue512_full( &ctx.fugue, hash0, in0, size );
fugue512_full( &ctx.fugue, hash1, in1, size );
fugue512_full( &ctx.fugue, hash2, in2, size );
fugue512_full( &ctx.fugue, hash3, in3, size );
if ( i == 0 )
{
fugue512_update( &ctx.fugue, in0 + 76, 4 );
fugue512_final( &ctx.fugue, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in1 + 76, 4 );
fugue512_final( &ctx.fugue, hash1 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in2 + 76, 4 );
fugue512_final( &ctx.fugue, hash2 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in3 + 76, 4 );
fugue512_final( &ctx.fugue, hash3 );
}
else
{
fugue512_full( &ctx.fugue, hash0, hash0, size );
fugue512_full( &ctx.fugue, hash1, hash1, size );
fugue512_full( &ctx.fugue, hash2, hash2, size );
fugue512_full( &ctx.fugue, hash3, hash3, size );
}
break;
case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
@@ -1077,7 +1130,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
@@ -1101,7 +1154,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
skein512_4way_prehash64( &x16rv2_ctx.skein, vdata );
break;
case CUBEHASH:
v128_bswap32_80( edata, pdata );
@@ -1112,7 +1165,13 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
case HAMSI:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x16rv2_ctx.hamsi );
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 64 );
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 72 );
break;
case FUGUE:
v128_bswap32_80( edata, pdata );
fugue512_init( &x16rv2_ctx.fugue );
fugue512_update( &x16rv2_ctx.fugue, edata, 76 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case SHABAL:
v128_bswap32_intrlv80_4x32( vdata32, pdata );
@@ -1151,4 +1210,453 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined (X16RV2_2WAY)
// Union of all hash contexts used by x16rv2 2-way: only one function runs
// at a time, so overlaying them keeps the per-thread state compact.
// AES/NEON-accelerated contexts are selected where the ISA supports them,
// otherwise the portable sph reference contexts are used.
union _x16rv2_2x64_context_overlay
{
blake512_2x64_context blake;
bmw512_2x64_context bmw;
#if defined(__AES__) // || defined(__ARM_FEATURE_AES)
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
#endif
skein512_2x64_context skein;
jh512_2x64_context jh;
keccak512_2x64_context keccak;
hashState_luffa luffa;
cubehashParam cube;
shavite512_context shavite;
simd512_context simd;
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
hashState_echo echo;
#else
sph_echo512_context echo;
#endif
#if defined(__SSE4_2__) || defined(__ARM_NEON)
hamsi_2x64_context hamsi;
#else
sph_hamsi512_context hamsi;
#endif
#if defined(__AES__)
hashState_fugue fugue;
#else
sph_fugue512_context fugue;
#endif
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
sha512_2x64_context sha512;
// Tiger is the extra stage x16rv2 inserts before Keccak/Luffa/SHA512.
sph_tiger_context tiger;
} __attribute__ ((aligned (64)));
typedef union _x16rv2_2x64_context_overlay x16rv2_2x64_context_overlay;
// Per-thread midstate contexts prepared by scanhash_x16rv2_2x64 and copied
// into a local overlay at the start of every hash.
static __thread x16rv2_2x64_context_overlay x16rv2_ctx;
// Zero-extend the 24 byte Tiger digest (6 words) to a full 64 byte block.
static inline void padtiger512( uint32_t* hash )
{
int w = 6;
while ( w < 16 )
hash[w++] = 0;
}
// X16Rv2 hash for 2 interleaved 64-bit lanes.
// x16rv2 modifies x16r by running Tiger and zero-padding its 24 byte digest
// to 64 bytes before the Keccak, Luffa and SHA512 stages (visible below).
// 'input' is the 2x64-interleaved 80 byte block data; 'output' receives the
// two 32 byte results back to back. Returns 0 if work was restarted
// mid-hash, 1 on completion.
int x16rv2_2x64_hash( void* output, const void* input, int thrid )
{
uint32_t vhash[20*2] __attribute__ ((aligned (64)));
uint32_t hash0[20] __attribute__ ((aligned (32)));
uint32_t hash1[20] __attribute__ ((aligned (32)));
x16rv2_2x64_context_overlay ctx;
// Start from the thread-local midstate contexts prepared by scanhash.
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
int size = 80;
// De-interleave the 80 byte block into per-lane buffers for the
// single-lane functions; vectorized functions re-interleave as needed.
dintrlv_2x64( hash0, hash1, input, 640 );
for ( int i = 0; i < 16; i++ )
{
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
if ( i == 0 )
blake512_2x64_full( &ctx.blake, vhash, input, size );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
blake512_2x64_full( &ctx.blake, vhash, vhash, size );
}
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case BMW:
bmw512_2x64_init( &ctx.bmw );
if ( i == 0 )
bmw512_2x64_update( &ctx.bmw, input, size );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
bmw512_2x64_update( &ctx.bmw, vhash, size );
}
bmw512_2x64_close( &ctx.bmw, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case GROESTL:
#if defined(__AES__)
groestl512_full( &ctx.groestl, hash0, in0, size<<3 );
groestl512_full( &ctx.groestl, hash1, in1, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in0, size );
sph_groestl512_close( &ctx.groestl, hash0 );
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in1, size );
sph_groestl512_close( &ctx.groestl, hash1 );
#endif
break;
case JH:
// i == 0: continue from the 64 byte midstate prehash; the
// remaining 16 bytes start at interleaved byte offset 64*2.
if ( i == 0 )
jh512_2x64_update( &ctx.jh, input + (64<<1), 16 );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
jh512_2x64_init( &ctx.jh );
jh512_2x64_update( &ctx.jh, vhash, size );
}
jh512_2x64_close( &ctx.jh, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case KECCAK:
// x16rv2: Tiger first (prehashed over the first 64 bytes when
// i == 0), pad to 64 bytes, then Keccak.
if ( i == 0 )
{
sph_tiger( &ctx.tiger, in0 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in1 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash1 );
}
else
{
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
}
// Zero pad the 24 byte Tiger digest to 64 bytes (inner i shadows
// the round counter; the inner loop is self-contained).
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = 0;
intrlv_2x64( vhash, hash0, hash1, 512 );
keccak512_2x64_init( &ctx.keccak );
keccak512_2x64_update( &ctx.keccak, vhash, 64 );
keccak512_2x64_close( &ctx.keccak, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case SKEIN:
if ( i == 0 )
skein512_2x64_final16( &ctx.skein, vhash, input + (64*2) );
else
{
intrlv_2x64( vhash, in0, in1, size<<3 );
skein512_2x64_full( &ctx.skein, vhash, vhash, size );
}
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
case LUFFA:
// x16rv2: Tiger, pad, then Luffa.
if ( i == 0 )
{
sph_tiger( &ctx.tiger, in0 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in1 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash1 );
}
else
{
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
}
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = 0;
luffa_full( &ctx.luffa, hash0, 512, hash0, 64 );
luffa_full( &ctx.luffa, hash1, 512, hash1, 64 );
break;
case CUBEHASH:
if ( i == 0 )
{
cubehashUpdateDigest( &ctx.cube, hash0, in0 + 64, 16 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, hash1, in1 + 64, 16 );
}
else
{
cubehash_full( &ctx.cube, hash0, 512, hash0, size );
cubehash_full( &ctx.cube, hash1, 512, hash1, size );
}
break;
case SHAVITE:
shavite512_full( &ctx.shavite, hash0, in0, size );
shavite512_full( &ctx.shavite, hash1, in1, size );
break;
case SIMD:
simd512_ctx( &ctx.simd, hash0, in0, size );
simd512_ctx( &ctx.simd, hash1, in1, size );
break;
case ECHO:
#if defined(__AES__)
echo_full( &ctx.echo, hash0, 512, in0, size );
echo_full( &ctx.echo, hash1, 512, in1, size );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in0, size );
sph_echo512_close( &ctx.echo, hash0 );
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in1, size );
sph_echo512_close( &ctx.echo, hash1 );
#endif
break;
case HAMSI:
// Hamsi prehash covers 72 bytes (its block size), so only the
// final 8 bytes remain when i == 0.
#if defined(__SSE4_2__) || defined(__ARM_NEON)
if ( i == 0 )
hamsi512_2x64_update( &ctx.hamsi, input + (72*2), 8 );
else
{
intrlv_2x64( vhash, hash0, hash1, size<<3 );
hamsi512_2x64_init( &ctx.hamsi );
hamsi512_2x64_update( &ctx.hamsi, vhash, size );
}
hamsi512_2x64_close( &ctx.hamsi, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
#else
if ( i == 0 )
{
sph_hamsi512( &ctx.hamsi, in0 + 72, 8 );
sph_hamsi512_close( &ctx.hamsi, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_hamsi512( &ctx.hamsi, in1 + 72, 8 );
sph_hamsi512_close( &ctx.hamsi, hash1 );
}
else
{
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, hash0, size );
sph_hamsi512_close( &ctx.hamsi, hash0 );
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, hash1, size );
sph_hamsi512_close( &ctx.hamsi, hash1 );
}
#endif
break;
case FUGUE:
// Fugue prehash covers 76 bytes; only 4 remain when i == 0.
#if defined(__AES__)
if ( i == 0 )
{
fugue512_update( &ctx.fugue, in0 + 76, 4 );
fugue512_final( &ctx.fugue, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in1 + 76, 4 );
fugue512_final( &ctx.fugue, hash1 );
}
else
{
fugue512_full( &ctx.fugue, hash0, hash0, size );
fugue512_full( &ctx.fugue, hash1, hash1, size );
}
#else
if ( i == 0 )
{
sph_fugue512( &ctx.fugue, in0 + 76, 4 );
sph_fugue512_close( &ctx.fugue, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, in1 + 76, 4 );
sph_fugue512_close( &ctx.fugue, hash1 );
}
else
{
sph_fugue512_full( &ctx.fugue, hash0, hash0, size );
sph_fugue512_full( &ctx.fugue, hash1, hash1, size );
}
#endif
break;
case SHABAL:
if ( i == 0 )
{
sph_shabal512( &ctx.shabal, in0 + 64, 16 );
sph_shabal512_close( &ctx.shabal, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_shabal512( &ctx.shabal, in1 + 64, 16 );
sph_shabal512_close( &ctx.shabal, hash1 );
}
else
{
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, hash0, size );
sph_shabal512_close( &ctx.shabal, hash0 );
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, hash1, size );
sph_shabal512_close( &ctx.shabal, hash1 );
}
break;
case WHIRLPOOL:
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
break;
case SHA_512:
// x16rv2: Tiger, pad, then SHA512.
if ( i == 0 )
{
sph_tiger( &ctx.tiger, in0 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash0 );
memcpy( &ctx, &x16rv2_ctx, sizeof(ctx) );
sph_tiger( &ctx.tiger, in1 + 64, 16 );
sph_tiger_close( &ctx.tiger, hash1 );
}
else
{
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in0, size );
sph_tiger_close( &ctx.tiger, hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger( &ctx.tiger, in1, size );
sph_tiger_close( &ctx.tiger, hash1 );
}
for ( int i = (24/4); i < (64/4); i++ )
hash0[i] = hash1[i] = 0;
intrlv_2x64( vhash, hash0, hash1, 512 );
sha512_2x64_init( &ctx.sha512 );
sha512_2x64_update( &ctx.sha512, vhash, 64 );
sha512_2x64_close( &ctx.sha512, vhash );
dintrlv_2x64( hash0, hash1, vhash, 512 );
break;
}
if ( work_restart[thrid].restart ) return 0;
// After the first round every stage hashes the previous 64 byte digest.
size = 64;
}
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
return 1;
}
// Scan nonces for x16rv2 using 2-way 64-bit SIMD lanes (SSE2/NEON).
// Derives the 16-function order from the big-endian prev-hash words, then
// midstate-prehashes the first function before the nonce loop.
// Returns 0; solutions are reported via submit_solution().
int scanhash_x16rv2_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[2*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
uint32_t edata[20];      // byte-swapped single-lane copy of the block
uint32_t bedata1[2];     // big-endian prev-hash words that pick the order
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 2;   // 2 nonces tested per pass
uint32_t n = first_nonce;
const int thr_id = mythr->id;
// Nonce words live in the interleaved data at 128-bit vector offset 9.
v128_t *noncev = (v128_t*)vdata + 9;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0fff;   // easy target for benchmarking
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
// Hash order is per-thread cached and recomputed when ntime changes.
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( !opt_quiet && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
// The resulting context is stored in the thread-local x16rv2_ctx and
// re-used for every nonce by x16rv2_2x64_hash.
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
v128_bswap32_intrlv80_2x64( vdata, pdata );
jh512_2x64_init( &x16rv2_ctx.jh );
jh512_2x64_update( &x16rv2_ctx.jh, vdata, 64 );
break;
case KECCAK:
case LUFFA:
case SHA_512:
// x16rv2 inserts Tiger before these stages; prehash Tiger instead.
v128_bswap32_80( edata, pdata );
sph_tiger_init( &x16rv2_ctx.tiger );
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
intrlv_2x64( vdata, edata, edata, 640 );
break;
case SKEIN:
v128_bswap32_intrlv80_2x64( vdata, pdata );
skein512_2x64_prehash64( &x16rv2_ctx.skein, vdata );
break;
case CUBEHASH:
v128_bswap32_80( edata, pdata );
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16rv2_ctx.cube, edata, 64 );
intrlv_2x64( vdata, edata, edata, 640 );
break;
case HAMSI:
// Hamsi's block size allows prehashing 72 of the 80 bytes.
#if defined(__SSE4_2__) || defined(__ARM_NEON)
v128_bswap32_intrlv80_2x64( vdata, pdata );
hamsi512_2x64_init( &x16rv2_ctx.hamsi );
hamsi512_2x64_update( &x16rv2_ctx.hamsi, vdata, 72 );
#else
v128_bswap32_80( edata, pdata );
sph_hamsi512_init( &x16rv2_ctx.hamsi );
sph_hamsi512( &x16rv2_ctx.hamsi, edata, 72 );
intrlv_2x64( vdata, edata, edata, 640 );
#endif
break;
case FUGUE:
// Fugue can absorb everything except the 4 byte nonce.
v128_bswap32_80( edata, pdata );
#if defined(__AES__)
fugue512_init( &x16rv2_ctx.fugue );
fugue512_update( &x16rv2_ctx.fugue, edata, 76 );
#else
sph_fugue512_init( &x16rv2_ctx.fugue );
sph_fugue512( &x16rv2_ctx.fugue, edata, 76 );
#endif
intrlv_2x64( vdata, edata, edata, 640 );
break;
case SHABAL:
v128_bswap32_80( edata, pdata );
sph_shabal512_init( &x16rv2_ctx.shabal );
sph_shabal512( &x16rv2_ctx.shabal, edata, 64);
intrlv_2x64( vdata, edata, edata, 640 );
break;
default:
// No midstate possible; just byte-swap and interleave the block.
v128_bswap32_intrlv80_2x64( vdata, pdata );
}
// Seed lane nonces n and n+1, then walk the nonce space 2 at a time.
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{
if ( x16rv2_2x64_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 2; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
n += 2;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -6,21 +6,15 @@
*/
#include "x16r-gate.h"
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
#if !defined(X16RV2_8WAY) && !defined(X16RV2_4WAY) && !defined(X16RV2_2WAY)
#include "algo/tiger/sph_tiger.h"
union _x16rv2_context_overlay
{
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
hashState_fugue fugue;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
sph_fugue512_context fugue;
#endif
blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
@@ -29,11 +23,7 @@ union _x16rv2_context_overlay
hashState_luffa luffa;
cubehashParam cube;
shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
@@ -72,15 +62,9 @@ int x16rv2_hash( void* output, const void* input, int thrid )
sph_bmw512_close(&ctx.bmw, hash);
break;
case GROESTL:
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
sph_groestl512_close(&ctx.groestl, hash);
#endif
break;
case SKEIN:
sph_skein512_init( &ctx.skein );
@@ -117,25 +101,14 @@ int x16rv2_hash( void* output, const void* input, int thrid )
shavite512_full( &ctx.shavite, hash, in, size );
break;
case SIMD:
#if defined(__aarch64__)
sph_simd512_init( &ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512(&ctx.simd, hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#endif
break;
case ECHO:
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
sph_echo512_close( &ctx.echo, hash );
#endif
break;
case HAMSI:
sph_hamsi512_init( &ctx.hamsi );
@@ -143,11 +116,7 @@ int x16rv2_hash( void* output, const void* input, int thrid )
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
#if defined(__AES__)
fugue512_full( &ctx.fugue, hash, in, size );
#else
sph_fugue512_full( &ctx.fugue, hash, in, size );
#endif
break;
case SHABAL:
sph_shabal512_init( &ctx.shabal );

View File

@@ -9,6 +9,7 @@
#include <stdlib.h>
#include <string.h>
#include "algo/haval/haval-hash-4way.h"
#include "algo/haval/sph-haval.h"
#include "algo/tiger/sph_tiger.h"
#include "algo/gost/sph_gost.h"
#include "algo/lyra2/lyra2.h"
@@ -351,4 +352,119 @@ bool x21s_4way_thread_init()
return x21s_4way_matrix;
}
#elif defined (X21S_2WAY)
// Per-thread Lyra2 work matrix, allocated once by x21s_2x64_thread_init().
static __thread uint64_t* x21s_2x64_matrix;
// Contexts for the 3 sph stages x21s appends after the x16r core; only one
// runs at a time so they share storage.
union _x21s_2x64_context_overlay
{
sph_haval256_5_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
} __attribute__ ((aligned (64)));
typedef union _x21s_2x64_context_overlay x21s_2x64_context_overlay;
// x21s hash for 2 lanes: x16r core followed by Haval256-5, Tiger,
// Lyra2REv2, GOST512 (Streebog) and SHA256. 'output' receives the two
// 32 byte results back to back. Returns 0 if the x16r core was aborted
// by a work restart, 1 on completion.
int x21s_2x64_hash( void* output, const void* input, int thrid )
{
uint8_t shash[64*2] __attribute__ ((aligned (64)));
x21s_2x64_context_overlay ctx;
// The two 64 byte x16r digests are stored back to back in shash.
uint32_t *hash0 = (uint32_t*) shash;
uint32_t *hash1 = (uint32_t*)( shash+64 );
if ( !x16r_2x64_hash_generic( shash, input, thrid ) )
return 0;
// Haval256-5 over each 64 byte digest (produces 32 bytes in place).
sph_haval256_5_init( &ctx.haval );
sph_haval256_5( &ctx.haval, hash0, 64 );
sph_haval256_5_close( &ctx.haval, hash0 );
sph_haval256_5_init( &ctx.haval );
sph_haval256_5( &ctx.haval, hash1, 64 );
sph_haval256_5_close( &ctx.haval, hash1 );
sph_tiger_init( &ctx.tiger );
sph_tiger ( &ctx.tiger, (const void*) hash0, 64 );
sph_tiger_close( &ctx.tiger, (void*) hash0 );
sph_tiger_init( &ctx.tiger );
sph_tiger ( &ctx.tiger, (const void*) hash1, 64 );
sph_tiger_close( &ctx.tiger, (void*) hash1 );
// Lyra2REv2 with 1 time cost, 4 rows, 4 cols, in-place on 32 bytes.
LYRA2REV2( x21s_2x64_matrix, (void*) hash0, 32, (const void*) hash0, 32,
(const void*) hash0, 32, 1, 4, 4 );
LYRA2REV2( x21s_2x64_matrix, (void*) hash1, 32, (const void*) hash1, 32,
(const void*) hash1, 32, 1, 4, 4 );
sph_gost512_init( &ctx.gost );
sph_gost512 ( &ctx.gost, (const void*) hash0, 64 );
sph_gost512_close( &ctx.gost, (void*) hash0 );
sph_gost512_init( &ctx.gost );
sph_gost512 ( &ctx.gost, (const void*) hash1, 64 );
sph_gost512_close( &ctx.gost, (void*) hash1 );
// Final SHA256 produces the 32 byte result for each lane.
sha256_full( output, hash0, 64 );
sha256_full( output+32, hash1, 64 );
return 1;
}
// Scan nonces for x21s using 2-way 64-bit SIMD lanes (SSE2/NEON).
// Order selection and prehash are shared with x16r; only the hash call
// differs. Returns 0; solutions are reported via submit_solution().
int scanhash_x21s_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[16*2] __attribute__ ((aligned (64)));
uint32_t vdata[20*2] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 2;   // 2 nonces tested per pass
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
// Nonce words live in the interleaved data at 128-bit vector offset 9.
v128_t *noncev = (v128_t*)vdata + 9;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( bench ) ptarget[7] = 0x0cff;   // easy target for benchmarking
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
// Hash order is per-thread cached and recomputed when ntime changes.
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Midstate prehash for the first function, then seed lane nonces.
x16r_2x64_prehash( vdata, pdata );
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{
if ( x21s_2x64_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 2; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
// Bump both lane nonces by 2 (one 32-bit nonce per 64-bit lane).
*noncev = v128_add32( *noncev, v128_64( 0x0000000200000000 ) );
n += 2;
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
// Allocate the per-thread Lyra2 work matrix for x21s (4 rows x 4 cols).
// Returns true on success, false if the aligned allocation failed.
bool x21s_2x64_thread_init()
{
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// NOTE(review): 'size' is declared int but computed from int64_t values;
// fits for these small dimensions, but confirm against other thread_init
// variants which may use int64_t.
const int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
x21s_2x64_matrix = mm_malloc( size, 64 );
return x21s_2x64_matrix;
}
#endif

View File

@@ -15,7 +15,7 @@
#include "algo/gost/sph_gost.h"
#include "algo/lyra2/lyra2.h"
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
#if !defined(X21S_8WAY) && !defined(X21S_4WAY)
static __thread uint64_t* x21s_matrix;

View File

@@ -1133,14 +1133,12 @@ int scanhash_x17_2x64( struct work *work, uint32_t max_nonce,
{
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
{
applog(LOG_INFO,"Submitted Thread %d, lane %d",thr_id,0);
pdata[19] = bswap_32( n );
// pdata[19] = n;
submit_solution( work, hash, mythr );
}
if ( unlikely( valid_hash( hash+8, ptarget ) && !bench ) )
{
applog(LOG_INFO,"Submitted Thread %d, lane %d",thr_id,1);
pdata[19] = bswap_32( n+1 );
submit_solution( work, hash+8, mythr );
}

View File

@@ -5,24 +5,23 @@
#include "algo/blake/blake512-hash.h"
#include "algo/bmw/sph_bmw.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/fugue/fugue-aesni.h"
#else
#include "algo/groestl/sph_groestl.h"
#include "algo/echo/sph_echo.h"
#include "algo/fugue/sph_fugue.h"
#endif
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
#include "algo/echo/aes_ni/hash_api.h"
#else
#include "algo/echo/sph_echo.h"
#endif
#include "algo/skein/sph_skein.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
@@ -41,12 +40,15 @@ union _x22i_context_overlay
sph_bmw512_context bmw;
#if defined(__AES__)
hashState_groestl groestl;
hashState_echo echo;
hashState_fugue fugue;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
sph_fugue512_context fugue;
#endif
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
hashState_echo echo;
#else
sph_echo512_context echo;
#endif
sph_jh512_context jh;
sph_keccak512_context keccak;
@@ -54,11 +56,7 @@ union _x22i_context_overlay
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
@@ -84,9 +82,7 @@ int x22i_hash( void *output, const void *input, int thrid )
sph_bmw512_close(&ctx.bmw, hash);
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)hash, 512 );
groestl512_full( &ctx.groestl, hash, hash, 512 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, hash, 64 );
@@ -109,26 +105,16 @@ int x22i_hash( void *output, const void *input, int thrid )
luffa_full( &ctx.luffa, hash, 512, hash, 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, hash, hash, 64 );
cubehash_full( &ctx.cube, hash, 512, hash, 64 );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512_init(&ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash,
(const BitSequence*)hash, 512 );
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
echo_full( &ctx.echo, hash, 512, hash, 64 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, hash, 64 );
@@ -192,8 +178,8 @@ int x22i_hash( void *output, const void *input, int thrid )
int scanhash_x22i( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t edata[20] __attribute__((aligned(32)));
uint32_t hash64[8] __attribute__((aligned(32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];

View File

@@ -5,24 +5,23 @@
#include "algo/blake/blake512-hash.h"
#include "algo/bmw/sph_bmw.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/fugue/fugue-aesni.h"
#else
#include "algo/groestl/sph_groestl.h"
#include "algo/echo/sph_echo.h"
#include "algo/fugue/sph_fugue.h"
#endif
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
#include "algo/echo/aes_ni/hash_api.h"
#else
#include "algo/echo/sph_echo.h"
#endif
#include "algo/skein/sph_skein.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
@@ -44,12 +43,15 @@ union _x25x_context_overlay
sph_bmw512_context bmw;
#if defined(__AES__)
hashState_groestl groestl;
hashState_echo echo;
hashState_fugue fugue;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
sph_fugue512_context fugue;
#endif
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
hashState_echo echo;
#else
sph_echo512_context echo;
#endif
sph_jh512_context jh;
sph_keccak512_context keccak;
@@ -57,11 +59,7 @@ union _x25x_context_overlay
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
@@ -89,9 +87,7 @@ int x25x_hash( void *output, const void *input, int thrid )
sph_bmw512_close(&ctx.bmw, &hash[1]);
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)&hash[2],
(const char*)&hash[1], 512 );
groestl512_full( &ctx.groestl, (void*)&hash[2], (const void*)&hash[1], 512 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, &hash[1], 64 );
@@ -112,28 +108,18 @@ int x25x_hash( void *output, const void *input, int thrid )
if ( work_restart[thrid].restart ) return 0;
init_luffa( &ctx.luffa, 512 );
luffa_full( &ctx.luffa, &hash[6], 512, &hash[5], 64 );
luffa_full( &ctx.luffa, (void*)&hash[6], 512, (const void*)&hash[5], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, &hash[7], &hash[6], 64 );
cubehash_full( &ctx.cube, (void*)&hash[7], 512, (const void*)&hash[6], 64 );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) &hash[7], 64);
sph_shavite512_close(&ctx.shavite, &hash[8]);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) &hash[8], 64);
sph_simd512_close(&ctx.simd, &hash[9] );
#else
update_final_sd( &ctx.simd, (BitSequence *)&hash[9],
(const BitSequence *)&hash[8], 512 );
#endif
simd512_ctx( &ctx.simd, (void*)&hash[9], (const void*)&hash[8], 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)&hash[10],
(const BitSequence*)&hash[9], 512 );
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
echo_full( &ctx.echo, (void*)&hash[10], 512, (const void*)&hash[9], 64 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, &hash[9], 64 );
@@ -227,8 +213,8 @@ int x25x_hash( void *output, const void *input, int thrid )
int scanhash_x25x( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t edata[20] __attribute__((aligned(32)));
uint32_t hash64[8] __attribute__((aligned(32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
@@ -245,7 +231,7 @@ int scanhash_x25x( struct work *work, uint32_t max_nonce,
do
{
edata[19] = n;
if ( x25x_hash( hash64, edata, thr_id ) )
if ( x25x_hash( hash64, edata, thr_id ) );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.10.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 23.12.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='23.10'
PACKAGE_STRING='cpuminer-opt 23.10'
PACKAGE_VERSION='23.12'
PACKAGE_STRING='cpuminer-opt 23.12'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 23.10 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 23.12 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1432,7 +1432,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 23.10:";;
short | recursive ) echo "Configuration of cpuminer-opt 23.12:";;
esac
cat <<\_ACEOF
@@ -1538,7 +1538,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 23.10
cpuminer-opt configure 23.12
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 23.10, which was
It was created by cpuminer-opt $as_me 23.12, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3593,7 +3593,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='23.10'
VERSION='23.12'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 23.10, which was
This file was extended by cpuminer-opt $as_me 23.12, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 23.10
cpuminer-opt config.status 23.12
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [23.10])
AC_INIT([cpuminer-opt], [23.12])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

4355
configure~

File diff suppressed because it is too large Load Diff