Jay D Dee
2020-02-04 01:31:59 -05:00
parent 0681ca996d
commit 1b76cee239
106 changed files with 1695 additions and 4481 deletions

View File

@@ -102,9 +102,6 @@ cpuminer_SOURCES = \
algo/hamsi/hamsi-hash-4way.c \
algo/haval/haval.c \
algo/haval/haval-hash-4way.c \
algo/heavy/sph_hefty1.c \
algo/heavy/heavy.c \
algo/heavy/bastion.c \
algo/hodl/aes.c \
algo/hodl/hodl-gate.c \
algo/hodl/hodl-wolf.c \
@@ -123,8 +120,6 @@ cpuminer_SOURCES = \
algo/keccak/sha3d-4way.c \
algo/keccak/sha3d.c \
algo/lanehash/lane.c \
algo/luffa/sph_luffa.c \
algo/luffa/luffa.c \
algo/luffa/luffa_for_sse2.c \
algo/luffa/luffa-hash-2way.c \
algo/lyra2/lyra2.c \
@@ -153,7 +148,6 @@ cpuminer_SOURCES = \
algo/nist5/zr5.c \
algo/panama/panama-hash-4way.c \
algo/panama/sph_panama.c \
algo/radiogatun/sph_radiogatun.c \
algo/quark/quark-gate.c \
algo/quark/quark.c \
algo/quark/quark-4way.c \
@@ -176,7 +170,6 @@ cpuminer_SOURCES = \
algo/ripemd/lbry-4way.c \
algo/scrypt/scrypt.c \
algo/scrypt/neoscrypt.c \
algo/scrypt/pluck.c \
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
algo/sha/sha256-hash-4way.c \
@@ -195,7 +188,6 @@ cpuminer_SOURCES = \
algo/shavite/shavite-hash-2way.c \
algo/shavite/shavite-hash-4way.c \
algo/shavite/shavite.c \
algo/simd/sph_simd.c \
algo/simd/nist.c \
algo/simd/vector.c \
algo/simd/simd-hash-2way.c \
@@ -233,7 +225,6 @@ cpuminer_SOURCES = \
algo/x11/timetravel10-gate.c \
algo/x11/timetravel10.c \
algo/x11/timetravel10-4way.c \
algo/x11/fresh.c \
algo/x11/x11evo.c \
algo/x11/x11evo-4way.c \
algo/x11/x11evo-gate.c \
@@ -252,7 +243,6 @@ cpuminer_SOURCES = \
algo/x13/skunk-gate.c \
algo/x13/skunk-4way.c \
algo/x13/skunk.c \
algo/x13/drop.c \
algo/x13/x13bcd-4way.c \
algo/x13/x13bcd.c \
algo/x14/x14-gate.c \
@@ -287,7 +277,6 @@ cpuminer_SOURCES = \
algo/x17/sonoa-gate.c \
algo/x17/sonoa-4way.c \
algo/x17/sonoa.c \
algo/x20/x20r.c \
algo/x22/x22i-4way.c \
algo/x22/x22i.c \
algo/x22/x22i-gate.c \

View File

@@ -53,7 +53,6 @@ Supported Algorithms
argon2d500 argon2d-dyn, Dynamic (DYN)
argon2d4096 argon2d-uis, Unitus, (UIS)
axiom Shabal-256 MemoHash
bastion
blake Blake-256 (SFR)
blake2b Blake2b 256
blake2s Blake-2 S
@@ -64,10 +63,7 @@ Supported Algorithms
decred
deep Deepcoin (DCN)
dmd-gr Diamond-Groestl
drop Dropcoin
fresh Fresh
groestl Groestl coin
heavy Heavy
hex x16r-hex
hmq1725 Espers
hodl Hodlcoin

View File

@@ -65,6 +65,20 @@ If not what makes it happen or not happen?
Change Log
----------
v3.11.9
Fixed x16r invalid shares when Luffa was first in hash order.
New startup message for status of stratum connection, API & extranonce.
New log report showing CPU temperature and the frequency of the fastest and
slowest cores.
Compile time is a little shorter and binary file size a little smaller
using conditional compilation (see the sketch below).
Removed code for the Bastion, Drop, Heavy, Luffa and Pluck algos and other
unused code.
v3.11.8
Fixed network hashrate showing incorrect data, should be close now.
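
A minimal sketch of the conditional-compilation guard this release applies across the algo files (macro names follow the blake2b gate shown later in this diff; the function body is elided). The scalar reference code is only compiled when the gate selected no vector width, which is what shortens compile time and shrinks the binary:

#include "blake2b-gate.h"   // the gate header decides which path is built

#if !defined(BLAKE2B_8WAY) && !defined(BLAKE2B_4WAY)
// scalar fallback: compiled only when neither the AVX512 (8-way) nor the
// AVX2 (4-way) implementation was selected by the gate
int scanhash_blake2b( struct work *work, uint32_t max_nonce,
                      uint64_t *hashes_done, struct thr_info *mythr )
{
   // ... reference implementation, unchanged ...
   return 0;
}
#endif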

View File

@@ -162,7 +162,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break;
case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break;
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
case ALGO_BASTION: register_bastion_algo ( gate ); break;
case ALGO_BLAKE: register_blake_algo ( gate ); break;
case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
@@ -175,10 +174,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_DECRED: register_decred_algo ( gate ); break;
case ALGO_DEEP: register_deep_algo ( gate ); break;
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
case ALGO_DROP: register_drop_algo ( gate ); break;
case ALGO_FRESH: register_fresh_algo ( gate ); break;
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
case ALGO_HEX: register_hex_algo ( gate ); break;
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
case ALGO_HODL: register_hodl_algo ( gate ); break;
@@ -186,7 +182,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
case ALGO_LBRY: register_lbry_algo ( gate ); break;
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
@@ -200,7 +195,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
case ALGO_PHI2: register_phi2_algo ( gate ); break;
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
case ALGO_POWER2B: register_power2b_algo ( gate ); break;
case ALGO_QUARK: register_quark_algo ( gate ); break;
@@ -275,10 +269,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
// override std defaults with jr2 defaults
bool register_json_rpc2( algo_gate_t *gate )
{
applog(LOG_WARNING,"\nCryptonight algorithm and variants are no longer");
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
// gate->wait_for_diff = (void*)&do_nothing;
gate->get_new_work = (void*)&jr2_get_new_work;
gate->get_nonceptr = (void*)&jr2_get_nonceptr;
@@ -360,7 +350,7 @@ void get_algo_alias( char** algo_or_alias )
if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) )
{
// found valid alias, return proper name
*algo_or_alias = (char* const)( algo_alias_map[i][ PROPER ] );
*algo_or_alias = (const char*)( algo_alias_map[i][ PROPER ] );
return;
}
}

View File

@@ -1,4 +1,5 @@
#include "argon2d-gate.h"
#include "simd-utils.h"
#include "argon2d/argon2.h"
static const size_t INPUT_BYTES = 80; // Length of a block header in bytes. Input Length = Salt Length (salt = input)
@@ -36,7 +37,7 @@ void argon2d_crds_hash( void *output, const void *input )
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) endiandata[20];
uint32_t _ALIGN(64) edata[20];
uint32_t _ALIGN(64) hash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -45,11 +46,11 @@ int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7];
uint32_t nonce = first_nonce;
swab32_array( endiandata, pdata, 20 );
swab32_array( edata, pdata, 20 );
do {
be32enc(&endiandata[19], nonce);
argon2d_crds_hash( hash, endiandata );
be32enc(&edata[19], nonce);
argon2d_crds_hash( hash, edata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
@@ -103,31 +104,32 @@ void argon2d_dyn_hash( void *output, const void *input )
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) endiandata[20];
uint32_t _ALIGN(64) edata[20];
uint32_t _ALIGN(64) hash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
const int thr_id = mythr->id;
const uint32_t first_nonce = (const uint32_t)pdata[19];
const uint32_t last_nonce = (const uint32_t)max_nonce;
uint32_t nonce = first_nonce;
const bool bench = opt_benchmark;
swab32_array( endiandata, pdata, 20 );
mm128_bswap32_80( edata, pdata );
do
{
be32enc(&endiandata[19], nonce);
argon2d_dyn_hash( hash, endiandata );
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
edata[19] = nonce;
argon2d_dyn_hash( hash, edata );
if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget )
&& !bench ) )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash, mythr );
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
} while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}
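
Pulled together from the interleaved hunk above, a minimal sketch of the scanhash shape this commit converges on. algo_hash() is a hypothetical stand-in for the per-algo hash call; edata, valid_hash(), submit_solution(), bswap_32(), likely/unlikely and work_restart all appear in the diff or the existing cpuminer-opt sources, and the locals are the ones declared earlier in the function:

mm128_bswap32_80( edata, pdata );        // byte-swap the 80-byte header once
do
{
   edata[19] = nonce;                    // nonce stays in host byte order
   algo_hash( hash, edata );             // hypothetical per-algo hash call
   if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget ) && !bench ) )
   {
      pdata[19] = bswap_32( nonce );     // restore big-endian before submitting
      submit_solution( work, hash, mythr );
   }
   nonce++;
} while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;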
@@ -146,36 +148,34 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) vhash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t _ALIGN(64) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = (const uint32_t)max_nonce;
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t t_cost = 1; // 1 iteration
uint32_t m_cost = 4096; // use 4MB
uint32_t parallelism = 1; // 1 thread, 2 lanes
const bool bench = opt_benchmark;
for ( int i = 0; i < 19; i++ )
be32enc( &endiandata[i], pdata[i] );
mm128_bswap32_80( edata, pdata );
do {
be32enc( &endiandata[19], n );
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80,
(char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) && !opt_benchmark )
edata[19] = n;
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) edata, 80,
(char*) edata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
if ( unlikely( valid_hash( vhash, ptarget ) && !bench ) )
{
pdata[19] = n;
be32enc( &pdata[19], n );
submit_solution( work, vhash, mythr );
}
n++;
} while ( likely( n < last_nonce && !work_restart[thr_id].restart ) );
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -33,6 +33,8 @@
#include "blake2b-hash-4way.h"
#if defined(__AVX2__)
static const uint8_t sigma[12][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
@@ -203,9 +205,9 @@ void blake2b_8way_final( blake2b_8way_ctx *ctx, void *out )
casti_m512i( out, 3 ) = ctx->h[3];
}
#endif
#endif // AVX512
#if defined(__AVX2__)
// AVX2
// G Mixing function.
@@ -369,4 +371,4 @@ void blake2b_4way_final( blake2b_4way_ctx *ctx, void *out )
casti_m256i( out, 3 ) = ctx->h[3];
}
#endif
#endif // AVX2

View File

@@ -4,6 +4,9 @@
*/
#include "blake2b-gate.h"
#if !defined(BLAKE2B_8WAY) && !defined(BLAKE2B_4WAY)
#include <string.h>
#include <stdint.h>
#include "algo/blake/sph_blake2b.h"
@@ -58,3 +61,4 @@ int scanhash_blake2b( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,5 +1,7 @@
#include "blake2s-gate.h"
#if !defined(BLAKE2S_16WAY) && !defined(BLAKE2S_8WAY) && !defined(BLAKE2S)
#include <string.h>
#include <stdint.h>
@@ -70,3 +72,4 @@ int scanhash_blake2s( struct work *work,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "blakecoin-gate.h"
#if !defined(BLAKECOIN_8WAY) && !defined(BLAKECOIN_4WAY)
#define BLAKE32_ROUNDS 8
#include "sph_blake.h"
@@ -93,3 +96,4 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "decred-gate.h"
#if !defined(DECRED_8WAY) && !defined(DECRED_4WAY)
#include "sph_blake.h"
#include <string.h>
@@ -275,3 +278,5 @@ bool register_decred_algo( algo_gate_t* gate )
return true;
}
*/
#endif

View File

@@ -1,4 +1,7 @@
#include "pentablake-gate.h"
#if !defined(PENTABLAKE_8WAY) && !defined(PENTABLAKE_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -111,3 +114,4 @@ int scanhash_pentablake( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,5 +1,7 @@
#include "algo-gate-api.h"
#if !defined(BMW512_8WAY) && !defined(BMW512_4WAY)
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
@@ -50,4 +52,4 @@ int scanhash_bmw512( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -48,6 +48,8 @@ extern "C"{
#pragma warning (disable: 4146)
#endif
#if !defined(__AVX2__)
static const sph_u32 IV224[] = {
SPH_C32(0x00010203), SPH_C32(0x04050607),
SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F),
@@ -70,6 +72,8 @@ static const sph_u32 IV256[] = {
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
};
#endif // !AVX2
#if SPH_64
static const sph_u64 IV384[] = {
@@ -135,6 +139,8 @@ static const sph_u64 IV512[] = {
#define M16_30 14, 15, 1, 2, 5, 8, 9
#define M16_31 15, 16, 2, 3, 6, 9, 10
#if !defined(__AVX2__)
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
@@ -189,6 +195,8 @@ static const sph_u64 IV512[] = {
#define expand2s_(qf, mf, hf, i16, ix, iy) \
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
#endif // !AVX2
#if SPH_64
#define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \
@@ -291,6 +299,8 @@ static const sph_u64 Kb_tab[] = {
tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
#if !defined(__AVX2__)
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
@@ -407,6 +417,8 @@ static const sph_u64 Kb_tab[] = {
#define Qs(j) (qt[j])
#endif // !AVX2
#if SPH_64
#define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
@@ -557,7 +569,6 @@ static const sph_u64 Kb_tab[] = {
+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
} while (0)
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
#if SPH_64
@@ -565,6 +576,10 @@ static const sph_u64 Kb_tab[] = {
#endif
#if !defined(__AVX2__)
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
static void
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
{
@@ -711,6 +726,8 @@ bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
sph_enc32le(out + 4 * u, h1[v]);
}
#endif // !AVX2
#if SPH_64
static void
@@ -840,6 +857,8 @@ bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
#endif
#if !defined(__AVX2__)
/* see sph_bmw.h */
void
sph_bmw224_init(void *cc)
@@ -898,6 +917,8 @@ sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
// sph_bmw256_init(cc);
}
#endif // !AVX2
#if SPH_64
/* see sph_bmw.h */

View File

@@ -77,6 +77,9 @@ extern "C"{
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
#if !defined(__AVX2__)
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
@@ -102,6 +105,8 @@ typedef sph_bmw_small_context sph_bmw224_context;
*/
typedef sph_bmw_small_context sph_bmw256_context;
#endif // !AVX2
#if SPH_64
/**
@@ -137,6 +142,8 @@ typedef sph_bmw_big_context sph_bmw512_context;
#endif
#if !defined(__AVX2__)
/**
* Initialize a BMW-224 context. This process performs no memory allocation.
*
@@ -227,6 +234,8 @@ void sph_bmw256_close(void *cc, void *dst);
void sph_bmw256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif // !AVX2
#if SPH_64
/**

View File

@@ -358,6 +358,9 @@ int scanhash_cryptolight( struct work *work,
bool register_cryptolight_algo( algo_gate_t* gate )
{
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
register_json_rpc2( gate );
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_cryptolight;

View File

@@ -105,6 +105,9 @@ int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
bool register_cryptonight_algo( algo_gate_t* gate )
{
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
cryptonightV7 = false;
register_json_rpc2( gate );
gate->optimizations = SSE2_OPT | AES_OPT;
@@ -116,6 +119,9 @@ bool register_cryptonight_algo( algo_gate_t* gate )
bool register_cryptonightv7_algo( algo_gate_t* gate )
{
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
cryptonightV7 = true;
register_json_rpc2( gate );
gate->optimizations = SSE2_OPT | AES_OPT;

View File

@@ -36,6 +36,8 @@
#include "sph_echo.h"
#if !defined(__AES__)
#ifdef __cplusplus
extern "C"{
#endif
@@ -1029,3 +1031,4 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
#ifdef __cplusplus
}
#endif
#endif // !AES

View File

@@ -36,6 +36,8 @@
#ifndef SPH_ECHO_H__
#define SPH_ECHO_H__
#if !defined(__AES__)
#ifdef __cplusplus
extern "C"{
#endif
@@ -316,5 +318,5 @@ void sph_echo512_addbits_and_close(
#ifdef __cplusplus
}
#endif
#endif // !AES
#endif

View File

@@ -1,3 +1,6 @@
#if !defined GROESTL_INTR_AES_H__
#define GROESTL_INTR_AES_H__
/* groestl-intr-aes.h Aug 2011
*
* Groestl implementation with intrinsics using ssse3, sse4.1, and aes
@@ -11,6 +14,52 @@
#include <wmmintrin.h>
#include "hash-groestl.h"
static const __m128i round_const_p[] __attribute__ ((aligned (64))) =
{
{ 0x7060504030201000, 0xf0e0d0c0b0a09080 },
{ 0x7161514131211101, 0xf1e1d1c1b1a19181 },
{ 0x7262524232221202, 0xf2e2d2c2b2a29282 },
{ 0x7363534333231303, 0xf3e3d3c3b3a39383 },
{ 0x7464544434241404, 0xf4e4d4c4b4a49484 },
{ 0x7565554535251505, 0xf5e5d5c5b5a59585 },
{ 0x7666564636261606, 0xf6e6d6c6b6a69686 },
{ 0x7767574737271707, 0xf7e7d7c7b7a79787 },
{ 0x7868584838281808, 0xf8e8d8c8b8a89888 },
{ 0x7969594939291909, 0xf9e9d9c9b9a99989 },
{ 0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a },
{ 0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b },
{ 0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c },
{ 0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d }
};
static const __m128i round_const_q[] __attribute__ ((aligned (64))) =
{
{ 0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f },
{ 0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e },
{ 0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d },
{ 0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c },
{ 0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b },
{ 0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a },
{ 0x8999a9b9c9d9e9f9, 0x0919293949596979 },
{ 0x8898a8b8c8d8e8f8, 0x0818283848586878 },
{ 0x8797a7b7c7d7e7f7, 0x0717273747576777 },
{ 0x8696a6b6c6d6e6f6, 0x0616263646566676 },
{ 0x8595a5b5c5d5e5f5, 0x0515253545556575 },
{ 0x8494a4b4c4d4e4f4, 0x0414243444546474 },
{ 0x8393a3b3c3d3e3f3, 0x0313233343536373 },
{ 0x8292a2b2c2d2e2f2, 0x0212223242526272 }
};
static const __m128i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02 };
static const __m128i SUBSH_MASK0 = { 0x0b0e0104070a0d00, 0x0306090c0f020508 };
static const __m128i SUBSH_MASK1 = { 0x0c0f0205080b0e01, 0x04070a0d00030609 };
static const __m128i SUBSH_MASK2 = { 0x0d000306090c0f02, 0x05080b0e0104070a };
static const __m128i SUBSH_MASK3 = { 0x0e0104070a0d0003, 0x06090c0f0205080b };
static const __m128i SUBSH_MASK4 = { 0x0f0205080b0e0104, 0x070a0d000306090c };
static const __m128i SUBSH_MASK5 = { 0x000306090c0f0205, 0x080b0e0104070a0d };
static const __m128i SUBSH_MASK6 = { 0x0104070a0d000306, 0x090c0f0205080b0e };
static const __m128i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 };
#define tos(a) #a
#define tostr(a) tos(a)
@@ -141,42 +190,6 @@
}/*MixBytes*/
static const uint64_t round_const_p[] __attribute__ ((aligned (64))) =
{
0x7060504030201000, 0xf0e0d0c0b0a09080,
0x7161514131211101, 0xf1e1d1c1b1a19181,
0x7262524232221202, 0xf2e2d2c2b2a29282,
0x7363534333231303, 0xf3e3d3c3b3a39383,
0x7464544434241404, 0xf4e4d4c4b4a49484,
0x7565554535251505, 0xf5e5d5c5b5a59585,
0x7666564636261606, 0xf6e6d6c6b6a69686,
0x7767574737271707, 0xf7e7d7c7b7a79787,
0x7868584838281808, 0xf8e8d8c8b8a89888,
0x7969594939291909, 0xf9e9d9c9b9a99989,
0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a,
0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b,
0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c,
0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d
};
static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
{
0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f,
0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e,
0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d,
0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c,
0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b,
0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a,
0x8999a9b9c9d9e9f9, 0x0919293949596979,
0x8898a8b8c8d8e8f8, 0x0818283848586878,
0x8797a7b7c7d7e7f7, 0x0717273747576777,
0x8696a6b6c6d6e6f6, 0x0616263646566676,
0x8595a5b5c5d5e5f5, 0x0515253545556575,
0x8494a4b4c4d4e4f4, 0x0414243444546474,
0x8393a3b3c3d3e3f3, 0x0313233343536373,
0x8292a2b2c2d2e2f2, 0x0212223242526272
};
/* one round
* a0-a7 = input rows
* b0-b7 = output rows
@@ -203,22 +216,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
xmm8 = _mm_xor_si128( xmm8, \
casti_m128i( round_const_p, round_counter ) ); \
/* ShiftBytes P1024 + pre-AESENCLAST */\
xmm8 = _mm_shuffle_epi8( xmm8, m128_const_64( 0x0306090c0f020508, \
0x0b0e0104070a0d00 ) ); \
xmm9 = _mm_shuffle_epi8( xmm9, m128_const_64( 0x04070a0d00030609, \
0x0c0f0205080b0e01 ) ); \
xmm10 = _mm_shuffle_epi8( xmm10, m128_const_64( 0x05080b0e0104070a, \
0x0d000306090c0f02 ) ); \
xmm11 = _mm_shuffle_epi8( xmm11, m128_const_64( 0x06090c0f0205080b, \
0x0e0104070a0d0003 ) ); \
xmm12 = _mm_shuffle_epi8( xmm12, m128_const_64( 0x070a0d000306090c, \
0x0f0205080b0e0104 ) ); \
xmm13 = _mm_shuffle_epi8( xmm13, m128_const_64( 0x080b0e0104070a0d, \
0x000306090c0f0205 ) ); \
xmm14 = _mm_shuffle_epi8( xmm14, m128_const_64( 0x090c0f0205080b0e, \
0x0104070a0d000306 ) ); \
xmm15 = _mm_shuffle_epi8( xmm15, m128_const_64( 0x0e0104070a0d0003, \
0x06090c0f0205080b ) ); \
xmm8 = _mm_shuffle_epi8( xmm8, SUBSH_MASK0 ); \
xmm9 = _mm_shuffle_epi8( xmm9, SUBSH_MASK1 ); \
xmm10 = _mm_shuffle_epi8( xmm10, SUBSH_MASK2 ); \
xmm11 = _mm_shuffle_epi8( xmm11, SUBSH_MASK3 ); \
xmm12 = _mm_shuffle_epi8( xmm12, SUBSH_MASK4 ); \
xmm13 = _mm_shuffle_epi8( xmm13, SUBSH_MASK5 ); \
xmm14 = _mm_shuffle_epi8( xmm14, SUBSH_MASK6 ); \
xmm15 = _mm_shuffle_epi8( xmm15, SUBSH_MASK7 ); \
/* SubBytes + MixBytes */\
SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 ); \
@@ -226,22 +231,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
/* AddRoundConstant P1024 */\
xmm0 = _mm_xor_si128( xmm0, \
casti_m128i( round_const_p, round_counter+1 ) ); \
xmm0 = _mm_shuffle_epi8( xmm0, m128_const_64( 0x0306090c0f020508, \
0x0b0e0104070a0d00 ) ); \
xmm1 = _mm_shuffle_epi8( xmm1, m128_const_64( 0x04070a0d00030609, \
0x0c0f0205080b0e01 ) ); \
xmm2 = _mm_shuffle_epi8( xmm2, m128_const_64( 0x05080b0e0104070a, \
0x0d000306090c0f02 ) ); \
xmm3 = _mm_shuffle_epi8( xmm3, m128_const_64( 0x06090c0f0205080b, \
0x0e0104070a0d0003 ) ); \
xmm4 = _mm_shuffle_epi8( xmm4, m128_const_64( 0x070a0d000306090c, \
0x0f0205080b0e0104 ) ); \
xmm5 = _mm_shuffle_epi8( xmm5, m128_const_64( 0x080b0e0104070a0d, \
0x000306090c0f0205 ) ); \
xmm6 = _mm_shuffle_epi8( xmm6, m128_const_64( 0x090c0f0205080b0e, \
0x0104070a0d000306 ) ); \
xmm7 = _mm_shuffle_epi8( xmm7, m128_const_64( 0x0e0104070a0d0003, \
0x06090c0f0205080b ) ); \
xmm0 = _mm_shuffle_epi8( xmm0, SUBSH_MASK0 ); \
xmm1 = _mm_shuffle_epi8( xmm1, SUBSH_MASK1 ); \
xmm2 = _mm_shuffle_epi8( xmm2, SUBSH_MASK2 ); \
xmm3 = _mm_shuffle_epi8( xmm3, SUBSH_MASK3 ); \
xmm4 = _mm_shuffle_epi8( xmm4, SUBSH_MASK4 ); \
xmm5 = _mm_shuffle_epi8( xmm5, SUBSH_MASK5 ); \
xmm6 = _mm_shuffle_epi8( xmm6, SUBSH_MASK6 ); \
xmm7 = _mm_shuffle_epi8( xmm7, SUBSH_MASK7 ); \
SUBMIX( xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, \
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 ); \
}\
@@ -262,22 +259,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
xmm15 = _mm_xor_si128( xmm15, \
casti_m128i( round_const_q, round_counter ) ); \
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm8 = _mm_shuffle_epi8( xmm8, m128_const_64( 0x04070a0d00030609, \
0x0c0f0205080b0e01 ) ); \
xmm9 = _mm_shuffle_epi8( xmm9, m128_const_64( 0x06090c0f0205080b, \
0x0e0104070a0d0003 ) ); \
xmm10 = _mm_shuffle_epi8( xmm10, m128_const_64( 0x080b0e0104070a0d, \
0x000306090c0f0205 ) ); \
xmm11 = _mm_shuffle_epi8( xmm11, m128_const_64( 0x0e0104070a0d0003, \
0x06090c0f0205080b ) ); \
xmm12 = _mm_shuffle_epi8( xmm12, m128_const_64( 0x0306090c0f020508, \
0x0b0e0104070a0d00 ) ); \
xmm13 = _mm_shuffle_epi8( xmm13, m128_const_64( 0x05080b0e0104070a, \
0x0d000306090c0f02 ) ); \
xmm14 = _mm_shuffle_epi8( xmm14, m128_const_64( 0x070a0d000306090c, \
0x0f0205080b0e0104 ) ); \
xmm15 = _mm_shuffle_epi8( xmm15, m128_const_64( 0x090c0f0205080b0e, \
0x0104070a0d000306 ) ); \
xmm8 = _mm_shuffle_epi8( xmm8, SUBSH_MASK1 ); \
xmm9 = _mm_shuffle_epi8( xmm9, SUBSH_MASK3 ); \
xmm10 = _mm_shuffle_epi8( xmm10, SUBSH_MASK5 ); \
xmm11 = _mm_shuffle_epi8( xmm11, SUBSH_MASK7 ); \
xmm12 = _mm_shuffle_epi8( xmm12, SUBSH_MASK0 ); \
xmm13 = _mm_shuffle_epi8( xmm13, SUBSH_MASK2 ); \
xmm14 = _mm_shuffle_epi8( xmm14, SUBSH_MASK4 ); \
xmm15 = _mm_shuffle_epi8( xmm15, SUBSH_MASK6 ); \
/* SubBytes + MixBytes */\
SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 , xmm7 ); \
@@ -294,22 +283,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
xmm7 = _mm_xor_si128( xmm7, \
casti_m128i( round_const_q, round_counter+1 ) ); \
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm0 = _mm_shuffle_epi8( xmm0, m128_const_64( 0x04070a0d00030609, \
0x0c0f0205080b0e01 ) ); \
xmm1 = _mm_shuffle_epi8( xmm1, m128_const_64( 0x06090c0f0205080b, \
0x0e0104070a0d0003 ) ); \
xmm2 = _mm_shuffle_epi8( xmm2, m128_const_64( 0x080b0e0104070a0d, \
0x000306090c0f0205 ) ); \
xmm3 = _mm_shuffle_epi8( xmm3, m128_const_64( 0x0e0104070a0d0003, \
0x06090c0f0205080b ) ); \
xmm4 = _mm_shuffle_epi8( xmm4, m128_const_64( 0x0306090c0f020508, \
0x0b0e0104070a0d00 ) ); \
xmm5 = _mm_shuffle_epi8( xmm5, m128_const_64( 0x05080b0e0104070a, \
0x0d000306090c0f02 ) ); \
xmm6 = _mm_shuffle_epi8( xmm6, m128_const_64( 0x070a0d000306090c, \
0x0f0205080b0e0104 ) ); \
xmm7 = _mm_shuffle_epi8( xmm7, m128_const_64( 0x090c0f0205080b0e, \
0x0104070a0d000306 ) ); \
xmm0 = _mm_shuffle_epi8( xmm0, SUBSH_MASK1 ); \
xmm1 = _mm_shuffle_epi8( xmm1, SUBSH_MASK3 ); \
xmm2 = _mm_shuffle_epi8( xmm2, SUBSH_MASK5 ); \
xmm3 = _mm_shuffle_epi8( xmm3, SUBSH_MASK7 ); \
xmm4 = _mm_shuffle_epi8( xmm4, SUBSH_MASK0 ); \
xmm5 = _mm_shuffle_epi8( xmm5, SUBSH_MASK2 ); \
xmm6 = _mm_shuffle_epi8( xmm6, SUBSH_MASK4 ); \
xmm7 = _mm_shuffle_epi8( xmm7, SUBSH_MASK6 ); \
/* SubBytes + MixBytes */\
SUBMIX( xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, \
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 ); \
@@ -324,7 +305,7 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
* clobbers: t0-t7
*/
#define Matrix_Transpose(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\
t0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 );\
t0 = TRANSP_MASK; \
\
i6 = _mm_shuffle_epi8(i6, t0);\
i0 = _mm_shuffle_epi8(i0, t0);\
@@ -412,7 +393,7 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
i4 = _mm_unpacklo_epi64(i4, i5);\
t1 = _mm_unpackhi_epi64(t1, i5);\
t2 = i6;\
o0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 ); \
o0 = TRANSP_MASK; \
i6 = _mm_unpacklo_epi64(i6, i7);\
t2 = _mm_unpackhi_epi64(t2, i7);\
/* load transpose mask into a register, because it will be used 8 times */\
@@ -653,3 +634,4 @@ void OF1024( __m128i* chaining )
return;
}
#endif
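
The effect of the rewrite above is that the shuffle masks and round constants become compile-time data instead of being rebuilt with m128_const_64() inside every round. A small standalone sketch, assuming GCC/Clang where an __m128i may be brace-initialised as two 64-bit lanes (exactly how the tables at the top of this file are written); EXAMPLE_MASK reuses the SUBSH_MASK0 value:

#include <immintrin.h>

// lives in .rodata, so each use is a single aligned 128-bit load
static const __m128i EXAMPLE_MASK = { 0x0b0e0104070a0d00, 0x0306090c0f020508 };

static inline __m128i shift_bytes_row0( __m128i x )
{
   return _mm_shuffle_epi8( x, EXAMPLE_MASK );   // no per-call mask construction
}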

View File

@@ -11,6 +11,45 @@
#include <wmmintrin.h>
#include "hash-groestl256.h"
static const __m128i round_const_l0[] __attribute__ ((aligned (64))) =
{
{ 0x7060504030201000, 0xffffffffffffffff },
{ 0x7161514131211101, 0xffffffffffffffff },
{ 0x7262524232221202, 0xffffffffffffffff },
{ 0x7363534333231303, 0xffffffffffffffff },
{ 0x7464544434241404, 0xffffffffffffffff },
{ 0x7565554535251505, 0xffffffffffffffff },
{ 0x7666564636261606, 0xffffffffffffffff },
{ 0x7767574737271707, 0xffffffffffffffff },
{ 0x7868584838281808, 0xffffffffffffffff },
{ 0x7969594939291909, 0xffffffffffffffff }
};
static const __m128i round_const_l7[] __attribute__ ((aligned (64))) =
{
{ 0x0000000000000000, 0x8f9fafbfcfdfefff },
{ 0x0000000000000000, 0x8e9eaebecedeeefe },
{ 0x0000000000000000, 0x8d9dadbdcdddedfd },
{ 0x0000000000000000, 0x8c9cacbcccdcecfc },
{ 0x0000000000000000, 0x8b9babbbcbdbebfb },
{ 0x0000000000000000, 0x8a9aaabacadaeafa },
{ 0x0000000000000000, 0x8999a9b9c9d9e9f9 },
{ 0x0000000000000000, 0x8898a8b8c8d8e8f8 },
{ 0x0000000000000000, 0x8797a7b7c7d7e7f7 },
{ 0x0000000000000000, 0x8696a6b6c6d6e6f6 }
};
static const __m128i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02 };
static const __m128i SUBSH_MASK0 = { 0x0c0f0104070b0e00, 0x03060a0d08020509 };
static const __m128i SUBSH_MASK1 = { 0x0e090205000d0801, 0x04070c0f0a03060b };
static const __m128i SUBSH_MASK2 = { 0x080b0306010f0a02, 0x05000e090c04070d };
static const __m128i SUBSH_MASK3 = { 0x0a0d040702090c03, 0x0601080b0e05000f };
static const __m128i SUBSH_MASK4 = { 0x0b0e0500030a0d04, 0x0702090c0f060108 };
static const __m128i SUBSH_MASK5 = { 0x0d080601040c0f05, 0x00030b0e0907020a };
static const __m128i SUBSH_MASK6 = { 0x0f0a0702050e0906, 0x01040d080b00030c };
static const __m128i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e };
#define tos(a) #a
#define tostr(a) tos(a)
@@ -26,8 +65,6 @@
i = _mm_xor_si128(i, j);\
}
/**/
/* Yet another implementation of MixBytes.
This time we use the formulae (3) from the paper "Byte Slicing Groestl".
Input: a0, ..., a7
@@ -141,36 +178,6 @@
b1 = _mm_xor_si128(b1, a4);\
}/*MixBytes*/
static const uint64_t round_const_l0[] __attribute__ ((aligned (64))) =
{
0x7060504030201000, 0xffffffffffffffff,
0x7161514131211101, 0xffffffffffffffff,
0x7262524232221202, 0xffffffffffffffff,
0x7363534333231303, 0xffffffffffffffff,
0x7464544434241404, 0xffffffffffffffff,
0x7565554535251505, 0xffffffffffffffff,
0x7666564636261606, 0xffffffffffffffff,
0x7767574737271707, 0xffffffffffffffff,
0x7868584838281808, 0xffffffffffffffff,
0x7969594939291909, 0xffffffffffffffff
};
static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) =
{
0x0000000000000000, 0x8f9fafbfcfdfefff,
0x0000000000000000, 0x8e9eaebecedeeefe,
0x0000000000000000, 0x8d9dadbdcdddedfd,
0x0000000000000000, 0x8c9cacbcccdcecfc,
0x0000000000000000, 0x8b9babbbcbdbebfb,
0x0000000000000000, 0x8a9aaabacadaeafa,
0x0000000000000000, 0x8999a9b9c9d9e9f9,
0x0000000000000000, 0x8898a8b8c8d8e8f8,
0x0000000000000000, 0x8797a7b7c7d7e7f7,
0x0000000000000000, 0x8696a6b6c6d6e6f6
};
/* one round
* i = round number
* a0-a7 = input rows
@@ -190,29 +197,21 @@ static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) =
\
/* ShiftBytes + SubBytes (interleaved) */\
b0 = _mm_xor_si128(b0, b0);\
a0 = _mm_shuffle_epi8( a0, m128_const_64( 0x03060a0d08020509, \
0x0c0f0104070b0e00 ) ); \
a0 = _mm_shuffle_epi8( a0, SUBSH_MASK0 ); \
a0 = _mm_aesenclast_si128( a0, b0 );\
a1 = _mm_shuffle_epi8( a1, m128_const_64( 0x04070c0f0a03060b, \
0x0e090205000d0801 ) ); \
a1 = _mm_shuffle_epi8( a1, SUBSH_MASK1 ); \
a1 = _mm_aesenclast_si128( a1, b0 );\
a2 = _mm_shuffle_epi8( a2, m128_const_64( 0x05000e090c04070d, \
0x080b0306010f0a02 ) ); \
a2 = _mm_shuffle_epi8( a2, SUBSH_MASK2 ); \
a2 = _mm_aesenclast_si128( a2, b0 );\
a3 = _mm_shuffle_epi8( a3, m128_const_64( 0x0601080b0e05000f, \
0x0a0d040702090c03 ) ); \
a3 = _mm_shuffle_epi8( a3, SUBSH_MASK3 ); \
a3 = _mm_aesenclast_si128( a3, b0 );\
a4 = _mm_shuffle_epi8( a4, m128_const_64( 0x0702090c0f060108, \
0x0b0e0500030a0d04 ) ); \
a4 = _mm_shuffle_epi8( a4, SUBSH_MASK4 ); \
a4 = _mm_aesenclast_si128( a4, b0 );\
a5 = _mm_shuffle_epi8( a5, m128_const_64( 0x00030b0e0907020a, \
0x0d080601040c0f05 ) ); \
a5 = _mm_shuffle_epi8( a5, SUBSH_MASK5 ); \
a5 = _mm_aesenclast_si128( a5, b0 );\
a6 = _mm_shuffle_epi8( a6, m128_const_64( 0x01040d080b00030c, \
0x0f0a0702050e0906 ) ); \
a6 = _mm_shuffle_epi8( a6, SUBSH_MASK6 ); \
a6 = _mm_aesenclast_si128( a6, b0 );\
a7 = _mm_shuffle_epi8( a7, m128_const_64( 0x02050f0a0d01040e, \
0x090c000306080b07 ) ); \
a7 = _mm_shuffle_epi8( a7, SUBSH_MASK7 ); \
a7 = _mm_aesenclast_si128( a7, b0 );\
\
/* MixBytes */\
@@ -241,8 +240,9 @@ static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) =
* outputs: i0, o1-o3
* clobbers: t0
*/
#define Matrix_Transpose_A(i0, i1, i2, i3, o1, o2, o3, t0){\
t0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 ); \
t0 = TRANSP_MASK; \
\
i0 = _mm_shuffle_epi8(i0, t0);\
i1 = _mm_shuffle_epi8(i1, t0);\

View File

@@ -214,6 +214,98 @@ HashReturn_gr update_and_final_groestl256( hashState_groestl256* ctx,
return SUCCESS_GR;
}
int groestl256_full( hashState_groestl256* ctx,
void* output, const void* input, DataLength_gr databitlen )
{
int i;
ctx->hashlen = 32;
for ( i = 0; i < SIZE256; i++ )
{
ctx->chaining[i] = _mm_setzero_si128();
ctx->buffer[i] = _mm_setzero_si128();
}
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
INIT256( ctx->chaining );
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;
const int len = (int)databitlen / 128;
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
const int hash_offset = SIZE256 - hashlen_m128i;
int rem = ctx->rem_ptr;
int blocks = len / SIZE256;
__m128i* in = (__m128i*)input;
// --- update ---
// digest any full blocks, process directly from input
for ( i = 0; i < blocks; i++ )
TF512( ctx->chaining, &in[ i * SIZE256 ] );
ctx->buf_ptr = blocks * SIZE256;
// cryptonight has a 200 byte input, which is not a whole number of __m128i;
// the remainder is only 8 bytes, i.e. one u64.
if ( databitlen % 128 !=0 )
{
// must be cryptonight, copy 64 bits of data
*(uint64_t*)(ctx->buffer) = *(uint64_t*)(&in[ ctx->buf_ptr ] );
i = -1; // signal for odd length
}
else
{
// Copy any remaining data to buffer for final transform
for ( i = 0; i < len % SIZE256; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
i += rem; // use i as rem_ptr in final
}
//--- final ---
// adjust for final block
blocks++;
if ( i == len - 1 )
{
// all padding at once
ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0,
0, 0,0,0, 0,0,0,0x80 );
}
else
{
if ( i == -1 )
{
// cryptonight odd length
((uint64_t*)ctx->buffer)[ 1 ] = 0x80ull;
// finish the block with zero and length padding as normal
i = 0;
}
else
{
// add first padding
ctx->buffer[i] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
}
// add zero padding
for ( i += 1; i < SIZE256 - 1; i++ )
ctx->buffer[i] = _mm_setzero_si128();
// add length padding
// cheat since we know the block count is trivial, good if block < 256
ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0,
0, 0,0,0, 0,0,0,0 );
}
// digest final padding block and do output transform
TF512( ctx->chaining, ctx->buffer );
OF512( ctx->chaining );
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m128i( output, i ) = ctx->chaining[ hash_offset + i ];
return SUCCESS_GR;
}
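
The new groestl256_full() folds init, update and final into one call. A hypothetical caller sketch (buffer names are illustrative; 640 is an 80-byte block header expressed in bits, matching how databitlen is divided by 128 above):

hashState_groestl256 ctx;
uint32_t hash[8] __attribute__ ((aligned (64)));

// one-shot hash, replacing a separate init + update_and_final_groestl256() pair
groestl256_full( &ctx, hash, block_header, 640 );   // 80 bytes = 640 bits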
/* hash bit sequence */
HashReturn_gr hash_groestl256(int hashbitlen,
const BitSequence_gr* data,

View File

@@ -115,4 +115,7 @@ HashReturn_gr hash_groestli256( int, const BitSequence_gr*, DataLength_gr,
HashReturn_gr update_and_final_groestl256( hashState_groestl256*, void*,
const void*, DataLength_gr );
int groestl256_full( hashState_groestl256* ctx,
void* output, const void* input, DataLength_gr databitlen );
#endif /* __hash_h */

View File

@@ -1,4 +1,7 @@
#include "groestl-gate.h"
#if !defined(GROESTL_8WAY) && !defined(GROESTLX16R_4WAY)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@@ -88,4 +91,4 @@ int scanhash_groestl( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -23,7 +23,6 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
int i;
ctx->hashlen = hashlen;
SET_CONSTANTS();
if (ctx->chaining == NULL || ctx->buffer == NULL)
return 1;
@@ -36,9 +35,6 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
// The only non-zero in the IV is len. It can be hard coded.
ctx->chaining[ 3 ] = m512_const2_64( 0, 0x0100000000000000 );
// uint64_t len = U64BIG((uint64_t)LENGTH);
// ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
// INIT256_4way(ctx->chaining);
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;
@@ -46,6 +42,77 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
return 0;
}
int groestl256_4way_full( groestl256_4way_context* ctx, void* output,
const void* input, uint64_t databitlen )
{
const int len = (int)databitlen / 128;
const int hashlen_m128i = 32 / 16; // bytes to __m128i
const int hash_offset = SIZE256 - hashlen_m128i;
int rem = ctx->rem_ptr;
int blocks = len / SIZE256;
__m512i* in = (__m512i*)input;
int i;
if (ctx->chaining == NULL || ctx->buffer == NULL)
return 1;
for ( i = 0; i < SIZE256; i++ )
{
ctx->chaining[i] = m512_zero;
ctx->buffer[i] = m512_zero;
}
// The only non-zero in the IV is len. It can be hard coded.
ctx->chaining[ 3 ] = m512_const2_64( 0, 0x0100000000000000 );
ctx->buf_ptr = 0;
ctx->rem_ptr = 0;
// --- update ---
// digest any full blocks, process directly from input
for ( i = 0; i < blocks; i++ )
TF512_4way( ctx->chaining, &in[ i * SIZE256 ] );
ctx->buf_ptr = blocks * SIZE256;
// copy any remaining data to buffer, it may already contain data
// from a previous update for a midstate precalc
for ( i = 0; i < len % SIZE256; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
i += rem; // use i as rem_ptr in final
//--- final ---
blocks++; // adjust for final block
if ( i == SIZE256 - 1 )
{
// only 1 vector left in buffer, all padding at once
ctx->buffer[i] = m512_const2_64( (uint64_t)blocks << 56, 0x80 );
}
else
{
// add first padding
ctx->buffer[i] = m512_const4_64( 0, 0x80, 0, 0x80 );
// add zero padding
for ( i += 1; i < SIZE256 - 1; i++ )
ctx->buffer[i] = m512_zero;
// add length padding, second last byte is zero unless blocks > 255
ctx->buffer[i] = m512_const2_64( (uint64_t)blocks << 56, 0 );
}
// digest final padding block and do output transform
TF512_4way( ctx->chaining, ctx->buffer );
OF512_4way( ctx->chaining );
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m512i( output, i ) = ctx->chaining[ hash_offset + i ];
return 0;
}
int groestl256_4way_update_close( groestl256_4way_context* ctx, void* output,
const void* input, uint64_t databitlen )
{

View File

@@ -71,5 +71,8 @@ int groestl256_4way_init( groestl256_4way_context*, uint64_t );
int groestl256_4way_update_close( groestl256_4way_context*, void*,
const void*, uint64_t );
int groestl256_4way_full( groestl256_4way_context*, void*,
const void*, uint64_t );
#endif
#endif

View File

@@ -14,17 +14,78 @@
#include "groestl256-hash-4way.h"
#if defined(__VAES__)
static const __m128i round_const_l0[] __attribute__ ((aligned (64))) =
{
{ 0x7060504030201000, 0xffffffffffffffff },
{ 0x7161514131211101, 0xffffffffffffffff },
{ 0x7262524232221202, 0xffffffffffffffff },
{ 0x7363534333231303, 0xffffffffffffffff },
{ 0x7464544434241404, 0xffffffffffffffff },
{ 0x7565554535251505, 0xffffffffffffffff },
{ 0x7666564636261606, 0xffffffffffffffff },
{ 0x7767574737271707, 0xffffffffffffffff },
{ 0x7868584838281808, 0xffffffffffffffff },
{ 0x7969594939291909, 0xffffffffffffffff }
};
/* global constants */
__m512i ROUND_CONST_Lx;
__m512i ROUND_CONST_L0[ROUNDS512];
__m512i ROUND_CONST_L7[ROUNDS512];
//__m512i ROUND_CONST_P[ROUNDS1024];
//__m512i ROUND_CONST_Q[ROUNDS1024];
__m512i TRANSP_MASK;
__m512i SUBSH_MASK[8];
__m512i ALL_1B;
__m512i ALL_FF;
static const __m128i round_const_l7[] __attribute__ ((aligned (64))) =
{
{ 0x0000000000000000, 0x8f9fafbfcfdfefff },
{ 0x0000000000000000, 0x8e9eaebecedeeefe },
{ 0x0000000000000000, 0x8d9dadbdcdddedfd },
{ 0x0000000000000000, 0x8c9cacbcccdcecfc },
{ 0x0000000000000000, 0x8b9babbbcbdbebfb },
{ 0x0000000000000000, 0x8a9aaabacadaeafa },
{ 0x0000000000000000, 0x8999a9b9c9d9e9f9 },
{ 0x0000000000000000, 0x8898a8b8c8d8e8f8 },
{ 0x0000000000000000, 0x8797a7b7c7d7e7f7 },
{ 0x0000000000000000, 0x8696a6b6c6d6e6f6 }
};
static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02,
0x1d1519111c141810, 0x1f171b131e161a12,
0x2d2529212c242820, 0x2f272b232e262a22,
0x3d3539313c343830, 0x3f373b333e363a32 };
static const __m512i SUBSH_MASK0 = { 0x0c0f0104070b0e00, 0x03060a0d08020509,
0x1c1f1114171b1e10, 0x13161a1d18121519,
0x2c2f2124272b2e20, 0x23262a2d28222529,
0x3c3f3134373b3e30, 0x33363a3d38323539 };
static const __m512i SUBSH_MASK1 = { 0x0e090205000d0801, 0x04070c0f0a03060b,
0x1e191215101d1801, 0x14171c1f1a13161b,
0x2e292225202d2821, 0x24272c2f2a23262b,
0x3e393235303d3831, 0x34373c3f3a33363b };
static const __m512i SUBSH_MASK2 = { 0x080b0306010f0a02, 0x05000e090c04070d,
0x181b1316111f1a12, 0x15101e191c14171d,
0x282b2326212f2a22, 0x25202e292c24272d,
0x383b3336313f3a32, 0x35303e393c34373d };
static const __m512i SUBSH_MASK3 = { 0x0a0d040702090c03, 0x0601080b0e05000f,
0x1a1d141712191c13, 0x1611181b1e15101f,
0x2a2d242722292c23, 0x2621282b2e25202f,
0x3a3d343732393c33, 0x3631383b3e35303f };
static const __m512i SUBSH_MASK4 = { 0x0b0e0500030a0d04, 0x0702090c0f060108,
0x1b1e1510131a1d14, 0x1712191c1f161118,
0x2b2e2520232a2d24, 0x2722292c2f262128,
0x3b3e3530333a3d34, 0x3732393c3f363138 };
static const __m512i SUBSH_MASK5 = { 0x0d080601040c0f05, 0x00030b0e0907020a,
0x1d181611141c1f15, 0x10131b1e1917121a,
0x2d282621242c2f25, 0x20232b2e2927222a,
0x3d383631343c3f35, 0x30333b3e3937323a };
static const __m512i SUBSH_MASK6 = { 0x0f0a0702050e0906, 0x01040d080b00030c,
0x1f1a1712151e1916, 0x11141d181b10131c,
0x2f2a2722252e2926, 0x21242d282b20232c,
0x3f3a3732353e3936, 0x31343d383b30333c };
static const __m512i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e,
0x191c101316181b17, 0x12151f1a1d11141e,
0x292c202326282b27, 0x22252f2a2d21242e,
0x393c303336383b37, 0x32353f3a3d31343e };
#define tos(a) #a
#define tostr(a) tos(a)
@@ -40,8 +101,6 @@ __m512i ALL_FF;
i = _mm512_xor_si512(i, j);\
}
/**/
/* Yet another implementation of MixBytes.
This time we use the formulae (3) from the paper "Byte Slicing Groestl".
Input: a0, ..., a7
@@ -155,95 +214,36 @@ __m512i ALL_FF;
b1 = _mm512_xor_si512(b1, a4);\
}/*MixBytes*/
// calculate the round constants separately and load at startup
#define SET_CONSTANTS(){\
ALL_1B = _mm512_set1_epi32( 0x1b1b1b1b );\
TRANSP_MASK = _mm512_set_epi32( \
0x3f373b33, 0x3e363a32, 0x3d353931, 0x3c343830, \
0x2f272b23, 0x2e262a22, 0x2d252921, 0x2c242820, \
0x1f171b13, 0x1e161a12, 0x1d151911, 0x1c141810, \
0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800 ); \
SUBSH_MASK[0] = _mm512_set_epi32( \
0x33363a3d, 0x38323539, 0x3c3f3134, 0x373b3e30, \
0x23262a2d, 0x28222529, 0x2c2f2124, 0x272b2e20, \
0x13161a1d, 0x18121519, 0x1c1f1114, 0x171b1e10, \
0x03060a0d, 0x08020509, 0x0c0f0104, 0x070b0e00 ); \
SUBSH_MASK[1] = _mm512_set_epi32( \
0x34373c3f, 0x3a33363b, 0x3e393235, 0x303d3831, \
0x24272c2f, 0x2a23262b, 0x2e292225, 0x202d2821, \
0x14171c1f, 0x1a13161b, 0x1e191215, 0x101d1801, \
0x04070c0f, 0x0a03060b, 0x0e090205, 0x000d0801 );\
SUBSH_MASK[2] = _mm512_set_epi32( \
0x35303e39, 0x3c34373d, 0x383b3336, 0x313f3a32, \
0x25202e29, 0x2c24272d, 0x282b2326, 0x212f2a22, \
0x15101e19, 0x1c14171d, 0x181b1316, 0x111f1a12, \
0x05000e09, 0x0c04070d, 0x080b0306, 0x010f0a02 );\
SUBSH_MASK[3] = _mm512_set_epi32( \
0x3631383b, 0x3e35303f, 0x3a3d3437, 0x32393c33, \
0x2621282b, 0x2e25202f, 0x2a2d2427, 0x22292c23, \
0x1611181b, 0x1e15101f, 0x1a1d1417, 0x12191c13, \
0x0601080b, 0x0e05000f, 0x0a0d0407, 0x02090c03 );\
SUBSH_MASK[4] = _mm512_set_epi32( \
0x3732393c, 0x3f363138, 0x3b3e3530, 0x333a3d34, \
0x2722292c, 0x2f262128, 0x2b2e2520, 0x232a2d24, \
0x1712191c, 0x1f161118, 0x1b1e1510, 0x131a1d14, \
0x0702090c, 0x0f060108, 0x0b0e0500, 0x030a0d04 );\
SUBSH_MASK[5] = _mm512_set_epi32( \
0x30333b3e, 0x3937323a, 0x3d383631, 0x343c3f35, \
0x20232b2e, 0x2927222a, 0x2d282621, 0x242c2f25, \
0x10131b1e, 0x1917121a, 0x1d181611, 0x141c1f15, \
0x00030b0e, 0x0907020a, 0x0d080601, 0x040c0f05 );\
SUBSH_MASK[6] = _mm512_set_epi32( \
0x31343d38, 0x3b30333c, 0x3f3a3732, 0x353e3936, \
0x21242d28, 0x2b20232c, 0x2f2a2722, 0x252e2926, \
0x11141d18, 0x1b10131c, 0x1f1a1712, 0x151e1916, \
0x01040d08, 0x0b00030c, 0x0f0a0702, 0x050e0906 );\
SUBSH_MASK[7] = _mm512_set_epi32( \
0x32353f3a, 0x3d31343e, 0x393c3033, 0x36383b37, \
0x22252f2a, 0x2d21242e, 0x292c2023, 0x26282b27, \
0x12151f1a, 0x1d11141e, 0x191c1013, 0x16181b17, \
0x02050f0a, 0x0d01040e, 0x090c0003, 0x06080b07 );\
for ( i = 0; i < ROUNDS512; i++ ) \
{\
ROUND_CONST_L0[i] = _mm512_set4_epi32( 0xffffffff, 0xffffffff, \
0x70605040 ^ ( i * 0x01010101 ), 0x30201000 ^ ( i * 0x01010101 ) ); \
ROUND_CONST_L7[i] = _mm512_set4_epi32( 0x8f9fafbf ^ ( i * 0x01010101 ), \
0xcfdfefff ^ ( i * 0x01010101 ), 0x00000000, 0x00000000 ); \
}\
ROUND_CONST_Lx = _mm512_set4_epi32( 0xffffffff, 0xffffffff, \
0x00000000, 0x00000000 ); \
}while(0);\
#define ROUND(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
/* AddRoundConstant */\
b1 = ROUND_CONST_Lx;\
a0 = _mm512_xor_si512( a0, (ROUND_CONST_L0[i]) );\
b1 = m512_const2_64( 0xffffffffffffffff, 0 ); \
a0 = _mm512_xor_si512( a0, m512_const1_128( round_const_l0[i] ) );\
a1 = _mm512_xor_si512( a1, b1 );\
a2 = _mm512_xor_si512( a2, b1 );\
a3 = _mm512_xor_si512( a3, b1 );\
a4 = _mm512_xor_si512( a4, b1 );\
a5 = _mm512_xor_si512( a5, b1 );\
a6 = _mm512_xor_si512( a6, b1 );\
a7 = _mm512_xor_si512( a7, (ROUND_CONST_L7[i]) );\
a7 = _mm512_xor_si512( a7, m512_const1_128( round_const_l7[i] ) );\
\
/* ShiftBytes + SubBytes (interleaved) */\
b0 = _mm512_xor_si512( b0, b0 );\
a0 = _mm512_shuffle_epi8( a0, (SUBSH_MASK[0]) );\
a0 = _mm512_shuffle_epi8( a0, SUBSH_MASK0 );\
a0 = _mm512_aesenclast_epi128(a0, b0 );\
a1 = _mm512_shuffle_epi8( a1, (SUBSH_MASK[1]) );\
a1 = _mm512_shuffle_epi8( a1, SUBSH_MASK1 );\
a1 = _mm512_aesenclast_epi128(a1, b0 );\
a2 = _mm512_shuffle_epi8( a2, (SUBSH_MASK[2]) );\
a2 = _mm512_shuffle_epi8( a2, SUBSH_MASK2 );\
a2 = _mm512_aesenclast_epi128(a2, b0 );\
a3 = _mm512_shuffle_epi8( a3, (SUBSH_MASK[3]) );\
a3 = _mm512_shuffle_epi8( a3, SUBSH_MASK3 );\
a3 = _mm512_aesenclast_epi128(a3, b0 );\
a4 = _mm512_shuffle_epi8( a4, (SUBSH_MASK[4]) );\
a4 = _mm512_shuffle_epi8( a4, SUBSH_MASK4 );\
a4 = _mm512_aesenclast_epi128(a4, b0 );\
a5 = _mm512_shuffle_epi8( a5, (SUBSH_MASK[5]) );\
a5 = _mm512_shuffle_epi8( a5, SUBSH_MASK5 );\
a5 = _mm512_aesenclast_epi128(a5, b0 );\
a6 = _mm512_shuffle_epi8( a6, (SUBSH_MASK[6]) );\
a6 = _mm512_shuffle_epi8( a6, SUBSH_MASK6 );\
a6 = _mm512_aesenclast_epi128(a6, b0 );\
a7 = _mm512_shuffle_epi8( a7, (SUBSH_MASK[7]) );\
a7 = _mm512_shuffle_epi8( a7, SUBSH_MASK7 );\
a7 = _mm512_aesenclast_epi128( a7, b0 );\
\
/* MixBytes */\
@@ -390,29 +390,6 @@ __m512i ALL_FF;
}/**/
void INIT256_4way( __m512i* chaining )
{
static __m512i xmm0, xmm2, xmm6, xmm7;
static __m512i xmm12, xmm13, xmm14, xmm15;
/* load IV into registers xmm12 - xmm15 */
xmm12 = chaining[0];
xmm13 = chaining[1];
xmm14 = chaining[2];
xmm15 = chaining[3];
/* transform chaining value from column ordering into row ordering */
/* we put two rows (64 bit) of the IV into one 128-bit XMM register */
Matrix_Transpose_A(xmm12, xmm13, xmm14, xmm15, xmm2, xmm6, xmm7, xmm0);
/* store transposed IV */
chaining[0] = xmm12;
chaining[1] = xmm2;
chaining[2] = xmm6;
chaining[3] = xmm7;
}
void TF512_4way( __m512i* chaining, __m512i* message )
{
static __m512i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;

View File

@@ -19,10 +19,6 @@
int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen )
{
int i;
SET_CONSTANTS();
if (ctx->chaining == NULL || ctx->buffer == NULL)
return 1;
@@ -99,7 +95,6 @@ int groestl512_4way_full( groestl512_4way_context* ctx, void* output,
// --- init ---
SET_CONSTANTS();
memset_zero_512( ctx->chaining, SIZE512 );
memset_zero_512( ctx->buffer, SIZE512 );
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );

View File

@@ -15,16 +15,86 @@
#if defined(__VAES__)
/* global constants */
__m512i ROUND_CONST_Lx;
//__m128i ROUND_CONST_L0[ROUNDS512];
//__m128i ROUND_CONST_L7[ROUNDS512];
__m512i ROUND_CONST_P[ROUNDS1024];
__m512i ROUND_CONST_Q[ROUNDS1024];
__m512i TRANSP_MASK;
__m512i SUBSH_MASK[8];
__m512i ALL_1B;
__m512i ALL_FF;
static const __m128i round_const_p[] __attribute__ ((aligned (64))) =
{
{ 0x7060504030201000, 0xf0e0d0c0b0a09080 },
{ 0x7161514131211101, 0xf1e1d1c1b1a19181 },
{ 0x7262524232221202, 0xf2e2d2c2b2a29282 },
{ 0x7363534333231303, 0xf3e3d3c3b3a39383 },
{ 0x7464544434241404, 0xf4e4d4c4b4a49484 },
{ 0x7565554535251505, 0xf5e5d5c5b5a59585 },
{ 0x7666564636261606, 0xf6e6d6c6b6a69686 },
{ 0x7767574737271707, 0xf7e7d7c7b7a79787 },
{ 0x7868584838281808, 0xf8e8d8c8b8a89888 },
{ 0x7969594939291909, 0xf9e9d9c9b9a99989 },
{ 0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a },
{ 0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b },
{ 0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c },
{ 0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d }
};
static const __m128i round_const_q[] __attribute__ ((aligned (64))) =
{
{ 0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f },
{ 0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e },
{ 0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d },
{ 0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c },
{ 0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b },
{ 0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a },
{ 0x8999a9b9c9d9e9f9, 0x0919293949596979 },
{ 0x8898a8b8c8d8e8f8, 0x0818283848586878 },
{ 0x8797a7b7c7d7e7f7, 0x0717273747576777 },
{ 0x8696a6b6c6d6e6f6, 0x0616263646566676 },
{ 0x8595a5b5c5d5e5f5, 0x0515253545556575 },
{ 0x8494a4b4c4d4e4f4, 0x0414243444546474 },
{ 0x8393a3b3c3d3e3f3, 0x0313233343536373 },
{ 0x8292a2b2c2d2e2f2, 0x0212223242526272 }
};
static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02,
0x1d1519111c141810, 0x1f171b131e161a12,
0x2d2529212c242820, 0x2f272b232e262a22,
0x3d3539313c343830, 0x3f373b333e363a32 };
static const __m512i SUBSH_MASK0 = { 0x0b0e0104070a0d00, 0x0306090c0f020508,
0x1b1e1114171a1d10, 0x1316191c1f121518,
0x2b2e2124272a2d20, 0x2326292c2f222528,
0x3b3e3134373a3d30, 0x3336393c3f323538 };
static const __m512i SUBSH_MASK1 = { 0x0c0f0205080b0e01, 0x04070a0d00030609,
0x1c1f1215181b1e11, 0x14171a1d10131619,
0x2c2f2225282b2e21, 0x24272a2d20232629,
0x3c3f3235383b3e31, 0x34373a3d30333639 };
static const __m512i SUBSH_MASK2 = { 0x0d000306090c0f02, 0x05080b0e0104070a,
0x1d101316191c1f12, 0x15181b1e1114171a,
0x2d202326292c2f22, 0x25282b2e2124272a,
0x3d303336393c3f32, 0x35383b3e3134373a };
static const __m512i SUBSH_MASK3 = { 0x0e0104070a0d0003, 0x06090c0f0205080b,
0x1e1114171a1d1013, 0x16191c1f1215181b,
0x2e2124272a2d2023, 0x26292c2f2225282b,
0x3e3134373a3d3033, 0x36393c3f3235383b };
static const __m512i SUBSH_MASK4 = { 0x0f0205080b0e0104, 0x070a0d000306090c,
0x1f1215181b1e1114, 0x171a1d101316191c,
0x2f2225282b2e2124, 0x272a2d202326292c,
0x3f3235383b3e3134, 0x373a3d303336393c };
static const __m512i SUBSH_MASK5 = { 0x000306090c0f0205, 0x080b0e0104070a0d,
0x101316191c1f1215, 0x181b1e1114171a1d,
0x202326292c2f2225, 0x282b2e2124272a2d,
0x303336393c3f3235, 0x383b3e3134373a3d };
static const __m512i SUBSH_MASK6 = { 0x0104070a0d000306, 0x090c0f0205080b0e,
0x1114171a1d101316, 0x191c1f1215181b1e,
0x2124272a2d202326, 0x292c2f2225282b2e,
0x3134373a3d303336, 0x393c3f3235383b3e };
static const __m512i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003,
0x16191c1f1215181b, 0x1e1114171a1d1013,
0x26292c2f2225282b, 0x2e2124272a2d2023,
0x36393c3f3235383b, 0x3e3134373a3d3033 };
#define tos(a) #a
#define tostr(a) tos(a)
@@ -155,69 +225,6 @@ __m512i ALL_FF;
b1 = _mm512_xor_si512(b1, a4);\
}/*MixBytes*/
// calculate the round constants separately and load at startup
#define SET_CONSTANTS(){\
ALL_FF = _mm512_set1_epi32( 0xffffffff );\
ALL_1B = _mm512_set1_epi32( 0x1b1b1b1b );\
TRANSP_MASK = _mm512_set_epi32( \
0x3f373b33, 0x3e363a32, 0x3d353931, 0x3c343830, \
0x2f272b23, 0x2e262a22, 0x2d252921, 0x2c242820, \
0x1f171b13, 0x1e161a12, 0x1d151911, 0x1c141810, \
0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800 ); \
SUBSH_MASK[0] = _mm512_set_epi32( \
0x3336393c, 0x3f323538, 0x3b3e3134, 0x373a3d30, \
0x2326292c, 0x2f222528, 0x2b2e2124, 0x272a2d20, \
0x1316191c, 0x1f121518, 0x1b1e1114, 0x171a1d10, \
0x0306090c, 0x0f020508, 0x0b0e0104, 0x070a0d00 ); \
SUBSH_MASK[1] = _mm512_set_epi32( \
0x34373a3d, 0x30333639, 0x3c3f3235, 0x383b3e31, \
0x24272a2d, 0x20232629, 0x2c2f2225, 0x282b2e21, \
0x14171a1d, 0x10131619, 0x1c1f1215, 0x181b1e11, \
0x04070a0d, 0x00030609, 0x0c0f0205, 0x080b0e01 ); \
SUBSH_MASK[2] = _mm512_set_epi32( \
0x35383b3e, 0x3134373a, 0x3d303336, 0x393c3f32, \
0x25282b2e, 0x2124272a, 0x2d202326, 0x292c2f22, \
0x15181b1e, 0x1114171a, 0x1d101316, 0x191c1f12, \
0x05080b0e, 0x0104070a, 0x0d000306, 0x090c0f02 ); \
SUBSH_MASK[3] = _mm512_set_epi32( \
0x36393c3f, 0x3235383b, 0x3e313437, 0x3a3d3033, \
0x26292c2f, 0x2225282b, 0x2e212427, 0x2a2d2023, \
0x16191c1f, 0x1215181b, 0x1e111417, 0x1a1d1013, \
0x06090c0f, 0x0205080b, 0x0e010407, 0x0a0d0003 ); \
SUBSH_MASK[4] = _mm512_set_epi32( \
0x373a3d30, 0x3336393c, 0x3f323538, 0x3b3e3134, \
0x272a2d20, 0x2326292c, 0x2f222528, 0x2b2e2124, \
0x171a1d10, 0x1316191c, 0x1f121518, 0x1b1e1114, \
0x070a0d00, 0x0306090c, 0x0f020508, 0x0b0e0104 ); \
SUBSH_MASK[5] = _mm512_set_epi32( \
0x383b3e31, 0x34373a3d, 0x30333639, 0x3c3f3235, \
0x282b2e21, 0x24272a2d, 0x20232629, 0x2c2f2225, \
0x181b1e11, 0x14171a1d, 0x10131619, 0x1c1f1215, \
0x080b0e01, 0x04070a0d, 0x00030609, 0x0c0f0205 ); \
SUBSH_MASK[6] = _mm512_set_epi32( \
0x393c3f32, 0x35383b3e, 0x3134373a, 0x3d303336, \
0x292c2f22, 0x25282b2e, 0x2124272a, 0x2d202326, \
0x191c1f12, 0x15181b1e, 0x1114171a, 0x1d101316, \
0x090c0f02, 0x05080b0e, 0x0104070a, 0x0d000306 ); \
SUBSH_MASK[7] = _mm512_set_epi32( \
0x3e313437, 0x3a3d3033, 0x36393c3f, 0x3235383b, \
0x2e212427, 0x2a2d2023, 0x26292c2f, 0x2225282b, \
0x1e111417, 0x1a1d1013, 0x16191c1f, 0x1215181b, \
0x0e010407, 0x0a0d0003, 0x06090c0f, 0x0205080b ); \
for( i = 0; i < ROUNDS1024; i++ ) \
{ \
ROUND_CONST_P[i] = _mm512_set4_epi32( 0xf0e0d0c0 ^ (i * 0x01010101), \
0xb0a09080 ^ (i * 0x01010101), \
0x70605040 ^ (i * 0x01010101), \
0x30201000 ^ (i * 0x01010101) ); \
ROUND_CONST_Q[i] = _mm512_set4_epi32( 0x0f1f2f3f ^ (i * 0x01010101), \
0x4f5f6f7f ^ (i * 0x01010101), \
0x8f9fafbf ^ (i * 0x01010101), \
0xcfdfefff ^ (i * 0x01010101));\
} \
}while(0);\
/* one round
* a0-a7 = input rows
* b0-b7 = output rows
@@ -242,30 +249,32 @@ __m512i ALL_FF;
for ( round_counter = 0; round_counter < 14; round_counter += 2 ) \
{ \
/* AddRoundConstant P1024 */\
xmm8 = _mm512_xor_si512( xmm8, ( ROUND_CONST_P[ round_counter ] ) );\
xmm8 = _mm512_xor_si512( xmm8, m512_const1_128( \
casti_m128i( round_const_p, round_counter ) ) ); \
/* ShiftBytes P1024 + pre-AESENCLAST */\
xmm8 = _mm512_shuffle_epi8( xmm8, ( SUBSH_MASK[0] ) );\
xmm9 = _mm512_shuffle_epi8( xmm9, ( SUBSH_MASK[1] ) );\
xmm10 = _mm512_shuffle_epi8( xmm10, ( SUBSH_MASK[2] ) );\
xmm11 = _mm512_shuffle_epi8( xmm11, ( SUBSH_MASK[3] ) );\
xmm12 = _mm512_shuffle_epi8( xmm12, ( SUBSH_MASK[4] ) );\
xmm13 = _mm512_shuffle_epi8( xmm13, ( SUBSH_MASK[5] ) );\
xmm14 = _mm512_shuffle_epi8( xmm14, ( SUBSH_MASK[6] ) );\
xmm15 = _mm512_shuffle_epi8( xmm15, ( SUBSH_MASK[7] ) );\
xmm8 = _mm512_shuffle_epi8( xmm8, SUBSH_MASK0 ); \
xmm9 = _mm512_shuffle_epi8( xmm9, SUBSH_MASK1 );\
xmm10 = _mm512_shuffle_epi8( xmm10, SUBSH_MASK2 );\
xmm11 = _mm512_shuffle_epi8( xmm11, SUBSH_MASK3 );\
xmm12 = _mm512_shuffle_epi8( xmm12, SUBSH_MASK4 );\
xmm13 = _mm512_shuffle_epi8( xmm13, SUBSH_MASK5 );\
xmm14 = _mm512_shuffle_epi8( xmm14, SUBSH_MASK6 );\
xmm15 = _mm512_shuffle_epi8( xmm15, SUBSH_MASK7 );\
/* SubBytes + MixBytes */\
SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
\
/* AddRoundConstant P1024 */\
xmm0 = _mm512_xor_si512( xmm0, ( ROUND_CONST_P[ round_counter+1 ] ) );\
xmm0 = _mm512_xor_si512( xmm0, m512_const1_128( \
casti_m128i( round_const_p, round_counter+1 ) ) ); \
/* ShiftBytes P1024 + pre-AESENCLAST */\
xmm0 = _mm512_shuffle_epi8( xmm0, ( SUBSH_MASK[0] ) );\
xmm1 = _mm512_shuffle_epi8( xmm1, ( SUBSH_MASK[1] ) );\
xmm2 = _mm512_shuffle_epi8( xmm2, ( SUBSH_MASK[2] ) );\
xmm3 = _mm512_shuffle_epi8( xmm3, ( SUBSH_MASK[3] ) );\
xmm4 = _mm512_shuffle_epi8( xmm4, ( SUBSH_MASK[4] ) );\
xmm5 = _mm512_shuffle_epi8( xmm5, ( SUBSH_MASK[5] ) );\
xmm6 = _mm512_shuffle_epi8( xmm6, ( SUBSH_MASK[6] ) );\
xmm7 = _mm512_shuffle_epi8( xmm7, ( SUBSH_MASK[7] ) );\
xmm0 = _mm512_shuffle_epi8( xmm0, SUBSH_MASK0 );\
xmm1 = _mm512_shuffle_epi8( xmm1, SUBSH_MASK1 );\
xmm2 = _mm512_shuffle_epi8( xmm2, SUBSH_MASK2 );\
xmm3 = _mm512_shuffle_epi8( xmm3, SUBSH_MASK3 );\
xmm4 = _mm512_shuffle_epi8( xmm4, SUBSH_MASK4 );\
xmm5 = _mm512_shuffle_epi8( xmm5, SUBSH_MASK5 );\
xmm6 = _mm512_shuffle_epi8( xmm6, SUBSH_MASK6 );\
xmm7 = _mm512_shuffle_epi8( xmm7, SUBSH_MASK7 );\
/* SubBytes + MixBytes */\
SUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
}\
@@ -284,16 +293,17 @@ __m512i ALL_FF;
xmm12 = _mm512_xor_si512( xmm12, xmm1 );\
xmm13 = _mm512_xor_si512( xmm13, xmm1 );\
xmm14 = _mm512_xor_si512( xmm14, xmm1 );\
xmm15 = _mm512_xor_si512( xmm15, ( ROUND_CONST_Q[ round_counter ] ) );\
xmm15 = _mm512_xor_si512( xmm15, m512_const1_128( \
casti_m128i( round_const_q, round_counter ) ) ); \
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm8 = _mm512_shuffle_epi8( xmm8, ( SUBSH_MASK[1] ) );\
xmm9 = _mm512_shuffle_epi8( xmm9, ( SUBSH_MASK[3] ) );\
xmm10 = _mm512_shuffle_epi8( xmm10, ( SUBSH_MASK[5] ) );\
xmm11 = _mm512_shuffle_epi8( xmm11, ( SUBSH_MASK[7] ) );\
xmm12 = _mm512_shuffle_epi8( xmm12, ( SUBSH_MASK[0] ) );\
xmm13 = _mm512_shuffle_epi8( xmm13, ( SUBSH_MASK[2] ) );\
xmm14 = _mm512_shuffle_epi8( xmm14, ( SUBSH_MASK[4] ) );\
xmm15 = _mm512_shuffle_epi8( xmm15, ( SUBSH_MASK[6] ) );\
xmm8 = _mm512_shuffle_epi8( xmm8, SUBSH_MASK1 );\
xmm9 = _mm512_shuffle_epi8( xmm9, SUBSH_MASK3 );\
xmm10 = _mm512_shuffle_epi8( xmm10, SUBSH_MASK5 );\
xmm11 = _mm512_shuffle_epi8( xmm11, SUBSH_MASK7 );\
xmm12 = _mm512_shuffle_epi8( xmm12, SUBSH_MASK0 );\
xmm13 = _mm512_shuffle_epi8( xmm13, SUBSH_MASK2 );\
xmm14 = _mm512_shuffle_epi8( xmm14, SUBSH_MASK4 );\
xmm15 = _mm512_shuffle_epi8( xmm15, SUBSH_MASK6 );\
/* SubBytes + MixBytes */\
SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
\
@@ -306,16 +316,17 @@ __m512i ALL_FF;
xmm4 = _mm512_xor_si512( xmm4, xmm9 );\
xmm5 = _mm512_xor_si512( xmm5, xmm9 );\
xmm6 = _mm512_xor_si512( xmm6, xmm9 );\
xmm7 = _mm512_xor_si512( xmm7, ( ROUND_CONST_Q[ round_counter+1 ] ) );\
xmm7 = _mm512_xor_si512( xmm7, m512_const1_128( \
casti_m128i( round_const_q, round_counter+1 ) ) ); \
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm0 = _mm512_shuffle_epi8( xmm0, ( SUBSH_MASK[1] ) );\
xmm1 = _mm512_shuffle_epi8( xmm1, ( SUBSH_MASK[3] ) );\
xmm2 = _mm512_shuffle_epi8( xmm2, ( SUBSH_MASK[5] ) );\
xmm3 = _mm512_shuffle_epi8( xmm3, ( SUBSH_MASK[7] ) );\
xmm4 = _mm512_shuffle_epi8( xmm4, ( SUBSH_MASK[0] ) );\
xmm5 = _mm512_shuffle_epi8( xmm5, ( SUBSH_MASK[2] ) );\
xmm6 = _mm512_shuffle_epi8( xmm6, ( SUBSH_MASK[4] ) );\
xmm7 = _mm512_shuffle_epi8( xmm7, ( SUBSH_MASK[6] ) );\
xmm0 = _mm512_shuffle_epi8( xmm0, SUBSH_MASK1 );\
xmm1 = _mm512_shuffle_epi8( xmm1, SUBSH_MASK3 );\
xmm2 = _mm512_shuffle_epi8( xmm2, SUBSH_MASK5 );\
xmm3 = _mm512_shuffle_epi8( xmm3, SUBSH_MASK7 );\
xmm4 = _mm512_shuffle_epi8( xmm4, SUBSH_MASK0 );\
xmm5 = _mm512_shuffle_epi8( xmm5, SUBSH_MASK2 );\
xmm6 = _mm512_shuffle_epi8( xmm6, SUBSH_MASK4 );\
xmm7 = _mm512_shuffle_epi8( xmm7, SUBSH_MASK6 );\
/* SubBytes + MixBytes */\
SUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
}\

View File

@@ -1,4 +1,7 @@
#include "myrgr-gate.h"
#if !defined(MYRGR_8WAY) && !defined(MYRGR_4WAY)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@@ -86,3 +89,4 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -35,6 +35,8 @@
#include "sph_groestl.h"
#if !defined(__AES__)
#ifdef __cplusplus
extern "C"{
#endif
@@ -3116,4 +3118,6 @@ sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
#ifdef __cplusplus
}
#endif // !AES
#endif

View File

@@ -42,6 +42,7 @@ extern "C"{
#include <stddef.h>
#include "algo/sha/sph_types.h"
#if !defined(__AES__)
/**
* Output size (in bits) for Groestl-224.
*/
@@ -326,4 +327,5 @@ void sph_groestl512_addbits_and_close(
}
#endif
#endif // !AES
#endif

View File

@@ -1,156 +0,0 @@
#include "algo-gate-api.h"
#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>
#include <stdint.h>
#include <stdlib.h>
#include "sph_hefty1.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/skein/sph_skein.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/luffa/luffa_for_sse2.h"
#ifdef __AES__
#include "algo/echo/aes_ni/hash_api.h"
#endif
void bastionhash(void *output, const void *input)
{
unsigned char hash[64] __attribute__ ((aligned (64)));
#ifdef __AES__
hashState_echo ctx_echo;
#else
sph_echo512_context ctx_echo;
#endif
hashState_luffa ctx_luffa;
sph_fugue512_context ctx_fugue;
sph_whirlpool_context ctx_whirlpool;
sph_shabal512_context ctx_shabal;
sph_hamsi512_context ctx_hamsi;
sph_skein512_context ctx_skein;
HEFTY1(input, 80, hash);
init_luffa( &ctx_luffa, 512 );
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
if (hash[0] & 0x8)
{
sph_fugue512_init(&ctx_fugue);
sph_fugue512(&ctx_fugue, hash, 64);
sph_fugue512_close(&ctx_fugue, hash);
} else {
sph_skein512_init( &ctx_skein );
sph_skein512( &ctx_skein, hash, 64 );
sph_skein512_close( &ctx_skein, hash );
}
sph_whirlpool_init(&ctx_whirlpool);
sph_whirlpool(&ctx_whirlpool, hash, 64);
sph_whirlpool_close(&ctx_whirlpool, hash);
sph_fugue512_init(&ctx_fugue);
sph_fugue512(&ctx_fugue, hash, 64);
sph_fugue512_close(&ctx_fugue, hash);
if (hash[0] & 0x8)
{
#ifdef __AES__
init_echo( &ctx_echo, 512 );
update_final_echo ( &ctx_echo,(BitSequence*)hash,
(const BitSequence*)hash, 512 );
#else
sph_echo512_init(&ctx_echo);
sph_echo512(&ctx_echo, hash, 64);
sph_echo512_close(&ctx_echo, hash);
#endif
} else {
init_luffa( &ctx_luffa, 512 );
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
}
sph_shabal512_init(&ctx_shabal);
sph_shabal512(&ctx_shabal, hash, 64);
sph_shabal512_close(&ctx_shabal, hash);
sph_skein512_init( &ctx_skein );
sph_skein512( &ctx_skein, hash, 64 );
sph_skein512_close( &ctx_skein, hash );
if (hash[0] & 0x8)
{
sph_shabal512_init(&ctx_shabal);
sph_shabal512(&ctx_shabal, hash, 64);
sph_shabal512_close(&ctx_shabal, hash);
} else {
sph_whirlpool_init(&ctx_whirlpool);
sph_whirlpool(&ctx_whirlpool, hash, 64);
sph_whirlpool_close(&ctx_whirlpool, hash);
}
sph_shabal512_init(&ctx_shabal);
sph_shabal512(&ctx_shabal, hash, 64);
sph_shabal512_close(&ctx_shabal, hash);
if (hash[0] & 0x8)
{
sph_hamsi512_init(&ctx_hamsi);
sph_hamsi512(&ctx_hamsi, hash, 64);
sph_hamsi512_close(&ctx_hamsi, hash);
} else {
init_luffa( &ctx_luffa, 512 );
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
}
memcpy(output, hash, 32);
}
int scanhash_bastion( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t _ALIGN(64) hash32[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
for (int i=0; i < 19; i++)
be32enc(&endiandata[i], pdata[i]);
do {
be32enc(&endiandata[19], n);
bastionhash(hash32, endiandata);
if (hash32[7] < Htarg && fulltest(hash32, ptarget)) {
pdata[19] = n;
submit_solution( work, hash32, mythr );
}
n++;
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
bool register_bastion_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_bastion;
gate->hash = (void*)&bastionhash;
return true;
};

View File

@@ -1,111 +0,0 @@
#include <string.h>
#include <openssl/sha.h>
#include <stdint.h>
#include "algo-gate-api.h"
#include "sph_hefty1.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/blake/sph_blake.h"
#include "algo/groestl/sph_groestl.h"
/* Combines top 64-bits from each hash into a single hash */
static void combine_hashes(uint32_t *out, uint32_t *hash1, uint32_t *hash2, uint32_t *hash3, uint32_t *hash4)
{
uint32_t *hash[4] = { hash1, hash2, hash3, hash4 };
/* Transpose first 64 bits of each hash into out */
memset(out, 0, 32);
int bits = 0;
for (unsigned int i = 7; i >= 6; i--) {
for (uint32_t mask = 0x80000000; mask; mask >>= 1) {
for (unsigned int k = 0; k < 4; k++) {
out[(255 - bits)/32] <<= 1;
if ((hash[k][i] & mask) != 0)
out[(255 - bits)/32] |= 1;
bits++;
}
}
}
}
extern void heavyhash(unsigned char* output, const unsigned char* input, int len)
{
unsigned char hash1[32];
HEFTY1(input, len, hash1);
// HEFTY1 is new, so take an extra security measure to eliminate
// * the possibility of collisions:
// *
// * Hash(x) = SHA256(x + HEFTY1(x))
// *
// * N.B. '+' is concatenation.
//
unsigned char hash2[32];
SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, input, len);
SHA256_Update(&ctx, hash1, sizeof(hash1));
SHA256_Final(hash2, &ctx);
// * Additional security: Do not rely on a single cryptographic hash
// * function. Instead, combine the outputs of 4 of the most secure
// * cryptographic hash functions-- SHA256, KECCAK512, GROESTL512
// * and BLAKE512.
uint32_t hash3[16];
sph_keccak512_context keccakCtx;
sph_keccak512_init(&keccakCtx);
sph_keccak512(&keccakCtx, input, len);
sph_keccak512(&keccakCtx, hash1, sizeof(hash1));
sph_keccak512_close(&keccakCtx, (void *)&hash3);
uint32_t hash4[16];
sph_groestl512_context groestlCtx;
sph_groestl512_init(&groestlCtx);
sph_groestl512(&groestlCtx, input, len);
sph_groestl512(&groestlCtx, hash1, sizeof(hash1));
sph_groestl512_close(&groestlCtx, (void *)&hash4);
uint32_t hash5[16];
sph_blake512_context blakeCtx;
sph_blake512_init(&blakeCtx);
sph_blake512(&blakeCtx, input, len);
sph_blake512(&blakeCtx, (unsigned char *)&hash1, sizeof(hash1));
sph_blake512_close(&blakeCtx, (void *)&hash5);
uint32_t *final = (uint32_t *)output;
combine_hashes(final, (uint32_t *)hash2, hash3, hash4, hash5);
}
int scanhash_heavy( uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[8];
uint32_t start_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
do {
heavyhash((unsigned char *)hash, (unsigned char *)pdata, 80);
if (hash[7] <= ptarget[7]) {
if (fulltest(hash, ptarget)) {
*hashes_done = pdata[19] - start_nonce;
return 1;
break;
}
}
pdata[19]++;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = pdata[19] - start_nonce;
return 0;
}
bool register_heavy_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_heavy;
gate->hash = (void*)&heavyhash;
return true;
};

View File

@@ -1,4 +1,7 @@
#include "jha-gate.h"
#if !defined(JHA_8WAY) && !defined(JHA_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -133,3 +136,4 @@ int scanhash_jha( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -3,6 +3,8 @@
#include "keccak-hash-4way.h"
#include "keccak-gate.h"
#if defined(__AVX2__)
static const uint64_t RC[] = {
0x0000000000000001, 0x0000000000008082,
0x800000000000808A, 0x8000000080008000,
@@ -239,7 +241,7 @@ keccak512_8way_close(void *cc, void *dst)
#endif // AVX512
#if defined(__AVX2__)
// AVX2
#define INPUT_BUF(size) do { \
size_t j; \

View File

@@ -1,4 +1,6 @@
#include "algo-gate-api.h"
#include "keccak-gate.h"
#if !defined(KECCAK_8WAY) && !defined(KECCAK_4WAY)
#include <stdlib.h>
#include <string.h>
@@ -49,3 +51,4 @@ int scanhash_keccak( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "algo-gate-api.h"
#include "keccak-gate.h"
#if !defined(KECCAK_8WAY) && !defined(KECCAK_4WAY)
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
@@ -48,3 +51,4 @@ int scanhash_sha3d( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,63 +0,0 @@
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "sph_luffa.h"
void luffahash(void *output, const void *input)
{
unsigned char _ALIGN(128) hash[64];
sph_luffa512_context ctx_luffa;
sph_luffa512_init(&ctx_luffa);
sph_luffa512 (&ctx_luffa, input, 80);
sph_luffa512_close(&ctx_luffa, (void*) hash);
memcpy(output, hash, 32);
}
int scanhash_luffa(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done)
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t _ALIGN(64) hash64[8];
uint32_t _ALIGN(64) endiandata[20];
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
for (int i=0; i < 19; i++)
be32enc(&endiandata[i], pdata[i]);
do {
be32enc(&endiandata[19], n);
luffahash(hash64, endiandata);
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return true;
}
n++;
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
bool register_luffa_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_luffa;
gate->hash = (void*)&luffahash;
return true;
};

View File

@@ -1,3 +1,6 @@
#if !defined(LUFFA_FOR_SSE2_H__)
#define LUFFA_FOR_SSE2_H__ 1
/*
* luffa_for_sse2.h
* Version 2.0 (Sep 15th 2009)
@@ -48,8 +51,6 @@
typedef struct {
uint32 buffer[8] __attribute((aligned(32)));
__m128i chainv[10] __attribute((aligned(32))); /* Chaining values */
// uint64 bitlen[2]; /* Message length in bits */
// uint32 rembitlen; /* Length of buffer data to be hashed */
int hashbitlen;
int rembytes;
} hashState_luffa;
@@ -67,4 +68,4 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
int luffa_full( hashState_luffa *state, BitSequence* output, int hashbitlen,
const BitSequence* data, size_t inlen );
#endif // LUFFA_FOR_SSE2_H__

View File

@@ -115,9 +115,8 @@ void allium_16way_hash( void *state, const void *input )
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 );
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
@@ -125,10 +124,8 @@ void allium_16way_hash( void *state, const void *input )
intrlv_4x128( vhashA, hash8, hash9, hash10, hash11, 256 );
intrlv_4x128( vhashB, hash12, hash13, hash14, hash15, 256 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
dintrlv_4x128( hash8, hash9, hash10, hash11, vhashA, 256 );
dintrlv_4x128( hash12, hash13, hash14, hash15, vhashB, 256 );
@@ -169,7 +166,6 @@ void allium_16way_hash( void *state, const void *input )
skein256_8way_update( &ctx.skein, vhashB, 32 );
skein256_8way_close( &ctx.skein, vhashB );
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhashA, 256 );
dintrlv_8x64( hash8, hash9, hash10, hash11, hash12, hash13, hash14, hash15,
@@ -179,77 +175,43 @@ void allium_16way_hash( void *state, const void *input )
intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 256 );
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
dintrlv_4x128( state, state+32, state+64, state+96, vhash, 256 );
intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 256 );
groestl256_4way_init( &ctx.groestl, 32 );
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
dintrlv_4x128( state+128, state+160, state+192, state+224, vhash, 256 );
intrlv_4x128( vhash, hash8, hash9, hash10, hash11, 256 );
groestl256_4way_init( &ctx.groestl, 32 );
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
dintrlv_4x128( state+256, state+288, state+320, state+352, vhash, 256 );
intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 );
groestl256_4way_init( &ctx.groestl, 32 );
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
dintrlv_4x128( state+384, state+416, state+448, state+480, vhash, 256 );
#else
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+256, hash8, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+288, hash9, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+320, hash10, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+352, hash11, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+384, hash12, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+416, hash13, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+448, hash14, 256 );
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, state+480, hash15, 256 );
groestl256_full( &ctx.groestl, state, hash0, 256 );
groestl256_full( &ctx.groestl, state+32, hash1, 256 );
groestl256_full( &ctx.groestl, state+64, hash2, 256 );
groestl256_full( &ctx.groestl, state+96, hash3, 256 );
groestl256_full( &ctx.groestl, state+128, hash4, 256 );
groestl256_full( &ctx.groestl, state+160, hash5, 256 );
groestl256_full( &ctx.groestl, state+192, hash6, 256 );
groestl256_full( &ctx.groestl, state+224, hash7, 256 );
groestl256_full( &ctx.groestl, state+256, hash8, 256 );
groestl256_full( &ctx.groestl, state+288, hash9, 256 );
groestl256_full( &ctx.groestl, state+320, hash10, 256 );
groestl256_full( &ctx.groestl, state+352, hash11, 256 );
groestl256_full( &ctx.groestl, state+384, hash12, 256 );
groestl256_full( &ctx.groestl, state+416, hash13, 256 );
groestl256_full( &ctx.groestl, state+448, hash14, 256 );
groestl256_full( &ctx.groestl, state+480, hash15, 256 );
#endif
}
@@ -393,28 +355,14 @@ void allium_8way_hash( void *hash, const void *input )
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x64( hash4, hash5, hash6, hash7, vhashB, 256 );
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash4, hash4, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash5, hash5, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash6, hash6, 256 );
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash7, hash7, 256 );
groestl256_full( &ctx.groestl, hash0, hash0, 256 );
groestl256_full( &ctx.groestl, hash1, hash1, 256 );
groestl256_full( &ctx.groestl, hash2, hash2, 256 );
groestl256_full( &ctx.groestl, hash3, hash3, 256 );
groestl256_full( &ctx.groestl, hash4, hash4, 256 );
groestl256_full( &ctx.groestl, hash5, hash5, 256 );
groestl256_full( &ctx.groestl, hash6, hash6, 256 );
groestl256_full( &ctx.groestl, hash7, hash7, 256 );
}
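The change above collapses each "init, update-and-final, then memcpy a saved context back" sequence into a single groestl256_full() call that re-initializes its state internally. The toy sketch below illustrates the general shape of such a one-shot wrapper; the toy_* names are hypothetical and are not the project's Groestl implementation.

#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in context and primitives, used only to illustrate the
   "full = init + update + close" pattern adopted above. */
typedef struct { uint64_t acc; } toy_ctx;

static void toy_init( toy_ctx *c ) { c->acc = 0xcbf29ce484222325ULL; }

static void toy_update( toy_ctx *c, const void *data, size_t len )
{
    const uint8_t *p = data;
    for ( size_t i = 0; i < len; i++ )
        c->acc = ( c->acc ^ p[i] ) * 0x100000001b3ULL;   /* FNV-1a style mixing */
}

static void toy_close( toy_ctx *c, void *out ) { memcpy( out, &c->acc, 8 ); }

/* One-shot wrapper: every call starts from a fresh state, so the caller no
   longer needs to restore a saved context between lanes. */
static void toy_full( toy_ctx *c, void *out, const void *in, size_t len )
{
    toy_init( c );
    toy_update( c, in, len );
    toy_close( c, out );
}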
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,

View File

@@ -1,4 +1,7 @@
#include "lyra2-gate.h"
#if !( defined(ALLIUM_16WAY) || defined(ALLIUM_8WAY) || defined(ALLIUM_4WAY) )
#include <memory.h>
#include "algo/blake/sph_blake.h"
#include "algo/keccak/sph_keccak.h"
@@ -107,3 +110,4 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -75,7 +75,6 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
bool init_lyra2rev2_4way_ctx();
#else
void lyra2rev2_hash( void *state, const void *input );
int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );

View File

@@ -1,4 +1,7 @@
#include "lyra2-gate.h"
#if !( defined(LYRA2H_8WAY) || defined(LYRA2H_4WAY) )
#include <memory.h>
#include <mm_malloc.h>
#include "lyra2.h"
@@ -71,3 +74,4 @@ int scanhash_lyra2h( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "lyra2-gate.h"
#if !( defined(LYRA2REV2_16WAY) || defined(LYRA2REV2_8WAY) || defined(LYRA2REV2_4WAY) )
#include <memory.h>
#include "algo/blake/sph_blake.h"
#include "algo/cubehash/sph_cubehash.h"
@@ -107,4 +110,4 @@ int scanhash_lyra2rev2( struct work *work,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -79,19 +79,16 @@ void lyra2rev3_16way_hash( void *state, const void *input )
dintrlv_2x256( hash14, hash15, vhash, 256 );
intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 256 );
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
dintrlv_4x128( hash0, hash1, hash2, hash3, vhash, 256 );
intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 256 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhash, 256 );
intrlv_4x128( vhash, hash8, hash9, hash10, hash11, 256 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
dintrlv_4x128( hash8, hash9, hash10, hash11, vhash, 256 );
intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
dintrlv_4x128( hash12, hash13, hash14, hash15, vhash, 256 );
intrlv_2x256( vhash, hash0, hash1, 256 );
@@ -224,21 +221,14 @@ void lyra2rev3_8way_hash( void *state, const void *input )
LYRA2REV3( l2v3_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
LYRA2REV3( l2v3_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash4, (const byte*) hash4, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash5, (const byte*) hash5, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash6, (const byte*) hash6, 32 );
cubehashInit( &ctx.cube, 256, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash7, (const byte*) hash7, 32 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
@@ -265,25 +255,24 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
uint32_t *hash7 = &hash[7<<3];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19; // aligned
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
if ( bench ) ptarget[7] = 0x0000ff;
mm256_bswap32_intrlv80_8x32( vdata, pdata );
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
blake256_8way_init( &l2v3_8way_ctx.blake );
blake256_8way_update( &l2v3_8way_ctx.blake, vdata, 64 );
do
{
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
n+3, n+2, n+1, n ) );
lyra2rev3_8way_hash( hash, vdata );
pdata[19] = n;
@@ -291,15 +280,17 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
if ( unlikely( hash7[lane] <= Htarg ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
n += 8;
} while ( likely( (n < max_nonce-8) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
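valid_hash() replaces the old two-step test (hash7[lane] <= Htarg followed by fulltest) with a single comparison of the full 256-bit hash against the target. A hedged sketch of the kind of check this performs is shown below; it is not necessarily the project's exact implementation.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Compare a 256-bit little-endian hash against the target, most significant
   limb first. Sketch only; the limb layout is assumed from the surrounding code. */
static bool valid_hash_sketch( const uint32_t hash[8], const uint32_t target[8] )
{
    uint64_t h[4], t[4];
    memcpy( h, hash, 32 );     /* copies avoid aliasing concerns in this sketch */
    memcpy( t, target, 32 );
    for ( int i = 3; i >= 0; i-- )
    {
        if ( h[i] < t[i] ) return true;    /* hash below target: candidate share */
        if ( h[i] > t[i] ) return false;
    }
    return false;                          /* equal is treated as not below      */
}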

View File

@@ -1,4 +1,7 @@
#include "lyra2-gate.h"
#if !( defined(LYRA2REV3_16WAY) || defined(LYRA2REV3_8WAY) || defined(LYRA2REV3_4WAY) )
#include <memory.h>
#include "algo/blake/sph_blake.h"
#include "algo/cubehash/sph_cubehash.h"
@@ -96,4 +99,4 @@ int scanhash_lyra2rev3( struct work *work,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -97,41 +97,42 @@ void lyra2z_16way_hash( void *state, const void *input )
int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint64_t hash[4*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce - 16;
__m512i *noncev = (__m512i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
if ( bench ) ptarget[7] = 0x0000ff;
mm512_bswap32_intrlv80_16x32( vdata, pdata );
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
lyra2z_16way_midstate( vdata );
do {
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
n+11, n+10, n+ 9, n+ 8,
n+ 7, n+ 6, n+ 5, n+ 4,
n+ 3, n+ 2, n+ 1, n ) );
lyra2z_16way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 16; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
for ( int lane = 0; lane < 16; lane++ )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
const uint64_t *lane_hash = hash + (lane<<2);
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
n += 16;
} while ( (n < max_nonce-16) && !work_restart[thr_id].restart);
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -195,39 +196,40 @@ void lyra2z_8way_hash( void *state, const void *input )
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint64_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
if ( bench ) ptarget[7] = 0x0000ff;
mm256_bswap32_intrlv80_8x32( vdata, pdata );
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
lyra2z_8way_midstate( vdata );
do {
*noncev = mm256_bswap_32(
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
lyra2z_8way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 8; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
for ( int lane = 0; lane < 8; lane++ )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
const uint64_t *lane_hash = hash + (lane<<2);
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -274,39 +276,40 @@ void lyra2z_4way_hash( void *state, const void *input )
int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint64_t hash[4*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
__m128i *noncev = (__m128i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( opt_benchmark )
ptarget[7] = 0x0000ff;
if ( bench ) ptarget[7] = 0x0000ff;
mm128_bswap32_intrlv80_4x32( vdata, pdata );
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
lyra2z_4way_midstate( vdata );
do {
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
lyra2z_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
for ( int lane = 0; lane < 4; lane++ )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
const uint64_t *lane_hash = hash + (lane<<2);
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
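The scanhash loops above now keep the lane nonces in one vector that is set once before the loop, bumped by the lane count each pass, and byte-swapped only when a share is reported. A minimal SSE2 sketch of that bookkeeping is shown below; it uses plain intrinsics rather than the project's m128_* helpers and omits the hashing itself.

#include <stdint.h>
#include <emmintrin.h>

/* Sketch of the nonce handling used by the 4-way scanhash loops above. */
static void scan_nonces_sketch( uint32_t first_nonce, uint32_t last_nonce )
{
    __m128i noncev = _mm_set_epi32( first_nonce + 3, first_nonce + 2,
                                    first_nonce + 1, first_nonce );
    for ( uint32_t n = first_nonce; n < last_nonce; n += 4 )
    {
        /* ...hash the 4 lanes built from noncev here... */
        for ( int lane = 0; lane < 4; lane++ )
        {
            /* on a valid lane, the block header nonce is stored big-endian: */
            uint32_t block_nonce = __builtin_bswap32( n + lane );
            (void)block_nonce;
        }
        noncev = _mm_add_epi32( noncev, _mm_set1_epi32( 4 ) );   /* next 4 nonces */
    }
}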

View File

@@ -1,6 +1,9 @@
#include <memory.h>
#include <mm_malloc.h>
#include "lyra2-gate.h"
#if !( defined(LYRA2Z_16WAY) || defined(LYRA2Z_8WAY) || defined(LYRA2Z_4WAY) )
#include "lyra2.h"
#include "algo/blake/sph_blake.h"
#include "simd-utils.h"
@@ -80,4 +83,4 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -96,32 +96,30 @@ int scanhash_phi2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash[8];
uint32_t _ALIGN(128) endiandata[36];
uint32_t _ALIGN(128) edata[36];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
if(opt_benchmark){
ptarget[7] = 0x00ff;
}
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if( bench ) ptarget[7] = 0x00ff;
phi2_has_roots = false;
for ( int i=0; i < 36; i++ )
for ( int i = 0; i < 36; i++ )
{
be32enc(&endiandata[i], pdata[i]);
be32enc( &edata[i], pdata[i] );
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
}
do {
be32enc( &endiandata[19], n );
phi2_hash( hash, endiandata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
edata[19] = n;
phi2_hash( hash, edata );
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n;
be32enc( pdata+19, n );
submit_solution( work, hash, mythr );
}
n++;

View File

@@ -89,6 +89,9 @@ inline void initState( uint64_t State[/*16*/] )
*
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
*/
#if !defined(__AVX512F__) && !defined(__AVX2__) && !defined(__SSE2__)
inline static void blake2bLyra( uint64_t *v )
{
ROUND_LYRA(0);
@@ -114,6 +117,8 @@ inline static void reducedBlake2bLyra( uint64_t *v )
ROUND_LYRA(0);
}
#endif
/**
* Performs a squeeze operation, using Blake2b's G function as the
* internal permutation

View File

@@ -171,7 +171,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7)
#endif // AVX2 else SSE2
// Scalar
@@ -200,7 +199,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
union _ovly_512

View File

@@ -1,4 +1,7 @@
#include "nist5-gate.h"
#if !defined(NIST5_8WAY) && !defined(NIST5_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -105,13 +108,4 @@ int scanhash_nist5( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
/*
bool register_nist5_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT;
init_nist5_ctx();
gate->scanhash = (void*)&scanhash_nist5;
gate->hash = (void*)&nist5hash;
return true;
};
*/
#endif

View File

@@ -1,5 +1,8 @@
#include "cpuminer-config.h"
#include "anime-gate.h"
#if !defined(ANIME_8WAY) && !defined(ANIME_4WAY)
#include <stdio.h>
#include <string.h>
#include <stdint.h>
@@ -169,4 +172,4 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "hmq1725-gate.h"
#if !defined(HMQ1725_8WAY) && !defined(HMQ1725_4WAY)
#include <string.h>
#include <stdint.h>
#include "algo/blake/sph_blake.h"
@@ -7,10 +10,7 @@
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
@@ -21,6 +21,9 @@
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#else
#include "algo/groestl/sph_groestl.h"
#include "algo/echo/sph_echo.h"
#endif
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
@@ -392,3 +395,4 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,5 +1,8 @@
#include "cpuminer-config.h"
#include "quark-gate.h"
#if !defined(QUARK_8WAY) && !defined(QUARK_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -137,4 +140,4 @@ int scanhash_quark( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "deep-gate.h"
#if !defined(DEEP_8WAY) && !defined(DEEP_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -114,4 +117,4 @@ int scanhash_deep( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "qubit-gate.h"
#if !defined(QUBIT_8WAY) && !defined(QUBIT_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -126,4 +129,4 @@ int scanhash_qubit( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "lbry-gate.h"
#if !defined(LBRY_16WAY) && !defined(LBRY_8WAY) && !defined(LBRY_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -100,3 +103,4 @@ int scanhash_lbry( struct work *work, uint32_t max_nonce,
pdata[27] = n;
return 0;
}
#endif

View File

@@ -1,505 +0,0 @@
/*
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler, 2015 Jordan Earls
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "cpuminer-config.h"
#include "algo-gate-api.h"
#include <stdlib.h>
#include <string.h>
#define BLOCK_HEADER_SIZE 80
// windows
#ifndef htobe32
#define htobe32(x) ((uint32_t)htonl((uint32_t)(x)))
#endif
#ifdef _MSC_VER
#define ROTL(a, b) _rotl(a,b)
#define ROTR(a, b) _rotr(a,b)
#else
#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
#define ROTR(a, b) (((a) >> (b)) | ((a) << (32 - (b))))
#endif
#if defined(_MSC_VER) && defined(_M_X64)
#define _VECTOR __vectorcall
#include <intrin.h>
//#include <emmintrin.h> //SSE2
//#include <pmmintrin.h> //SSE3
//#include <tmmintrin.h> //SSSE3
//#include <smmintrin.h> //SSE4.1
//#include <nmmintrin.h> //SSE4.2
//#include <ammintrin.h> //SSE4A
//#include <wmmintrin.h> //AES
//#include <immintrin.h> //AVX
#define OPT_COMPATIBLE
#elif defined(__GNUC__) && defined(__x86_64__)
#include <x86intrin.h>
#define _VECTOR
#endif
static __thread char *scratchbuf;
#ifdef OPT_COMPATIBLE
static void _VECTOR xor_salsa8(__m128i B[4], const __m128i Bx[4], int i)
{
__m128i X0, X1, X2, X3;
if (i <= 128) {
// a xor 0 = a
X0 = B[0] = Bx[0];
X1 = B[1] = Bx[1];
X2 = B[2] = Bx[2];
X3 = B[3] = Bx[3];
} else {
X0 = B[0] = _mm_xor_si128(B[0], Bx[0]);
X1 = B[1] = _mm_xor_si128(B[1], Bx[1]);
X2 = B[2] = _mm_xor_si128(B[2], Bx[2]);
X3 = B[3] = _mm_xor_si128(B[3], Bx[3]);
}
for (i = 0; i < 4; i++) {
/* Operate on columns. */
X1.m128i_u32[0] ^= ROTL(X0.m128i_u32[0] + X3.m128i_u32[0], 7);
X2.m128i_u32[1] ^= ROTL(X1.m128i_u32[1] + X0.m128i_u32[1], 7);
X3.m128i_u32[2] ^= ROTL(X2.m128i_u32[2] + X1.m128i_u32[2], 7);
X0.m128i_u32[3] ^= ROTL(X3.m128i_u32[3] + X2.m128i_u32[3], 7);
X2.m128i_u32[0] ^= ROTL(X1.m128i_u32[0] + X0.m128i_u32[0], 9);
X3.m128i_u32[1] ^= ROTL(X2.m128i_u32[1] + X1.m128i_u32[1], 9);
X0.m128i_u32[2] ^= ROTL(X3.m128i_u32[2] + X2.m128i_u32[2], 9);
X1.m128i_u32[3] ^= ROTL(X0.m128i_u32[3] + X3.m128i_u32[3], 9);
X3.m128i_u32[0] ^= ROTL(X2.m128i_u32[0] + X1.m128i_u32[0], 13);
X0.m128i_u32[1] ^= ROTL(X3.m128i_u32[1] + X2.m128i_u32[1], 13);
X1.m128i_u32[2] ^= ROTL(X0.m128i_u32[2] + X3.m128i_u32[2], 13);
X2.m128i_u32[3] ^= ROTL(X1.m128i_u32[3] + X0.m128i_u32[3], 13);
X0.m128i_u32[0] ^= ROTL(X3.m128i_u32[0] + X2.m128i_u32[0], 18);
X1.m128i_u32[1] ^= ROTL(X0.m128i_u32[1] + X3.m128i_u32[1], 18);
X2.m128i_u32[2] ^= ROTL(X1.m128i_u32[2] + X0.m128i_u32[2], 18);
X3.m128i_u32[3] ^= ROTL(X2.m128i_u32[3] + X1.m128i_u32[3], 18);
/* Operate on rows. */
X0.m128i_u32[1] ^= ROTL(X0.m128i_u32[0] + X0.m128i_u32[3], 7); X1.m128i_u32[2] ^= ROTL(X1.m128i_u32[1] + X1.m128i_u32[0], 7);
X2.m128i_u32[3] ^= ROTL(X2.m128i_u32[2] + X2.m128i_u32[1], 7); X3.m128i_u32[0] ^= ROTL(X3.m128i_u32[3] + X3.m128i_u32[2], 7);
X0.m128i_u32[2] ^= ROTL(X0.m128i_u32[1] + X0.m128i_u32[0], 9); X1.m128i_u32[3] ^= ROTL(X1.m128i_u32[2] + X1.m128i_u32[1], 9);
X2.m128i_u32[0] ^= ROTL(X2.m128i_u32[3] + X2.m128i_u32[2], 9); X3.m128i_u32[1] ^= ROTL(X3.m128i_u32[0] + X3.m128i_u32[3], 9);
X0.m128i_u32[3] ^= ROTL(X0.m128i_u32[2] + X0.m128i_u32[1], 13); X1.m128i_u32[0] ^= ROTL(X1.m128i_u32[3] + X1.m128i_u32[2], 13);
X2.m128i_u32[1] ^= ROTL(X2.m128i_u32[0] + X2.m128i_u32[3], 13); X3.m128i_u32[2] ^= ROTL(X3.m128i_u32[1] + X3.m128i_u32[0], 13);
X0.m128i_u32[0] ^= ROTL(X0.m128i_u32[3] + X0.m128i_u32[2], 18); X1.m128i_u32[1] ^= ROTL(X1.m128i_u32[0] + X1.m128i_u32[3], 18);
X2.m128i_u32[2] ^= ROTL(X2.m128i_u32[1] + X2.m128i_u32[0], 18); X3.m128i_u32[3] ^= ROTL(X3.m128i_u32[2] + X3.m128i_u32[1], 18);
}
B[0] = _mm_add_epi32(B[0], X0);
B[1] = _mm_add_epi32(B[1], X1);
B[2] = _mm_add_epi32(B[2], X2);
B[3] = _mm_add_epi32(B[3], X3);
}
#else
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16], int i)
{
uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
if (i <= 128) {
// a xor 0 = a
x00 = B[ 0] = Bx[ 0]; x01 = B[ 1] = Bx[ 1]; x02 = B[ 2] = Bx[ 2]; x03 = B[ 3] = Bx[ 3];
x04 = B[ 4] = Bx[ 4]; x05 = B[ 5] = Bx[ 5]; x06 = B[ 6] = Bx[ 6]; x07 = B[ 7] = Bx[ 7];
x08 = B[ 8] = Bx[ 8]; x09 = B[ 9] = Bx[ 9]; x10 = B[10] = Bx[10]; x11 = B[11] = Bx[11];
x12 = B[12] = Bx[12]; x13 = B[13] = Bx[13]; x14 = B[14] = Bx[14]; x15 = B[15] = Bx[15];
} else {
x00 = (B[ 0] ^= Bx[ 0]);
x01 = (B[ 1] ^= Bx[ 1]);
x02 = (B[ 2] ^= Bx[ 2]);
x03 = (B[ 3] ^= Bx[ 3]);
x04 = (B[ 4] ^= Bx[ 4]);
x05 = (B[ 5] ^= Bx[ 5]);
x06 = (B[ 6] ^= Bx[ 6]);
x07 = (B[ 7] ^= Bx[ 7]);
x08 = (B[ 8] ^= Bx[ 8]);
x09 = (B[ 9] ^= Bx[ 9]);
x10 = (B[10] ^= Bx[10]);
x11 = (B[11] ^= Bx[11]);
x12 = (B[12] ^= Bx[12]);
x13 = (B[13] ^= Bx[13]);
x14 = (B[14] ^= Bx[14]);
x15 = (B[15] ^= Bx[15]);
}
for (i = 0; i < 8; i += 2) {
/* Operate on columns. */
x04 ^= ROTL(x00 + x12, 7); x09 ^= ROTL(x05 + x01, 7);
x14 ^= ROTL(x10 + x06, 7); x03 ^= ROTL(x15 + x11, 7);
x08 ^= ROTL(x04 + x00, 9); x13 ^= ROTL(x09 + x05, 9);
x02 ^= ROTL(x14 + x10, 9); x07 ^= ROTL(x03 + x15, 9);
x12 ^= ROTL(x08 + x04, 13); x01 ^= ROTL(x13 + x09, 13);
x06 ^= ROTL(x02 + x14, 13); x11 ^= ROTL(x07 + x03, 13);
x00 ^= ROTL(x12 + x08, 18); x05 ^= ROTL(x01 + x13, 18);
x10 ^= ROTL(x06 + x02, 18); x15 ^= ROTL(x11 + x07, 18);
/* Operate on rows. */
x01 ^= ROTL(x00 + x03, 7); x06 ^= ROTL(x05 + x04, 7);
x11 ^= ROTL(x10 + x09, 7); x12 ^= ROTL(x15 + x14, 7);
x02 ^= ROTL(x01 + x00, 9); x07 ^= ROTL(x06 + x05, 9);
x08 ^= ROTL(x11 + x10, 9); x13 ^= ROTL(x12 + x15, 9);
x03 ^= ROTL(x02 + x01, 13); x04 ^= ROTL(x07 + x06, 13);
x09 ^= ROTL(x08 + x11, 13); x14 ^= ROTL(x13 + x12, 13);
x00 ^= ROTL(x03 + x02, 18); x05 ^= ROTL(x04 + x07, 18);
x10 ^= ROTL(x09 + x08, 18); x15 ^= ROTL(x14 + x13, 18);
}
B[ 0] += x00;
B[ 1] += x01;
B[ 2] += x02;
B[ 3] += x03;
B[ 4] += x04;
B[ 5] += x05;
B[ 6] += x06;
B[ 7] += x07;
B[ 8] += x08;
B[ 9] += x09;
B[10] += x10;
B[11] += x11;
B[12] += x12;
B[13] += x13;
B[14] += x14;
B[15] += x15;
}
#endif
static const uint32_t sha256_k[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10))
/* SHA256 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
do { \
t0 = h + S1(e) + Ch(e, f, g) + k; \
t1 = S0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1; \
} while (0)
/* Adjusted round function for rotating state */
#define RNDr(S, W, i) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
W[i] + sha256_k[i])
static void sha256_transform_volatile(uint32_t *state, uint32_t *block)
{
uint32_t* W = block; // note: block must be a mutable array of 64 uint32_t
uint32_t S[8];
uint32_t t0, t1;
int i;
for (i = 16; i < 64; i += 2) {
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
}
/* 2. Initialize working variables. */
memcpy(S, state, 32);
/* 3. Mix. */
RNDr(S, W, 0);
RNDr(S, W, 1);
RNDr(S, W, 2);
RNDr(S, W, 3);
RNDr(S, W, 4);
RNDr(S, W, 5);
RNDr(S, W, 6);
RNDr(S, W, 7);
RNDr(S, W, 8);
RNDr(S, W, 9);
RNDr(S, W, 10);
RNDr(S, W, 11);
RNDr(S, W, 12);
RNDr(S, W, 13);
RNDr(S, W, 14);
RNDr(S, W, 15);
RNDr(S, W, 16);
RNDr(S, W, 17);
RNDr(S, W, 18);
RNDr(S, W, 19);
RNDr(S, W, 20);
RNDr(S, W, 21);
RNDr(S, W, 22);
RNDr(S, W, 23);
RNDr(S, W, 24);
RNDr(S, W, 25);
RNDr(S, W, 26);
RNDr(S, W, 27);
RNDr(S, W, 28);
RNDr(S, W, 29);
RNDr(S, W, 30);
RNDr(S, W, 31);
RNDr(S, W, 32);
RNDr(S, W, 33);
RNDr(S, W, 34);
RNDr(S, W, 35);
RNDr(S, W, 36);
RNDr(S, W, 37);
RNDr(S, W, 38);
RNDr(S, W, 39);
RNDr(S, W, 40);
RNDr(S, W, 41);
RNDr(S, W, 42);
RNDr(S, W, 43);
RNDr(S, W, 44);
RNDr(S, W, 45);
RNDr(S, W, 46);
RNDr(S, W, 47);
RNDr(S, W, 48);
RNDr(S, W, 49);
RNDr(S, W, 50);
RNDr(S, W, 51);
RNDr(S, W, 52);
RNDr(S, W, 53);
RNDr(S, W, 54);
RNDr(S, W, 55);
RNDr(S, W, 56);
RNDr(S, W, 57);
RNDr(S, W, 58);
RNDr(S, W, 59);
RNDr(S, W, 60);
RNDr(S, W, 61);
RNDr(S, W, 62);
RNDr(S, W, 63);
/* 4. Mix local working variables into global state */
for (i = 0; i < 8; i++)
state[i] += S[i];
}
// standard sha256 hash
#if 1
static void sha256_hash(unsigned char *hash, const unsigned char *data, int len)
{
uint32_t _ALIGN(64) S[16];
uint32_t _ALIGN(64) T[64];
int i, r;
sha256_init(S);
for (r = len; r > -9; r -= 64) {
if (r < 64)
memset(T, 0, 64);
memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
if (r >= 0 && r < 64)
((unsigned char *)T)[r] = 0x80;
for (i = 0; i < 16; i++)
T[i] = be32dec(T + i);
if (r < 56)
T[15] = 8 * len;
//sha256_transform(S, T, 0);
sha256_transform_volatile(S, T);
}
for (i = 0; i < 8; i++)
be32enc((uint32_t *)hash + i, S[i]);
}
#else
#include <openssl/sha.h>
static void sha256_hash(unsigned char *hash, const unsigned char *data, int len)
{
SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, data, len);
SHA256_Final(hash, &ctx);
}
#endif
// hash exactly 64 bytes (ie, sha256 block size)
static void sha256_hash512(uint32_t *hash, const uint32_t *data)
{
uint32_t _ALIGN(64) S[16];
uint32_t _ALIGN(64) T[64];
uchar _ALIGN(64) E[64*4] = { 0 };
int i;
sha256_init(S);
for (i = 0; i < 16; i++)
T[i] = be32dec(&data[i]);
sha256_transform_volatile(S, T);
E[3] = 0x80;
E[61] = 0x02; // T[15] = 8 * 64 => 0x200;
sha256_transform_volatile(S, (uint32_t*)E);
for (i = 0; i < 8; i++)
be32enc(&hash[i], S[i]);
}
void pluck_hash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const int N)
{
int size = N * 1024;
sha256_hash(hashbuffer, (void*)data, BLOCK_HEADER_SIZE);
memset(&hashbuffer[32], 0, 32);
for(int i = 64; i < size - 32; i += 32)
{
uint32_t _ALIGN(64) randseed[16];
uint32_t _ALIGN(64) randbuffer[16];
uint32_t _ALIGN(64) joint[16];
//i - 4 because all references against this buffer are read as 32-bit integers, and we don't want to go 3 bytes over the defined area
//we could use size here, but then the value used would likely be 0 in most cases
int randmax = i - 4;
//setup randbuffer to be an array of random indexes
memcpy(randseed, &hashbuffer[i - 64], 64);
if(i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);
//else memset(randbuffer, 0, 64);
xor_salsa8((void*)randbuffer, (void*)randseed, i);
memcpy(joint, &hashbuffer[i - 32], 32);
//use the last hash value as the seed
for (int j = 32; j < 64; j += 4)
{
//every other time, change to next random index
//randmax - 32 as otherwise we go beyond memory that's already been written to
uint32_t rand = randbuffer[(j - 32) >> 2] % (randmax - 32);
joint[j >> 2] = *((uint32_t *)&hashbuffer[rand]);
}
sha256_hash512((uint32_t*) &hashbuffer[i], joint);
//setup randbuffer to be an array of random indexes
//use last hash value and previous hash value(post-mixing)
memcpy(randseed, &hashbuffer[i - 32], 64);
if(i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);
//else memset(randbuffer, 0, 64);
xor_salsa8((void*)randbuffer, (void*)randseed, i);
//use the last hash value as the seed
for (int j = 0; j < 32; j += 2)
{
uint32_t rand = randbuffer[j >> 1] % randmax;
*((uint32_t *)(hashbuffer + rand)) = *((uint32_t *)(hashbuffer + j + randmax));
}
}
memcpy(hash, hashbuffer, 32);
}
int scanhash_pluck( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t _ALIGN(64) endiandata[20];
uint32_t _ALIGN(64) hash[8];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
volatile uint8_t *restart = &(work_restart[thr_id].restart);
uint32_t n = first_nonce;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0ffff;
for (int i=0; i < 19; i++)
be32enc(&endiandata[i], pdata[i]);
const uint32_t Htarg = ptarget[7];
do {
//be32enc(&endiandata[19], n);
endiandata[19] = n;
pluck_hash(hash, endiandata, scratchbuf, opt_pluck_n);
if (hash[7] <= Htarg && fulltest(hash, ptarget))
{
*hashes_done = n - first_nonce + 1;
pdata[19] = htobe32(endiandata[19]);
return 1;
}
n++;
} while (n < max_nonce && !(*restart));
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
bool pluck_miner_thread_init( int thr_id )
{
scratchbuf = malloc( 128 * 1024 );
if ( scratchbuf )
return true;
applog( LOG_ERR, "Thread %u: Pluck buffer allocation failed", thr_id );
return false;
}
bool register_pluck_algo( algo_gate_t* gate )
{
algo_not_tested();
gate->miner_thread_init = (void*)&pluck_miner_thread_init;
gate->scanhash = (void*)&scanhash_pluck;
gate->hash = (void*)&pluck_hash;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -1,4 +1,7 @@
#include "sha256t-gate.h"
#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -102,3 +105,4 @@ int scanhash_sha256q( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "sha256t-gate.h"
#if !defined(SHA256T_16WAY) && !defined(SHA256T_8WAY) && !defined(SHA256T_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -98,3 +101,5 @@ int scanhash_sha256t( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "algo-gate-api.h"
#if !defined(SKEIN_8WAY) && !defined(SKEIN_4WAY)
#include <string.h>
#include <stdint.h>
#include "sph_skein.h"
@@ -52,4 +55,4 @@ int scanhash_skein( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "skein-gate.h"
#if !defined(SKEIN_8WAY) && !defined(SKEIN_4WAY)
#include <string.h>
#include <stdint.h>
@@ -66,4 +69,4 @@ int scanhash_skein2( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -252,12 +252,6 @@ SPH_XCAT(HASH, _addbits_and_close)(void *cc,
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
#endif
//uint64_t *b= (uint64_t*)sc->buf;
//uint64_t *s= (uint64_t*)sc->state;
// printf("Sptr 1= %u\n",current);
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] );
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] );
#ifdef PW01
sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
#else
@@ -269,10 +263,6 @@ SPH_XCAT(HASH, _addbits_and_close)(void *cc,
}
#endif
// printf("Sptr 2= %u\n",current);
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] );
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] );
if (current > SPH_MAXPAD) {
memset(sc->buf + current, 0, SPH_BLEN - current);
RFUN(sc->buf, SPH_VAL);
@@ -333,16 +323,8 @@ SPH_XCAT(HASH, _addbits_and_close)(void *cc,
#endif
#endif
// printf("Sptr 3= %u\n",current);
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] );
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] );
RFUN(sc->buf, SPH_VAL);
// printf("Sptr after= %u\n",current);
// printf("SState %016llx %016llx %016llx %016llx\n", s[0], s[1], s[2], s[3] );
// printf("SState %016llx %016llx %016llx %016llx\n", s[4], s[5], s[6], s[7] );
#ifdef SPH_NO_OUTPUT
(void)dst;
(void)rnum;

View File

@@ -1,4 +1,7 @@
#include "c11-gate.h"
#if !defined(C11_8WAY) && !defined(C11_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -9,9 +12,6 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
@@ -149,3 +149,4 @@ int scanhash_c11( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,131 +0,0 @@
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
//#define DEBUG_ALGO
extern void freshhash(void* output, const void* input, uint32_t len)
{
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
#define hashA hash
#define hashB hash+64
sph_shavite512_context ctx_shavite;
sph_simd512_context ctx_simd;
sph_echo512_context ctx_echo;
sph_shavite512_init(&ctx_shavite);
sph_shavite512(&ctx_shavite, input, len);
sph_shavite512_close(&ctx_shavite, hashA);
sph_simd512_init(&ctx_simd);
sph_simd512(&ctx_simd, hashA, 64);
sph_simd512_close(&ctx_simd, hashB);
sph_shavite512_init(&ctx_shavite);
sph_shavite512(&ctx_shavite, hashB, 64);
sph_shavite512_close(&ctx_shavite, hashA);
sph_simd512_init(&ctx_simd);
sph_simd512(&ctx_simd, hashA, 64);
sph_simd512_close(&ctx_simd, hashB);
sph_echo512_init(&ctx_echo);
sph_echo512(&ctx_echo, hashB, 64);
sph_echo512_close(&ctx_echo, hashA);
memcpy(output, hash, 32);
}
int scanhash_fresh( struct work *work,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t len = 80;
int thr_id = mythr->id; // thr_id arg is deprecated
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
#ifdef _MSC_VER
uint32_t __declspec(align(32)) hash64[8];
#else
uint32_t hash64[8] __attribute__((aligned(32)));
#endif
uint32_t endiandata[32];
uint64_t htmax[] = {
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] = {
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
// we need bigendian data...
for (int k = 0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
#ifdef DEBUG_ALGO
if (Htarg != 0)
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
freshhash(hash64, endiandata, len);
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
bool register_fresh_algo( algo_gate_t* gate )
{
algo_not_tested();
gate->scanhash = (void*)&scanhash_fresh;
gate->hash = (void*)&freshhash;
opt_target_factor = 256.0;
return true;
};

View File

@@ -1,5 +1,7 @@
#include "timetravel-gate.h"
#if !defined(TIMETRAVEL_8WAY) && !defined(TIMETRAVEL_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -290,4 +292,4 @@ int scanhash_timetravel( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "timetravel10-gate.h"
#if !defined(TIMETRAVEL10_8WAY) && !defined(TIMETRAVEL10_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -329,3 +332,4 @@ int scanhash_timetravel10( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -1,12 +1,13 @@
#include "tribus-gate.h"
#if !defined(TRIBUS_8WAY) && !defined(TRIBUS_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/jh//sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#ifdef __AES__
#include "algo/echo/aes_ni/hash_api.h"
#else
@@ -117,4 +118,4 @@ int scanhash_tribus( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,5 +1,8 @@
#include "cpuminer-config.h"
#include "x11-gate.h"
#if !defined(X11_8WAY) && !defined(X11_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -10,9 +13,6 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
@@ -172,3 +172,4 @@ int scanhash_x11( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,5 +1,8 @@
#include "cpuminer-config.h"
#include "x11evo-gate.h"
#if !defined(X11EVO_8WAY) && !defined(X11EVO_4WAY)
#include <string.h>
#include <stdint.h>
#include <compat/portable_endian.h>
@@ -8,10 +11,7 @@
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#ifdef __AES__
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
@@ -204,3 +204,4 @@ int scanhash_x11evo( struct work* work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "x11gost-gate.h"
#if !defined(X11GOST_8WAY) && !defined(X11GOST_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -10,9 +13,6 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
@@ -160,3 +160,4 @@ int scanhash_x11gost( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,5 +1,7 @@
#include "x12-gate.h"
#if !defined(X12_8WAY) && !defined(X12_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -12,9 +14,6 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/luffa/luffa_for_sse2.h"
@@ -177,3 +176,4 @@ int scanhash_x12( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,262 +0,0 @@
/**
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2015 kernels10, tpruvot
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file drop.c
* @author kernels10 <kernels10@gmail.com.com>
* @author tpruvot <tpruvot@github>
*/
#define POK_BOOL_MASK 0x00008000
#define POK_DATA_MASK 0xFFFF0000
#include "algo-gate-api.h"
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/echo/sph_echo.h"
#include "algo/fugue//sph_fugue.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/simd/sph_simd.h"
#include "algo/shavite/sph_shavite.h"
static void shiftr_lp(const uint32_t *input, uint32_t *output, unsigned int shift)
{
if(!shift) {
memcpy(output, input, 64);
return;
}
memset(output, 0, 64);
for(int i = 0; i < 15; ++i) {
output[i + 1] |= (input[i] >> (32 - shift));
output[i] |= (input[i] << shift);
}
output[15] |= (input[15] << shift);
return;
}
static void switchHash(const void *input, void *output, int id)
{
/*
sph_keccak512_context ctx_keccak;
sph_blake512_context ctx_blake;
sph_groestl512_context ctx_groestl;
sph_skein512_context ctx_skein;
sph_luffa512_context ctx_luffa;
sph_echo512_context ctx_echo;
sph_simd512_context ctx_simd;
sph_cubehash512_context ctx_cubehash;
sph_fugue512_context ctx_fugue;
sph_shavite512_context ctx_shavite;
switch(id) {
case 0:
sph_keccak512_init(&ctx_keccak); sph_keccak512(&ctx_keccak, input, 64); sph_keccak512_close(&ctx_keccak, output);
break;
case 1:
sph_blake512_init(&ctx_blake); sph_blake512(&ctx_blake, input, 64); sph_blake512_close(&ctx_blake, output);
break;
case 2:
sph_groestl512_init(&ctx_groestl); sph_groestl512(&ctx_groestl, input, 64); sph_groestl512_close(&ctx_groestl, output);
break;
case 3:
sph_skein512_init(&ctx_skein); sph_skein512(&ctx_skein, input, 64); sph_skein512_close(&ctx_skein, output);
break;
case 4:
sph_luffa512_init(&ctx_luffa); sph_luffa512(&ctx_luffa, input, 64); sph_luffa512_close(&ctx_luffa, output);
break;
case 5:
sph_echo512_init(&ctx_echo); sph_echo512(&ctx_echo, input, 64); sph_echo512_close(&ctx_echo, output);
break;
case 6:
sph_shavite512_init(&ctx_shavite); sph_shavite512(&ctx_shavite, input, 64); sph_shavite512_close(&ctx_shavite, output);
break;
case 7:
sph_fugue512_init(&ctx_fugue); sph_fugue512(&ctx_fugue, input, 64); sph_fugue512_close(&ctx_fugue, output);
break;
case 8:
sph_simd512_init(&ctx_simd); sph_simd512(&ctx_simd, input, 64); sph_simd512_close(&ctx_simd, output);
break;
case 9:
sph_cubehash512_init(&ctx_cubehash); sph_cubehash512(&ctx_cubehash, input, 64); sph_cubehash512_close(&ctx_cubehash, output);
break;
default:
break;
}
*/
}
void droplp_hash(void *state, const void *input)
{
uint32_t _ALIGN(64) hash[2][16];
sph_jh512_context ctx_jh;
uint32_t *hashA = hash[0];
uint32_t *hashB = hash[1];
sph_jh512_init(&ctx_jh);
sph_jh512(&ctx_jh, input, 80);
sph_jh512_close(&ctx_jh, (void*)(hashA));
unsigned int startPosition = hashA[0] % 31;
unsigned int i = 0;
int j = 0;
int start = 0;
for (i = startPosition; i < 31; i+=9) {
start = i % 10;
for (j = start; j < 10; j++) {
shiftr_lp(hashA, hashB, (i & 3));
switchHash((const void*)hashB, (void*)hashA, j);
}
for (j = 0; j < start; j++) {
shiftr_lp(hashA, hashB, (i & 3));
switchHash((const void*)hashB, (void*)hashA, j);
}
}
for (i = 0; i < startPosition; i += 9) {
start = i % 10;
for (j = start; j < 10; j++) {
shiftr_lp(hashA, hashB, (i & 3));
switchHash((const void*)hashB, (void*)hashA, j);
}
for (j = 0; j < start; j++) {
shiftr_lp(hashA, hashB, (i & 3));
switchHash((const void*)hashB, (void*)hashA, j);
}
}
memcpy(state, hashA, 32);
}
static void droplp_hash_pok(void *output, uint32_t *pdata, const uint32_t version)
{
uint32_t _ALIGN(64) hash[8];
uint32_t pok;
pdata[0] = version;
droplp_hash(hash, pdata);
// fill PoK
pok = version | (hash[0] & POK_DATA_MASK);
if (pdata[0] != pok) {
pdata[0] = pok;
droplp_hash(hash, pdata);
}
memcpy(output, hash, 32);
}
int scanhash_drop( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(64) hash[16];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t version = pdata[0] & (~POK_DATA_MASK);
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
#define tmpdata pdata
if (opt_benchmark)
ptarget[7] = 0x07ff;
const uint32_t htarg = ptarget[7];
do {
tmpdata[19] = nonce;
droplp_hash_pok(hash, tmpdata, version);
if (hash[7] <= htarg && fulltest(hash, ptarget)) {
pdata[0] = tmpdata[0];
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
if (opt_debug)
applog(LOG_INFO, "found nonce %x", nonce);
return 1;
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
void drop_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t* end_nonce_ptr )
{
// ignore POK in first word
// const int nonce_i = 19;
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|| ( *nonceptr >= *end_nonce_ptr ) )
{
work_free( work );
work_copy( work, g_work );
*nonceptr = ( 0xffffffffU / opt_n_threads ) * thr_id;
if ( opt_randomize )
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
}
else
++(*nonceptr);
}
void drop_display_pok( struct work* work )
{
if ( work->data[0] & 0x00008000 )
applog(LOG_BLUE, "POK received: %08xx", work->data[0] );
}
int drop_get_work_data_size() { return 80; }
// Need to fix POK offset problems like zr5
bool register_drop_algo( algo_gate_t* gate )
{
algo_not_tested();
gate->scanhash = (void*)&scanhash_drop;
gate->hash = (void*)&droplp_hash_pok;
gate->get_new_work = (void*)&drop_get_new_work;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
gate->decode_extra_data = (void*)&drop_display_pok;
gate->get_work_data_size = (void*)&drop_get_work_data_size;
gate->work_cmp_size = 72;
opt_target_factor = 65536.0;
return true;
};

View File

@@ -1,4 +1,7 @@
#include "phi1612-gate.h"
#if !defined(PHI1612_8WAY) && !defined(PHI1612_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -123,3 +126,4 @@ int scanhash_phi1612( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "skunk-gate.h"
#if !defined(SKUNK_8WAY) && !defined(SKUNK_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -88,3 +91,4 @@ bool skunk_thread_init()
sph_gost512_init( &skunk_ctx.gost );
return true;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "x13-gate.h"
#if !defined(X13_8WAY) && !defined(X13_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -9,9 +12,6 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/luffa/luffa_for_sse2.h"
@@ -185,3 +185,4 @@ int scanhash_x13( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "x13sm3-gate.h"
#if !defined(X13BCD_8WAY) && !defined(X13BCD_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -10,7 +13,6 @@
#include "algo/sm3/sph_sm3.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/cubehash/cubehash_sse2.h"
@@ -184,3 +186,4 @@ int scanhash_x13bcd( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "x13sm3-gate.h"
#if !defined(X13SM3_8WAY) && !defined(X13SM3_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -10,7 +13,6 @@
#include "algo/sm3/sph_sm3.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/luffa/luffa_for_sse2.h"
@@ -197,3 +199,4 @@ int scanhash_x13sm3( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "polytimos-gate.h"
#if !defined(POLYTIMOS_8WAY) && !defined(POLYTIMOS_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -111,3 +114,4 @@ int scanhash_polytimos( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "veltor-gate.h"
#if !defined(VELTOR_8WAY) && !defined(VELTOR_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -102,3 +105,4 @@ int scanhash_veltor( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -1,19 +1,17 @@
#include "x14-gate.h"
#if !defined(X14_8WAY) && !defined(X14_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
@@ -186,3 +184,4 @@ int scanhash_x14( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "x15-gate.h"
#if !defined(X15_8WAY) && !defined(X15_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -9,9 +12,6 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
@@ -217,3 +217,4 @@ int scanhash_x15( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -85,13 +85,6 @@ void hex_hash( void* output, const void* input )
memcpy( &ctx, &hex_ctx, sizeof(ctx) );
void *in = (void*) input;
int size = 80;
/*
if ( s_ntime == UINT32_MAX )
{
const uint8_t* in8 = (uint8_t*) input;
x16_r_s_getAlgoString( &in8[4], hashOrder );
}
*/
char elem = hashOrder[0];
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
@@ -249,11 +242,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_bswap32_80( edata, pdata );
uint32_t ntime = swab32(pdata[17]);
if ( s_ntime != ntime )
@@ -277,6 +266,10 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
sph_skein512_init( &hex_ctx.skein );
sph_skein512( &hex_ctx.skein, edata, 64 );
break;
case LUFFA:
init_luffa( &hex_ctx.luffa, 512 );
update_luffa( &hex_ctx.luffa, (const BitSequence*)edata, 64 );
break;
case CUBEHASH:
cubehashInit( &hex_ctx.cube, 512, 16, 32 );
cubehashUpdate( &hex_ctx.cube, (const byte*)edata, 64 );

View File

@@ -2,74 +2,85 @@
* x16r algo implementation
*
* Implementation by tpruvot@github Jan 2018
* Optimized by JayDDee@github Jan 2018
* Optimized by https://github.com/JayDDee/ Jan 2018
*/
#include "x16r-gate.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sha-hash-4way.h"
#if defined(__VAES__)
#include "algo/groestl/groestl512-hash-4way.h"
#include "algo/shavite/shavite-hash-4way.h"
#include "algo/echo/echo-hash-4way.h"
#endif
// The hash and prehash code is shared among x16r, x16s, x16rt, and x21s.
// The generic function performs the x16 hash as per the hash order
// and produces a 512 bit intermediate hash, which a wrapper function then
// converts to the 256 bit final hash.
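// A minimal sketch of the generic/wrapper split described above, using the
// x16r_hash_generic()/x16r_hash() names this commit defines for the scalar
// path: the generic routine emits the full 64 byte (512 bit) intermediate
// hash and the thin wrapper truncates it to the 32 byte (256 bit) final
// hash that scanhash compares against the target.
//
// #include <stdint.h>
// #include <string.h>
//
// void x16r_hash_generic( void *output, const void *input );  // full 16-step hash
//
// void x16r_hash( void *output, const void *input )
// {
//    uint8_t hash[64] __attribute__ ((aligned (64)));  // 512 bit intermediate
//    x16r_hash_generic( hash, input );                 // run the 16 hash functions
//    memcpy( output, hash, 32 );                       // keep the 256 bit final hash
// }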
#if defined (X16R_8WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
// Perform midstate prehash of hash functions with block size <= 64 bytes
// and interleave 4x64 before nonce insertion for final hash.
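// A minimal single-lane sketch of the midstate prehash idea, assuming the
// sph_jh API used by the scalar x16r code elsewhere in this commit (the
// names jh_prehash, jh_final, jh_mid and hdr are illustrative only): the
// 80 byte header is split into a 64 byte prefix, absorbed once per job into
// a saved context, and a 16 byte tail containing the nonce, absorbed per
// nonce from a copy of that context. The parallel code below applies the
// same idea to interleaved lanes.
//
// #include <stdint.h>
// #include <string.h>
// #include "algo/jh/sph_jh.h"
//
// static sph_jh512_context jh_mid;         // midstate saved once per job
//
// void jh_prehash( const uint8_t *hdr )    // hdr: big-endian 80 byte header
// {
//    sph_jh512_init( &jh_mid );
//    sph_jh512( &jh_mid, hdr, 64 );        // absorb the nonce-free prefix once
// }
//
// void jh_final( void *hash, const uint8_t *hdr )   // called for every nonce
// {
//    sph_jh512_context ctx;
//    memcpy( &ctx, &jh_mid, sizeof(ctx) ); // restore the saved midstate
//    sph_jh512( &ctx, hdr + 64, 16 );      // absorb only the tail with the nonce
//    sph_jh512_close( &ctx, hash );        // 64 byte JH-512 digest
// }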
union _x16r_8way_context_overlay
void x16r_8way_prehash( void *vdata, void *pdata )
{
blake512_8way_context blake;
bmw512_8way_context bmw;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cubehashParam cube;
// cube_4way_context cube;
simd_4way_context simd;
hamsi512_8way_context hamsi;
sph_fugue512_context fugue;
shabal512_8way_context shabal;
sph_whirlpool_context whirlpool;
sha512_8way_context sha512;
#if defined(__VAES__)
groestl512_4way_context groestl;
shavite512_4way_context shavite;
echo_4way_context echo;
#else
hashState_groestl groestl;
sph_shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
static __thread x16r_8way_context_overlay x16r_ctx;
switch ( algo )
{
case JH:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
jh512_8way_init( &x16r_ctx.jh );
jh512_8way_update( &x16r_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
skein512_8way_init( &x16r_ctx.skein );
skein512_8way_update( &x16r_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
intrlv_4x128( vdata2, edata, edata, edata, edata, 640 );
luffa_4way_init( &x16r_ctx.luffa, 512 );
luffa_4way_update( &x16r_ctx.luffa, vdata2, 64 );
rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x16r_ctx.hamsi );
hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
shabal512_8way_init( &x16r_ctx.shabal );
shabal512_8way_update( &x16r_ctx.shabal, vdata2, 64 );
rintrlv_8x32_8x64( vdata, vdata2, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16r_ctx.whirlpool );
sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
default:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
}
}
void x16r_8way_hash( void* output, const void* input )
// Performs the full x16r hash and returns the 512 bit intermediate hash.
// Called by the wrapper hash function, which may optionally continue
// hashing before converting to the final hash.
void x16r_8way_hash_generic( void* output, const void* input )
{
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (64)));
@@ -97,7 +108,7 @@ void x16r_8way_hash( void* output, const void* input )
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -464,23 +475,39 @@ void x16r_8way_hash( void* output, const void* input )
size = 64;
}
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
memcpy( output+128, hash4, 32 );
memcpy( output+160, hash5, 32 );
memcpy( output+192, hash6, 32 );
memcpy( output+224, hash7, 32 );
memcpy( output, hash0, 64 );
memcpy( output+64, hash1, 64 );
memcpy( output+128, hash2, 64 );
memcpy( output+192, hash3, 64 );
memcpy( output+256, hash4, 64 );
memcpy( output+320, hash5, 64 );
memcpy( output+384, hash6, 64 );
memcpy( output+448, hash7, 64 );
}
// x16r, x16s and x16rt wrapper, called directly by scanhash to repackage
// the 512 bit hash into the 256 bit final hash.
void x16r_8way_hash( void* output, const void* input )
{
uint8_t hash[64*8] __attribute__ ((aligned (128)));
x16r_8way_hash_generic( hash, input );
memcpy( output, hash, 32 );
memcpy( output+32, hash+64, 32 );
memcpy( output+64, hash+128, 32 );
memcpy( output+96, hash+192, 32 );
memcpy( output+128, hash+256, 32 );
memcpy( output+160, hash+320, 32 );
memcpy( output+192, hash+384, 32 );
memcpy( output+224, hash+448, 32 );
}
// x16r only
int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -496,66 +523,18 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
jh512_8way_init( &x16r_ctx.jh );
jh512_8way_update( &x16r_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
skein512_8way_init( &x16r_ctx.skein );
skein512_8way_update( &x16r_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
intrlv_4x128( vdata2, edata, edata, edata, edata, 640 );
luffa_4way_init( &x16r_ctx.luffa, 512 );
luffa_4way_update( &x16r_ctx.luffa, vdata2, 64 );
rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x16r_ctx.hamsi );
hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
shabal512_8way_init( &x16r_ctx.shabal );
shabal512_8way_update( &x16r_ctx.shabal, vdata2, 64 );
rintrlv_8x32_8x64( vdata, vdata2, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16r_ctx.whirlpool );
sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
default:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_8way_prehash( vdata, pdata );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
@@ -580,34 +559,62 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
#elif defined (X16R_4WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16r_4way_context_overlay
void x16r_4way_prehash( void *vdata, void *pdata )
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_echo echo;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
hashState_luffa luffa1;
cubehashParam cube;
sph_shavite512_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
} __attribute__ ((aligned (64)));
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
static __thread x16r_4way_context_overlay x16r_ctx;
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
void x16r_4way_hash( void* output, const void* input )
switch ( algo )
{
case JH:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
jh512_4way_init( &x16r_ctx.jh );
jh512_4way_update( &x16r_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_init( &x16r_ctx.skein );
skein512_4way_update( &x16r_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
intrlv_2x128( vdata2, edata, edata, 640 );
luffa_2way_init( &x16r_ctx.luffa, 512 );
luffa_2way_update( &x16r_ctx.luffa, vdata2, 64 );
rintrlv_2x128_4x64( vdata, vdata2, vdata2, 640 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x16r_ctx.hamsi );
hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm128_bswap32_intrlv80_4x32( vdata2, pdata );
shabal512_4way_init( &x16r_ctx.shabal );
shabal512_4way_update( &x16r_ctx.shabal, vdata2, 64 );
rintrlv_4x32_4x64( vdata, vdata2, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16r_ctx.whirlpool );
sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
default:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
}
}
void x16r_4way_hash_generic( void* output, const void* input )
{
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (64)));
@@ -626,7 +633,7 @@ void x16r_4way_hash( void* output, const void* input )
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -698,11 +705,12 @@ void x16r_4way_hash( void* output, const void* input )
case LUFFA:
if ( i == 0 )
{
intrlv_2x128( vhash, in0, in1, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash + (16<<1), 16 );
intrlv_2x128( vhash, hash0, hash1, 640 );
luffa_2way_update_close( &ctx.luffa, vhash, vhash + (16<<1), 16 );
dintrlv_2x128_512( hash0, hash1, vhash );
intrlv_2x128( vhash, in2, in3, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash + (16<<1), 16 );
intrlv_2x128( vhash, hash2, hash3, 640 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
luffa_2way_update_close( &ctx.luffa, vhash, vhash + (16<<1), 16 );
dintrlv_2x128_512( hash2, hash3, vhash );
}
else
@@ -863,10 +871,21 @@ void x16r_4way_hash( void* output, const void* input )
}
size = 64;
}
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
memcpy( output, hash0, 64 );
memcpy( output+64, hash1, 64 );
memcpy( output+128, hash2, 64 );
memcpy( output+192, hash3, 64 );
}
void x16r_4way_hash( void* output, const void* input )
{
uint8_t hash[64*4] __attribute__ ((aligned (64)));
x16r_4way_hash_generic( hash, input );
memcpy( output, hash, 32 );
memcpy( output+32, hash+64, 32 );
memcpy( output+64, hash+128, 32 );
memcpy( output+96, hash+192, 32 );
}
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
@@ -874,8 +893,6 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -891,67 +908,20 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
jh512_4way_init( &x16r_ctx.jh );
jh512_4way_update( &x16r_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_init( &x16r_ctx.skein );
skein512_4way_update( &x16r_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
intrlv_2x128( vdata2, edata, edata, 640 );
luffa_2way_init( &x16r_ctx.luffa, 512 );
luffa_2way_update( &x16r_ctx.luffa, vdata2, 64 );
rintrlv_2x128_4x64( vdata, vdata2, vdata2, 512 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16r_ctx.cube, (const byte*)edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x16r_ctx.hamsi );
hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm128_bswap32_intrlv80_4x32( vdata2, pdata );
shabal512_4way_init( &x16r_ctx.shabal );
shabal512_4way_update( &x16r_ctx.shabal, vdata2, 64 );
rintrlv_4x32_4x64( vdata, vdata2, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16r_ctx.whirlpool );
sph_whirlpool( &x16r_ctx.whirlpool, edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
default:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_4way_prehash( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x16r_4way_hash( hash, vdata );

View File

@@ -1,7 +1,22 @@
#include "x16r-gate.h"
__thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = { 0 };
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL;
#if defined (X16R_8WAY)
__thread x16r_8way_context_overlay x16r_ctx;
#elif defined (X16R_4WAY)
__thread x16r_4way_context_overlay x16r_ctx;
#endif
__thread x16r_context_overlay x16_ctx;
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
{
char *sptr = output;
@@ -207,15 +222,15 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
bool register_x16rt_algo( algo_gate_t* gate )
{
#if defined (X16RT_8WAY)
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16rt_8way_hash;
#elif defined (X16RT_4WAY)
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16rt_4way_hash;
gate->hash = (void*)&x16r_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x16rt;
gate->hash = (void*)&x16rt_hash;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
opt_target_factor = 256.0;
@@ -224,15 +239,15 @@ bool register_x16rt_algo( algo_gate_t* gate )
bool register_x16rt_veil_algo( algo_gate_t* gate )
{
#if defined (X16RT_8WAY)
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x16rt_8way;
gate->hash = (void*)&x16rt_8way_hash;
#elif defined (X16RT_4WAY)
gate->hash = (void*)&x16r_8way_hash;
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x16rt_4way;
gate->hash = (void*)&x16rt_4way_hash;
gate->hash = (void*)&x16r_4way_hash;
#else
gate->scanhash = (void*)&scanhash_x16rt;
gate->hash = (void*)&x16rt_hash;
gate->hash = (void*)&x16r_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->build_extraheader = (void*)&veil_build_extraheader;
@@ -247,7 +262,7 @@ bool register_x16rt_veil_algo( algo_gate_t* gate )
bool register_hex_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_hex;
gate->hash = (void*)&hex_hash;
gate->hash = (void*)&x16r_hash;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
opt_target_factor = 128.0;
@@ -260,13 +275,13 @@ bool register_hex_algo( algo_gate_t* gate )
bool register_x21s_algo( algo_gate_t* gate )
{
#if defined (X21S_8WAY)
#if defined (X16R_8WAY)
gate->scanhash = (void*)&scanhash_x21s_8way;
gate->hash = (void*)&x21s_8way_hash;
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT
| VAES_OPT;
#elif defined (X21S_4WAY)
#elif defined (X16R_4WAY)
gate->scanhash = (void*)&scanhash_x21s_4way;
gate->hash = (void*)&x21s_4way_hash;
gate->miner_thread_init = (void*)&x21s_4way_thread_init;

View File

@@ -5,29 +5,60 @@
#include "simd-utils.h"
#include <stdint.h>
#include <unistd.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
#if defined (__AVX2__)
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/sha/sha-hash-4way.h"
#if defined(__VAES__)
#include "algo/groestl/groestl512-hash-4way.h"
#include "algo/shavite/shavite-hash-4way.h"
#include "algo/echo/echo-hash-4way.h"
#endif
#endif // AVX2
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X16R_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16R_4WAY 1
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X16RV2_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16RV2_4WAY 1
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X16RT_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16RT_4WAY 1
#endif
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define X21S_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define X16RV2_4WAY 1
#define X16RT_4WAY 1
#define X21S_4WAY 1
#define X16R_4WAY 1
#endif
enum x16r_Algo {
@@ -50,6 +81,8 @@ enum x16r_Algo {
X16R_HASH_FUNC_COUNT
};
extern __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ];
extern void (*x16_r_s_getAlgoString) ( const uint8_t*, char* );
void x16r_getAlgoString( const uint8_t *prevblock, char *output );
void x16s_getAlgoString( const uint8_t *prevblock, char *output );
@@ -67,25 +100,115 @@ bool register_x21s__algo( algo_gate_t* gate );
// x16r, x16s
#if defined(X16R_8WAY)
void x16r_8way_hash( void *state, const void *input );
int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
union _x16r_8way_context_overlay
{
blake512_8way_context blake;
bmw512_8way_context bmw;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cubehashParam cube;
simd_4way_context simd;
hamsi512_8way_context hamsi;
sph_fugue512_context fugue;
shabal512_8way_context shabal;
sph_whirlpool_context whirlpool;
sha512_8way_context sha512;
#if defined(__VAES__)
groestl512_4way_context groestl;
shavite512_4way_context shavite;
echo_4way_context echo;
#else
hashState_groestl groestl;
sph_shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
extern __thread x16r_8way_context_overlay x16r_ctx;
void x16r_8way_prehash( void *, void * );
void x16r_8way_hash_generic( void *, const void * );
void x16r_8way_hash( void *, const void * );
int scanhash_x16r_8way( struct work *, uint32_t ,
uint64_t *, struct thr_info * );
extern __thread x16r_8way_context_overlay x16r_ctx;
#elif defined(X16R_4WAY)
void x16r_4way_hash( void *state, const void *input );
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
union _x16r_4way_context_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_echo echo;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
hashState_luffa luffa1;
cubehashParam cube;
sph_shavite512_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
} __attribute__ ((aligned (64)));
#else
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
void x16r_hash( void *state, const void *input );
int scanhash_x16r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
extern __thread x16r_4way_context_overlay x16r_ctx;
void x16r_4way_prehash( void *, void * );
void x16r_4way_hash_generic( void *, const void * );
void x16r_4way_hash( void *, const void * );
int scanhash_x16r_4way( struct work *, uint32_t,
uint64_t *, struct thr_info * );
extern __thread x16r_4way_context_overlay x16r_ctx;
#endif
// needed for hex
union _x16r_context_overlay
{
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
} __attribute__ ((aligned (64)));
typedef union _x16r_context_overlay x16r_context_overlay;
extern __thread x16r_context_overlay x16_ctx;
void x16r_prehash( void *, void * );
void x16r_hash_generic( void *, const void * );
void x16r_hash( void *, const void * );
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );
// x16Rv2
#if defined(X16RV2_8WAY)
@@ -108,35 +231,35 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
#endif
// x16rt, veil
#if defined(X16RT_8WAY)
#if defined(X16R_8WAY)
void x16rt_8way_hash( void *state, const void *input );
//void x16rt_8way_hash( void *state, const void *input );
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(X16RT_4WAY)
#elif defined(X16R_4WAY)
void x16rt_4way_hash( void *state, const void *input );
//void x16rt_4way_hash( void *state, const void *input );
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#else
void x16rt_hash( void *state, const void *input );
//void x16rt_hash( void *state, const void *input );
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#endif
// x21s
#if defined(X21S_8WAY)
#if defined(X16R_8WAY)
void x21s_8way_hash( void *state, const void *input );
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool x21s_8way_thread_init();
#elif defined(X21S_4WAY)
#elif defined(X16R_4WAY)
void x21s_4way_hash( void *state, const void *input );
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
@@ -152,7 +275,7 @@ bool x21s_thread_init();
#endif
void hex_hash( void *state, const void *input );
//void hex_hash( void *state, const void *input );
int scanhash_hex( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );

View File

@@ -9,72 +9,56 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16r_context_overlay
void x16r_prehash( void *edata, void *pdata )
{
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
};
typedef union _x16r_context_overlay x16r_context_overlay;
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
void x16r_hash( void* output, const void* input )
switch ( algo )
{
case JH:
sph_jh512_init( &x16_ctx.jh );
sph_jh512( &x16_ctx.jh, edata, 64 );
break;
case SKEIN:
sph_skein512_init( &x16_ctx.skein );
sph_skein512( &x16_ctx.skein, edata, 64 );
break;
case LUFFA:
init_luffa( &x16_ctx.luffa, 512 );
update_luffa( &x16_ctx.luffa, (const BitSequence*)edata, 64 );
break;
case CUBEHASH:
cubehashInit( &x16_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16_ctx.cube, (const byte*)edata, 64 );
break;
case HAMSI:
sph_hamsi512_init( &x16_ctx.hamsi );
sph_hamsi512( &x16_ctx.hamsi, edata, 64 );
break;
case SHABAL:
sph_shabal512_init( &x16_ctx.shabal );
sph_shabal512( &x16_ctx.shabal, edata, 64 );
break;
case WHIRLPOOL:
sph_whirlpool_init( &x16_ctx.whirlpool );
sph_whirlpool( &x16_ctx.whirlpool, edata, 64 );
break;
}
}
void x16r_hash_generic( void* output, const void* input )
{
uint32_t _ALIGN(128) hash[16];
x16r_context_overlay ctx;
memcpy( &ctx, &x16_ctx, sizeof(ctx) );
void *in = (void*) input;
int size = 80;
/*
if ( s_ntime == UINT32_MAX )
{
const uint8_t* in8 = (uint8_t*) input;
x16_r_s_getAlgoString( &in8[4], hashOrder );
}
*/
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -91,23 +75,21 @@ void x16r_hash( void* output, const void* input )
break;
case GROESTL:
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)in, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash, (char*)in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
sph_groestl512_close(&ctx.groestl, hash);
#endif
break;
case SKEIN:
sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, in, size );
sph_skein512_close( &ctx.skein, hash );
break;
case JH:
if ( i == 0 )
sph_jh512(&ctx.jh, in+64, 16 );
else
{
sph_jh512_init( &ctx.jh );
sph_jh512(&ctx.jh, in, size );
}
sph_jh512_close(&ctx.jh, hash );
break;
case KECCAK:
@@ -115,15 +97,31 @@ void x16r_hash( void* output, const void* input )
sph_keccak512( &ctx.keccak, in, size );
sph_keccak512_close( &ctx.keccak, hash );
break;
case SKEIN:
if ( i == 0 )
sph_skein512(&ctx.skein, in+64, 16 );
else
{
sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, in, size );
}
sph_skein512_close( &ctx.skein, hash );
break;
case LUFFA:
init_luffa( &ctx.luffa, 512 );
if ( i == 0 )
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)in+64, 16 );
else
luffa_full( &ctx.luffa, (BitSequence*)hash, 512,
(const BitSequence*)in, size );
break;
case CUBEHASH:
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)in, size );
if ( i == 0 )
cubehashUpdateDigest( &ctx.cube, (byte*)hash,
(const byte*)in+64, 16 );
else
cubehash_full( &ctx.cube, (byte*)hash, 512,
(byte*)in, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
@@ -131,15 +129,12 @@ void x16r_hash( void* output, const void* input )
sph_shavite512_close( &ctx.shavite, hash );
break;
case SIMD:
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence *)hash,
simd_full( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
break;
case ECHO:
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
echo_full( &ctx.echo, hash, 512, in, size );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
@@ -147,8 +142,13 @@ void x16r_hash( void* output, const void* input )
#endif
break;
case HAMSI:
if ( i == 0 )
sph_hamsi512( &ctx.hamsi, in+64, 16 );
else
{
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, in, size );
}
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
@@ -157,13 +157,23 @@ void x16r_hash( void* output, const void* input )
sph_fugue512_close( &ctx.fugue, hash );
break;
case SHABAL:
if ( i == 0 )
sph_shabal512( &ctx.shabal, in+64, 16 );
else
{
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, in, size );
}
sph_shabal512_close( &ctx.shabal, hash );
break;
case WHIRLPOOL:
if ( i == 0 )
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
}
sph_whirlpool_close( &ctx.whirlpool, hash );
break;
case SHA_512:
@@ -175,49 +185,53 @@ void x16r_hash( void* output, const void* input )
in = (void*) hash;
size = 64;
}
memcpy(output, hash, 32);
memcpy( output, hash, 64 );
}
void x16r_hash( void* output, const void* input )
{
uint8_t hash[64] __attribute__ ((aligned (64)));
x16r_hash_generic( hash, input );
memcpy( output, hash, 32 );
}
int scanhash_x16r( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
volatile uint8_t *restart = &( work_restart[thr_id].restart );
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
x16r_prehash( edata, pdata );
do
{
be32enc( &endiandata[19], nonce );
x16r_hash( hash32, endiandata );
edata[19] = nonce;
x16r_hash( hash32, edata );
if ( hash32[7] <= Htarg )
if (fulltest( hash32, ptarget ) && !opt_benchmark )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
@@ -226,3 +240,4 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}

View File

@@ -2,481 +2,14 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sha-hash-4way.h"
#if defined(__VAES__)
#include "algo/groestl/groestl512-hash-4way.h"
#include "algo/shavite/shavite-hash-4way.h"
#include "algo/echo/echo-hash-4way.h"
#endif
#if defined (X16RT_8WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rt_8way_context_overlay
{
blake512_8way_context blake;
bmw512_8way_context bmw;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cubehashParam cube;
// cube_4way_context cube;
simd_4way_context simd;
hamsi512_8way_context hamsi;
sph_fugue512_context fugue;
shabal512_8way_context shabal;
sph_whirlpool_context whirlpool;
sha512_8way_context sha512;
#if defined(__VAES__)
groestl512_4way_context groestl;
shavite512_4way_context shavite;
echo_4way_context echo;
#else
hashState_groestl groestl;
sph_shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
typedef union _x16rt_8way_context_overlay x16rt_8way_context_overlay;
static __thread x16rt_8way_context_overlay x16rt_ctx;
void x16rt_8way_hash( void* output, const void* input )
{
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (64)));
uint32_t hash1[20] __attribute__ ((aligned (64)));
uint32_t hash2[20] __attribute__ ((aligned (64)));
uint32_t hash3[20] __attribute__ ((aligned (64)));
uint32_t hash4[20] __attribute__ ((aligned (64)));
uint32_t hash5[20] __attribute__ ((aligned (64)));
uint32_t hash6[20] __attribute__ ((aligned (64)));
uint32_t hash7[20] __attribute__ ((aligned (64)));
x16rt_8way_context_overlay ctx;
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
void *in2 = (void*) hash2;
void *in3 = (void*) hash3;
void *in4 = (void*) hash4;
void *in5 = (void*) hash5;
void *in6 = (void*) hash6;
void *in7 = (void*) hash7;
int size = 80;
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
input, 640 );
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
if ( i == 0 )
blake512_8way_full( &ctx.blake, vhash, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
blake512_8way_full( &ctx.blake, vhash, vhash, size );
}
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5,
hash6, hash7, vhash );
break;
case BMW:
bmw512_8way_init( &ctx.bmw );
if ( i == 0 )
bmw512_8way_update( &ctx.bmw, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
bmw512_8way_update( &ctx.bmw, vhash, size );
}
bmw512_8way_close( &ctx.bmw, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case GROESTL:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhash, vhash, size<<3 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)in4, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)in5, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)in6, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)in7, size<<3 );
#endif
break;
case JH:
if ( i == 0 )
jh512_8way_update( &ctx.jh, input + (64<<3), 16 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, size );
}
jh512_8way_close( &ctx.jh, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case KECCAK:
keccak512_8way_init( &ctx.keccak );
if ( i == 0 )
keccak512_8way_update( &ctx.keccak, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
keccak512_8way_update( &ctx.keccak, vhash, size );
}
keccak512_8way_close( &ctx.keccak, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case SKEIN:
if ( i == 0 )
skein512_8way_update( &ctx.skein, input + (64<<3), 16 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, size );
}
skein512_8way_close( &ctx.skein, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case LUFFA:
if ( i == 0 )
{
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
luffa_4way_update_close( &ctx.luffa, vhash,
vhash + (16<<2), 16 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
luffa_4way_update_close( &ctx.luffa, vhash,
vhash + (16<<2), 16 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
}
else
{
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
}
break;
case CUBEHASH:
if ( i == 0 )
{
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)in0 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash1,
(const byte*)in1 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash2,
(const byte*)in2 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
(const byte*)in3 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash4,
(const byte*)in4 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash5,
(const byte*)in5 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash6,
(const byte*)in6 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash7,
(const byte*)in7 + 64, 16 );
}
else
{
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0,
(const byte*)in0, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
(const byte*)in1, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
(const byte*)in2, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
(const byte*)in3, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash4,
(const byte*)in4, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash5,
(const byte*)in5, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash6,
(const byte*)in6, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash7,
(const byte*)in7, size );
}
break;
case SHAVITE:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in4, size );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in5, size );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in6, size );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in7, size );
sph_shavite512_close( &ctx.shavite, hash7 );
#endif
break;
case SIMD:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
simd512_4way_full( &ctx.simd, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
simd512_4way_full( &ctx.simd, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case ECHO:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)in0, size );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)in1, size );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)in2, size );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)in3, size );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)in4, size );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)in5, size );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)in6, size );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)in7, size );
#endif
break;
case HAMSI:
if ( i == 0 )
hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
hamsi512_8way_init( &ctx.hamsi );
hamsi512_8way_update( &ctx.hamsi, vhash, size );
}
hamsi512_8way_close( &ctx.hamsi, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in4, size );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in5, size );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in6, size );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in7, size );
sph_fugue512_close( &ctx.fugue, hash7 );
break;
case SHABAL:
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
if ( i == 0 )
shabal512_8way_update( &ctx.shabal, vhash + (16<<3), 16 );
else
{
shabal512_8way_init( &ctx.shabal );
shabal512_8way_update( &ctx.shabal, vhash, size );
}
shabal512_8way_close( &ctx.shabal, vhash );
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case WHIRLPOOL:
if ( i == 0 )
{
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in4, size );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in5, size );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in6, size );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in7, size );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
}
break;
case SHA_512:
sha512_8way_init( &ctx.sha512 );
if ( i == 0 )
sha512_8way_update( &ctx.sha512, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
sha512_8way_update( &ctx.sha512, vhash, size );
}
sha512_8way_close( &ctx.sha512, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
}
size = 64;
}
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
memcpy( output+128, hash4, 32 );
memcpy( output+160, hash5, 32 );
memcpy( output+192, hash6, 32 );
memcpy( output+224, hash7, 32 );
}
#if defined (X16R_8WAY)
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) timeHash[8*8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -490,74 +23,25 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16rt_getTimeHash( ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], hashOrder );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
hashOrder, ntime, timeHash );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
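// The 80 byte header is a 64 byte prefix that is constant for the whole scan
// plus a 16 byte tail containing the nonce, so the prefix can be absorbed
// once and only the tail hashed per nonce. A minimal sketch of the idea,
// using hypothetical names:
//
//    ctx_t midstate, c;
//    uint8_t data[80];                          // block header
//    hash_init( &midstate );
//    hash_update( &midstate, data, 64 );        // constant prefix, done once
//    for ( uint32_t n = first_nonce; n < max_nonce; n++ )
//    {
//       c = midstate;                           // cheap context copy
//       ((uint32_t*)data)[19] = n;              // nonce lives in the tail
//       hash_update( &c, data + 64, 16 );       // per-nonce work only
//       hash_close( &c, hash );
//    }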
const char elem = hashOrder[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
jh512_8way_init( &x16rt_ctx.jh );
jh512_8way_update( &x16rt_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
skein512_8way_init( &x16rt_ctx.skein );
skein512_8way_update( &x16rt_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
intrlv_4x128( vdata2, edata, edata, edata, edata, 640 );
luffa_4way_init( &x16rt_ctx.luffa, 512 );
luffa_4way_update( &x16rt_ctx.luffa, vdata2, 64 );
rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16rt_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16rt_ctx.cube, (const byte*)edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x16rt_ctx.hamsi );
hamsi512_8way_update( &x16rt_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
shabal512_8way_init( &x16rt_ctx.shabal );
shabal512_8way_update( &x16rt_ctx.shabal, vdata2, 64 );
rintrlv_8x32_8x64( vdata, vdata2, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16rt_ctx.whirlpool );
sph_whirlpool( &x16rt_ctx.whirlpool, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
default:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
x16r_hash_order, ntime, timeHash );
}
x16r_8way_prehash( vdata, pdata );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x16rt_8way_hash( hash, vdata );
x16r_8way_hash( hash, vdata );
for ( int i = 0; i < 8; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
@@ -574,313 +58,13 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined (X16RT_4WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rt_4way_context_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_echo echo;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
hashState_luffa luffa1;
cubehashParam cube;
sph_shavite512_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
};
typedef union _x16rt_4way_context_overlay x16rt_4way_context_overlay;
static __thread x16rt_4way_context_overlay x16rt_ctx;
void x16rt_4way_hash( void* output, const void* input )
{
uint32_t hash0[20] __attribute__ ((aligned (64)));
uint32_t hash1[20] __attribute__ ((aligned (64)));
uint32_t hash2[20] __attribute__ ((aligned (64)));
uint32_t hash3[20] __attribute__ ((aligned (64)));
uint32_t vhash[20*4] __attribute__ ((aligned (64)));
x16rt_4way_context_overlay ctx;
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
void *in2 = (void*) hash2;
void *in3 = (void*) hash3;
int size = 80;
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
// Input data is supplied both 64 bit interleaved (input) and deinterleaved
// in in0-3, so the first function has no need to re-interleave it.
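// A rough sketch of the 4x64 layout used by these helpers: the interleaved
// buffer holds one 64 bit word from each lane in turn, so the round trip is
//
//    intrlv_4x64( v, a, b, c, d, 512 );    // v = a0 b0 c0 d0 a1 b1 c1 d1 ...
//    dintrlv_4x64( a, b, c, d, v, 512 );   // recovers the four linear hashes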
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
if ( i == 0 )
blake512_4way_full( &ctx.blake, vhash, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way_full( &ctx.blake, vhash, vhash, size );
}
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case BMW:
bmw512_4way_init( &ctx.bmw );
if ( i == 0 )
bmw512_4way_update( &ctx.bmw, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way_update( &ctx.bmw, vhash, size );
}
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case GROESTL:
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
break;
case JH:
if ( i == 0 )
jh512_4way_update( &ctx.jh, input + (64<<2), 16 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, size );
}
jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case KECCAK:
keccak512_4way_init( &ctx.keccak );
if ( i == 0 )
keccak512_4way_update( &ctx.keccak, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
keccak512_4way_update( &ctx.keccak, vhash, size );
}
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case SKEIN:
if ( i == 0 )
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, size );
}
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case LUFFA:
if ( i == 0 )
{
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash0,
(const BitSequence*)in0 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash1,
(const BitSequence*)in1 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash2,
(const BitSequence*)in2 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash3,
(const BitSequence*)in3 + 64, 16 );
}
else
{
intrlv_2x128( vhash, in0, in1, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_2x128_512( hash0, hash1, vhash );
intrlv_2x128( vhash, in2, in3, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_2x128_512( hash2, hash3, vhash );
}
break;
case CUBEHASH:
if ( i == 0 )
{
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)in0 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
(const byte*)in1 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
(const byte*)in2 + 64, 16 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
(const byte*)in3 + 64, 16 );
}
else
{
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)in0, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash1,
(const byte*)in1, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash2,
(const byte*)in2, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
(const byte*)in3, size );
}
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
break;
case SIMD:
intrlv_2x128( vhash, in0, in1, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, in2, in3, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
break;
case ECHO:
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)in0, size );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)in1, size );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)in2, size );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)in3, size );
break;
case HAMSI:
if ( i == 0 )
hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way_update( &ctx.hamsi, vhash, size );
}
hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
break;
case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
if ( i == 0 )
shabal512_4way_update( &ctx.shabal, vhash + (16<<2), 16 );
else
{
shabal512_4way_init( &ctx.shabal );
shabal512_4way_update( &ctx.shabal, vhash, size );
}
shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case WHIRLPOOL:
if ( i == 0 )
{
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
memcpy( &ctx, &x16rt_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
}
break;
case SHA_512:
sha512_4way_init( &ctx.sha512 );
if ( i == 0 )
sha512_4way_update( &ctx.sha512, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
sha512_4way_update( &ctx.sha512, vhash, size );
}
sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
}
size = 64;
}
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
}
#elif defined (X16R_4WAY)
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) timeHash[4*8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -894,70 +78,24 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
if ( bench ) ptarget[7] = 0x0cff;
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16rt_getTimeHash( ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], hashOrder );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
hashOrder, ntime, timeHash );
}
const char elem = hashOrder[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
jh512_4way_init( &x16rt_ctx.jh );
jh512_4way_update( &x16rt_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_init( &x16rt_ctx.skein );
skein512_4way_update( &x16rt_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
init_luffa( &x16rt_ctx.luffa1, 512 );
update_luffa( &x16rt_ctx.luffa1, (const BitSequence*)edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x16rt_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x16rt_ctx.cube, (const byte*)edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x16rt_ctx.hamsi );
hamsi512_4way_update( &x16rt_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm128_bswap32_intrlv80_4x32( vdata32, pdata );
shabal512_4way_init( &x16rt_ctx.shabal );
shabal512_4way_update( &x16rt_ctx.shabal, vdata32, 64 );
rintrlv_4x32_4x64( vdata, vdata32, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x16rt_ctx.whirlpool );
sph_whirlpool( &x16rt_ctx.whirlpool, edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
default:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
x16r_hash_order, ntime, timeHash );
}
x16r_4way_prehash( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x16rt_4way_hash( hash, vdata );
x16r_4way_hash( hash, vdata );
for ( int i = 0; i < 4; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{

View File

@@ -1,234 +1,46 @@
#include "x16r-gate.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread bool s_implemented = false;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rt_context_overlay
{
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
};
typedef union _x16rt_context_overlay x16rt_context_overlay;
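// The overlay is a union rather than a struct: only one algorithm's context
// is live at any point in the hash order, so overlapping them keeps the
// stack footprint to the size of the largest member.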
void x16rt_hash( void* output, const void* input )
{
uint32_t _ALIGN(128) hash[16];
x16rt_context_overlay ctx;
int size = 80;
void *in = (void*) input;
/*
void *in = (void*) input;
uint32_t *in32 = (uint32_t*) in;
uint32_t ntime = in32[17];
if ( s_ntime == UINT32_MAX )
{
uint32_t _ALIGN(64) timeHash[8];
x16rt_getTimeHash(ntime, &timeHash);
x16rt_getAlgoString(&timeHash[0], hashOrder);
}
*/
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
sph_blake512_init( &ctx.blake );
sph_blake512( &ctx.blake, in, size );
sph_blake512_close( &ctx.blake, hash );
break;
case BMW:
sph_bmw512_init( &ctx.bmw );
sph_bmw512(&ctx.bmw, in, size);
sph_bmw512_close(&ctx.bmw, hash);
break;
case GROESTL:
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
sph_groestl512_close(&ctx.groestl, hash);
#endif
break;
case SKEIN:
sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, in, size );
sph_skein512_close( &ctx.skein, hash );
break;
case JH:
sph_jh512_init( &ctx.jh );
sph_jh512(&ctx.jh, in, size );
sph_jh512_close(&ctx.jh, hash );
break;
case KECCAK:
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, in, size );
sph_keccak512_close( &ctx.keccak, hash );
break;
case LUFFA:
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)in, size );
break;
case CUBEHASH:
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)in, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in, size );
sph_shavite512_close( &ctx.shavite, hash );
break;
case SIMD:
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
break;
case ECHO:
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
sph_echo512_close( &ctx.echo, hash );
#endif
break;
case HAMSI:
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, in, size );
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in, size );
sph_fugue512_close( &ctx.fugue, hash );
break;
case SHABAL:
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, in, size );
sph_shabal512_close( &ctx.shabal, hash );
break;
case WHIRLPOOL:
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
sph_whirlpool_close( &ctx.whirlpool, hash );
break;
case SHA_512:
SHA512_Init( &ctx.sha512 );
SHA512_Update( &ctx.sha512, in, size );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
break;
}
in = (void*) hash;
size = 64;
}
memcpy(output, hash, 32);
}
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t _ALIGN(128) edata[20];
uint32_t _ALIGN(64) timeHash[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = swab32( pdata[17] );
if ( s_ntime != ntime )
{
x16rt_getTimeHash( ntime, &timeHash );
x16rt_getAlgoString( &timeHash[0], hashOrder );
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
s_ntime = ntime;
s_implemented = true;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
hashOrder, ntime, timeHash );
}
if ( !s_implemented )
{
applog( LOG_WARNING, "s not implemented");
sleep(1);
return 0;
x16r_hash_order, ntime, timeHash );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
x16r_prehash( edata, pdata );
do
{
be32enc( &endiandata[19], nonce );
x16rt_hash( hash32, endiandata );
edata[19] = nonce;
x16r_hash( hash32, edata );
if ( hash32[7] <= Htarg )
if (fulltest( hash32, ptarget ) && !opt_benchmark )
if ( valid_hash( hash32, ptarget ) && !bench )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
@@ -237,3 +49,6 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif // !defined(X16R_8WAY) && !defined(X16R_4WAY)

View File

@@ -6,6 +6,8 @@
*/
#include "x16r-gate.h"
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -245,3 +247,5 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
#endif

View File

@@ -8,480 +8,43 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/hamsi-hash-4way.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sha-hash-4way.h"
#include "algo/haval/haval-hash-4way.h"
#include "algo/tiger/sph_tiger.h"
#include "algo/gost/sph_gost.h"
#include "algo/lyra2/lyra2.h"
#if defined(__VAES__)
#include "algo/groestl/groestl512-hash-4way.h"
#include "algo/shavite/shavite-hash-4way.h"
#include "algo/echo/echo-hash-4way.h"
#endif
#if defined(__SHA__)
#include <openssl/sha.h>
#endif
#if defined(X21S_8WAY) || defined(X21S_4WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
#endif
#if defined (X21S_8WAY)
static __thread uint64_t* x21s_8way_matrix;
union _x21s_8way_context_overlay
{
blake512_8way_context blake;
bmw512_8way_context bmw;
skein512_8way_context skein;
jh512_8way_context jh;
keccak512_8way_context keccak;
luffa_4way_context luffa;
cubehashParam cube;
// cube_4way_context cube;
simd_4way_context simd;
hamsi512_8way_context hamsi;
sph_fugue512_context fugue;
shabal512_8way_context shabal;
sph_whirlpool_context whirlpool;
sha512_8way_context sha512;
haval256_5_8way_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
sha256_8way_context sha256;
#if defined(__VAES__)
groestl512_4way_context groestl;
shavite512_4way_context shavite;
echo_4way_context echo;
#else
hashState_groestl groestl;
sph_shavite512_context shavite;
hashState_echo echo;
#endif
} __attribute__ ((aligned (64)));
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
static __thread x21s_8way_context_overlay x21s_ctx;
void x21s_8way_hash( void* output, const void* input )
{
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
uint32_t hash0[20] __attribute__ ((aligned (64)));
uint32_t hash1[20] __attribute__ ((aligned (64)));
uint32_t hash2[20] __attribute__ ((aligned (64)));
uint32_t hash3[20] __attribute__ ((aligned (64)));
uint32_t hash4[20] __attribute__ ((aligned (64)));
uint32_t hash5[20] __attribute__ ((aligned (64)));
uint32_t hash6[20] __attribute__ ((aligned (64)));
uint32_t hash7[20] __attribute__ ((aligned (64)));
uint32_t vhash[16*8] __attribute__ ((aligned (128)));
uint8_t shash[64*8] __attribute__ ((aligned (64)));
uint32_t *hash0 = (uint32_t*) shash;
uint32_t *hash1 = (uint32_t*)( shash+64 );
uint32_t *hash2 = (uint32_t*)( shash+128 );
uint32_t *hash3 = (uint32_t*)( shash+192 );
uint32_t *hash4 = (uint32_t*)( shash+256 );
uint32_t *hash5 = (uint32_t*)( shash+320 );
uint32_t *hash6 = (uint32_t*)( shash+384 );
uint32_t *hash7 = (uint32_t*)( shash+448 );
x21s_8way_context_overlay ctx;
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
void *in2 = (void*) hash2;
void *in3 = (void*) hash3;
void *in4 = (void*) hash4;
void *in5 = (void*) hash5;
void *in6 = (void*) hash6;
void *in7 = (void*) hash7;
int size = 80;
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
input, 640 );
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
if ( i == 0 )
blake512_8way_full( &ctx.blake, vhash, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
blake512_8way_full( &ctx.blake, vhash, vhash, size );
}
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5,
hash6, hash7, vhash );
break;
case BMW:
bmw512_8way_init( &ctx.bmw );
if ( i == 0 )
bmw512_8way_update( &ctx.bmw, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
bmw512_8way_update( &ctx.bmw, vhash, size );
}
bmw512_8way_close( &ctx.bmw, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case GROESTL:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
groestl512_4way_full( &ctx.groestl, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
groestl512_4way_full( &ctx.groestl, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)in4, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)in5, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)in6, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)in7, size<<3 );
#endif
break;
case JH:
if ( i == 0 )
jh512_8way_update( &ctx.jh, input + (64<<3), 16 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, size );
}
jh512_8way_close( &ctx.jh, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case KECCAK:
keccak512_8way_init( &ctx.keccak );
if ( i == 0 )
keccak512_8way_update( &ctx.keccak, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
keccak512_8way_update( &ctx.keccak, vhash, size );
}
keccak512_8way_close( &ctx.keccak, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case SKEIN:
if ( i == 0 )
skein512_8way_update( &ctx.skein, input + (64<<3), 16 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, size );
}
skein512_8way_close( &ctx.skein, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case LUFFA:
if ( i == 0 )
{
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
luffa_4way_update_close( &ctx.luffa, vhash,
vhash + (16<<2), 16 );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
luffa_4way_update_close( &ctx.luffa, vhash,
vhash + (16<<2), 16 );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
}
else
{
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
luffa512_4way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
}
break;
case CUBEHASH:
if ( i == 0 )
{
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)in0 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash1,
(const byte*)in1 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash2,
(const byte*)in2 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
(const byte*)in3 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash4,
(const byte*)in4 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash5,
(const byte*)in5 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash6,
(const byte*)in6 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash7,
(const byte*)in7 + 64, 16 );
}
else
{
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0,
(const byte*)in0, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
(const byte*)in1, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
(const byte*)in2, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
(const byte*)in3, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash4,
(const byte*)in4, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash5,
(const byte*)in5, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash6,
(const byte*)in6, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash7,
(const byte*)in7, size );
}
break;
case SHAVITE:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in4, size );
sph_shavite512_close( &ctx.shavite, hash4 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in5, size );
sph_shavite512_close( &ctx.shavite, hash5 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in6, size );
sph_shavite512_close( &ctx.shavite, hash6 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in7, size );
sph_shavite512_close( &ctx.shavite, hash7 );
#endif
break;
case SIMD:
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
simd512_4way_full( &ctx.simd, vhash, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
simd512_4way_full( &ctx.simd, vhash, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
break;
case ECHO:
#if defined(__VAES__)
intrlv_4x128( vhash, in0, in1, in2, in3, size<<3 );
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhash );
intrlv_4x128( vhash, in4, in5, in6, in7, size<<3 );
echo_4way_full( &ctx.echo, vhash, 512, vhash, size );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
#else
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)in0, size );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)in1, size );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)in2, size );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)in3, size );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)in4, size );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)in5, size );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)in6, size );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)in7, size );
#endif
break;
case HAMSI:
if ( i == 0 )
hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
hamsi512_8way_init( &ctx.hamsi );
hamsi512_8way_update( &ctx.hamsi, vhash, size );
}
hamsi512_8way_close( &ctx.hamsi, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in4, size );
sph_fugue512_close( &ctx.fugue, hash4 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in5, size );
sph_fugue512_close( &ctx.fugue, hash5 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in6, size );
sph_fugue512_close( &ctx.fugue, hash6 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in7, size );
sph_fugue512_close( &ctx.fugue, hash7 );
break;
case SHABAL:
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
if ( i == 0 )
shabal512_8way_update( &ctx.shabal, vhash + (16<<3), 16 );
else
{
shabal512_8way_init( &ctx.shabal );
shabal512_8way_update( &ctx.shabal, vhash, size );
}
shabal512_8way_close( &ctx.shabal, vhash );
dintrlv_8x32_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
case WHIRLPOOL:
if ( i == 0 )
{
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in4, size );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in5, size );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in6, size );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in7, size );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
}
break;
case SHA_512:
sha512_8way_init( &ctx.sha512 );
if ( i == 0 )
sha512_8way_update( &ctx.sha512, input, size );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
size<<3 );
sha512_8way_update( &ctx.sha512, vhash, size );
}
sha512_8way_close( &ctx.sha512, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, vhash );
break;
}
size = 64;
}
x16r_8way_hash_generic( shash, input );
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
@@ -568,8 +131,6 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<3];
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
@@ -588,71 +149,21 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
jh512_8way_init( &x21s_ctx.jh );
jh512_8way_update( &x21s_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
skein512_8way_init( &x21s_ctx.skein );
skein512_8way_update( &x21s_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
intrlv_4x128( vdata2, edata, edata, edata, edata, 640 );
luffa_4way_init( &x21s_ctx.luffa, 512 );
luffa_4way_update( &x21s_ctx.luffa, vdata2, 64 );
rintrlv_4x128_8x64( vdata, vdata2, vdata2, 640 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x21s_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x21s_ctx.cube, (const byte*)edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x21s_ctx.hamsi );
hamsi512_8way_update( &x21s_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
shabal512_8way_init( &x21s_ctx.shabal );
shabal512_8way_update( &x21s_ctx.shabal, vdata2, 64 );
rintrlv_8x32_8x64( vdata, vdata2, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x21s_ctx.whirlpool );
sph_whirlpool( &x21s_ctx.whirlpool, edata, 64 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
default:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_8way_prehash( vdata, pdata );
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x21s_8way_hash( hash, vdata );
@@ -670,7 +181,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( ( n < last_nonce ) && !(*restart) );
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
@@ -692,23 +203,6 @@ static __thread uint64_t* x21s_4way_matrix;
union _x21s_4way_context_overlay
{
blake512_4way_context blake;
bmw512_4way_context bmw;
hashState_echo echo;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
keccak512_4way_context keccak;
luffa_2way_context luffa;
hashState_luffa luffa1;
cubehashParam cube;
sph_shavite512_context shavite;
simd_2way_context simd;
hamsi512_4way_context hamsi;
sph_fugue512_context fugue;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
sha512_4way_context sha512;
haval256_5_4way_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
@@ -718,281 +212,20 @@ union _x21s_4way_context_overlay
sha256_4way_context sha256;
#endif
} __attribute__ ((aligned (64)));
typedef union _x21s_4way_context_overlay x21s_4way_context_overlay;
static __thread x21s_4way_context_overlay x21s_ctx;
typedef union _x21s_4way_context_overlay x21s_4way_context_overlay;
void x21s_4way_hash( void* output, const void* input )
{
uint32_t hash0[20] __attribute__ ((aligned (64)));
uint32_t hash1[20] __attribute__ ((aligned (64)));
uint32_t hash2[20] __attribute__ ((aligned (64)));
uint32_t hash3[20] __attribute__ ((aligned (64)));
uint32_t vhash[20*4] __attribute__ ((aligned (64)));
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
uint8_t shash[64*4] __attribute__ ((aligned (64)));
x21s_4way_context_overlay ctx;
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
void *in0 = (void*) hash0;
void *in1 = (void*) hash1;
void *in2 = (void*) hash2;
void *in3 = (void*) hash3;
int size = 80;
uint32_t *hash0 = (uint32_t*) shash;
uint32_t *hash1 = (uint32_t*)( shash+64 );
uint32_t *hash2 = (uint32_t*)( shash+128 );
uint32_t *hash3 = (uint32_t*)( shash+192 );
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
// Input data is supplied both 64 bit interleaved (input) and deinterleaved
// in hash0-3. If the first function uses 64 bit data it does not need to
// re-interleave first; it may use whichever form is most convenient, i.e.
// 4way 64 bit. All other functions expect their input deinterleaved in
// hash0-3, and every function must exit with its output deinterleaved in
// hash0-3. The aliases in0-3 point at hash0-3; size is set accordingly,
// as sketched below.
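// A rough sketch of the per-round data flow:
//
//    round 0:   size = 80, hash0-3 hold the deinterleaved block headers
//    round 1+:  size = 64, hash0-3 hold the previous function's output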
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
if ( i == 0 )
blake512_4way_full( &ctx.blake, vhash, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
blake512_4way_full( &ctx.blake, vhash, vhash, size );
}
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case BMW:
bmw512_4way_init( &ctx.bmw );
if ( i == 0 )
bmw512_4way_update( &ctx.bmw, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
bmw512_4way_update( &ctx.bmw, vhash, size );
}
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case GROESTL:
groestl512_full( &ctx.groestl, (char*)hash0, (char*)in0, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)in1, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)in2, size<<3 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)in3, size<<3 );
break;
case JH:
if ( i == 0 )
jh512_4way_update( &ctx.jh, input + (64<<2), 16 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, size );
}
jh512_4way_close( &ctx.jh, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case KECCAK:
keccak512_4way_init( &ctx.keccak );
if ( i == 0 )
keccak512_4way_update( &ctx.keccak, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
keccak512_4way_update( &ctx.keccak, vhash, size );
}
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case SKEIN:
if ( i == 0 )
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, size );
}
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case LUFFA:
if ( i == 0 )
{
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash0,
(const BitSequence*)in0 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash1,
(const BitSequence*)in1 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash2,
(const BitSequence*)in2 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
update_and_final_luffa( &ctx.luffa1, (BitSequence*)hash3,
(const BitSequence*)in3 + 64, 16 );
}
else
{
intrlv_2x128( vhash, in0, in1, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_2x128_512( hash0, hash1, vhash );
intrlv_2x128( vhash, in2, in3, size<<3 );
luffa512_2way_full( &ctx.luffa, vhash, vhash, size );
dintrlv_2x128_512( hash2, hash3, vhash );
}
break;
case CUBEHASH:
if ( i == 0 )
{
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)in0 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1,
(const byte*)in1 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2,
(const byte*)in2 + 64, 16 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
(const byte*)in3 + 64, 16 );
}
else
{
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
(const byte*)in0, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash1,
(const byte*)in1, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash2,
(const byte*)in2, size );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hash3,
(const byte*)in3, size );
}
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in0, size );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in1, size );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in2, size );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in3, size );
sph_shavite512_close( &ctx.shavite, hash3 );
break;
case SIMD:
intrlv_2x128( vhash, in0, in1, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash0, hash1, vhash, 512 );
intrlv_2x128( vhash, in2, in3, size<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
dintrlv_2x128( hash2, hash3, vhash, 512 );
break;
case ECHO:
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)in0, size );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)in1, size );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)in2, size );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)in3, size );
break;
case HAMSI:
if ( i == 0 )
hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
hamsi512_4way_init( &ctx.hamsi );
hamsi512_4way_update( &ctx.hamsi, vhash, size );
}
hamsi512_4way_close( &ctx.hamsi, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in0, size );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in1, size );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in2, size );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in3, size );
sph_fugue512_close( &ctx.fugue, hash3 );
break;
case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
if ( i == 0 )
shabal512_4way_update( &ctx.shabal, vhash + (16<<2), 16 );
else
{
shabal512_4way_init( &ctx.shabal );
shabal512_4way_update( &ctx.shabal, vhash, size );
}
shabal512_4way_close( &ctx.shabal, vhash );
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
break;
case WHIRLPOOL:
if ( i == 0 )
{
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
memcpy( &ctx, &x21s_ctx, sizeof(ctx) );
sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
}
else
{
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in0, size );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in1, size );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in2, size );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in3, size );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
}
break;
case SHA_512:
sha512_4way_init( &ctx.sha512 );
if ( i == 0 )
sha512_4way_update( &ctx.sha512, input, size );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
sha512_4way_update( &ctx.sha512, vhash, size );
}
sha512_4way_close( &ctx.sha512, vhash );
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
break;
}
size = 64;
}
x16r_4way_hash_generic( shash, input );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
@@ -1073,8 +306,6 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t bedata1[2] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -1090,66 +321,20 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
}
const char elem = hashOrder[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case JH:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
jh512_4way_init( &x21s_ctx.jh );
jh512_4way_update( &x21s_ctx.jh, vdata, 64 );
break;
case SKEIN:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
skein512_4way_init( &x21s_ctx.skein );
skein512_4way_update( &x21s_ctx.skein, vdata, 64 );
break;
case LUFFA:
mm128_bswap32_80( edata, pdata );
init_luffa( &x21s_ctx.luffa1, 512 );
update_luffa( &x21s_ctx.luffa1, (const BitSequence*)edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case CUBEHASH:
mm128_bswap32_80( edata, pdata );
cubehashInit( &x21s_ctx.cube, 512, 16, 32 );
cubehashUpdate( &x21s_ctx.cube, (const byte*)edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case HAMSI:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x21s_ctx.hamsi );
hamsi512_4way_update( &x21s_ctx.hamsi, vdata, 64 );
break;
case SHABAL:
mm128_bswap32_intrlv80_4x32( vdata32, pdata );
shabal512_4way_init( &x21s_ctx.shabal );
shabal512_4way_update( &x21s_ctx.shabal, vdata32, 64 );
rintrlv_4x32_4x64( vdata, vdata32, 640 );
break;
case WHIRLPOOL:
mm128_bswap32_80( edata, pdata );
sph_whirlpool_init( &x21s_ctx.whirlpool );
sph_whirlpool( &x21s_ctx.whirlpool, edata, 64 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
default:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
}
x16r_4way_prehash( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x21s_4way_hash( hash, vdata );
@@ -1162,7 +347,7 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < last_nonce ) && !(*restart) );
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;

View File

@@ -5,63 +5,21 @@
* Optimized by JayDDee@github Jan 2018
*/
#include "x16r-gate.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include <openssl/sha.h>
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
#include "algo/haval/sph-haval.h"
#include "algo/tiger/sph_tiger.h"
#include "algo/gost/sph_gost.h"
#include "algo/lyra2/lyra2.h"
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
static __thread uint64_t* x21s_matrix;
union _x21s_context_overlay
{
#if defined(__AES__)
hashState_echo echo;
hashState_groestl groestl;
#else
sph_groestl512_context groestl;
sph_echo512_context echo;
#endif
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
hashState_sd simd;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
sph_haval256_5_context haval;
sph_tiger_context tiger;
sph_gost512_context gost;
@@ -73,112 +31,8 @@ void x21s_hash( void* output, const void* input )
{
uint32_t _ALIGN(128) hash[16];
x21s_context_overlay ctx;
void *in = (void*) input;
int size = 80;
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
case BLAKE:
sph_blake512_init( &ctx.blake );
sph_blake512( &ctx.blake, in, size );
sph_blake512_close( &ctx.blake, hash );
break;
case BMW:
sph_bmw512_init( &ctx.bmw );
sph_bmw512(&ctx.bmw, in, size);
sph_bmw512_close(&ctx.bmw, hash);
break;
case GROESTL:
#if defined(__AES__)
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)in, size<<3 );
#else
sph_groestl512_init( &ctx.groestl );
sph_groestl512( &ctx.groestl, in, size );
sph_groestl512_close(&ctx.groestl, hash);
#endif
break;
case SKEIN:
sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, in, size );
sph_skein512_close( &ctx.skein, hash );
break;
case JH:
sph_jh512_init( &ctx.jh );
sph_jh512(&ctx.jh, in, size );
sph_jh512_close(&ctx.jh, hash );
break;
case KECCAK:
sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, in, size );
sph_keccak512_close( &ctx.keccak, hash );
break;
case LUFFA:
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)in, size );
break;
case CUBEHASH:
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
(const byte*)in, size );
break;
case SHAVITE:
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, in, size );
sph_shavite512_close( &ctx.shavite, hash );
break;
case SIMD:
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
break;
case ECHO:
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence*)in, size<<3 );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
sph_echo512_close( &ctx.echo, hash );
#endif
break;
case HAMSI:
sph_hamsi512_init( &ctx.hamsi );
sph_hamsi512( &ctx.hamsi, in, size );
sph_hamsi512_close( &ctx.hamsi, hash );
break;
case FUGUE:
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, in, size );
sph_fugue512_close( &ctx.fugue, hash );
break;
case SHABAL:
sph_shabal512_init( &ctx.shabal );
sph_shabal512( &ctx.shabal, in, size );
sph_shabal512_close( &ctx.shabal, hash );
break;
case WHIRLPOOL:
sph_whirlpool_init( &ctx.whirlpool );
sph_whirlpool( &ctx.whirlpool, in, size );
sph_whirlpool_close( &ctx.whirlpool, hash );
break;
case SHA_512:
SHA512_Init( &ctx.sha512 );
SHA512_Update( &ctx.sha512, in, size );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
break;
}
in = (void*) hash;
size = 64;
}
x16r_hash_generic( hash, input );
sph_haval256_5_init( &ctx.haval );
sph_haval256_5( &ctx.haval, (const void*) hash, 64) ;
@@ -206,42 +60,38 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x0cff;
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
x16r_prehash( edata, pdata );
do
{
be32enc( &endiandata[19], nonce );
x21s_hash( hash32, endiandata );
edata[19] = nonce;
x21s_hash( hash32, edata );
if ( hash32[7] <= Htarg )
if (fulltest( hash32, ptarget ) && !opt_benchmark )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
@@ -261,3 +111,4 @@ bool x21s_thread_init()
return x21s_matrix;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "sonoa-gate.h"
#if !defined(SONOA_8WAY) && !defined(SONOA_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -616,3 +619,5 @@ int scanhash_sonoa( struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "x17-gate.h"
#if !defined(X17_8WAY) && !defined(X17_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -9,9 +12,6 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/simd/sph_simd.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
@@ -148,30 +148,32 @@ void x17_hash(void *output, const void *input)
int scanhash_x17( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
// we need bigendian data...
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
mm128_bswap32_80( edata, pdata );
do
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
x17_hash( hash64, endiandata );
if unlikely( valid_hash( hash64, ptarget ) && !opt_benchmark )
edata[19] = n;
x17_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
#endif

View File

@@ -1,5 +1,7 @@
#include "xevan-gate.h"
#if !defined(XEVAN_8WAY) && !defined(XEVAN_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -268,3 +270,4 @@ int scanhash_xevan( struct work *work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,3 +1,7 @@
#include "x22i-gate.h"
#if !( defined(X22I_8WAY) || defined(X22I_4WAY) )
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#if defined(__AES__)
@@ -24,7 +28,6 @@
#include "algo/lyra2/lyra2.h"
#include "algo/gost/sph_gost.h"
#include "algo/swifftx/swifftx.h"
#include "x22i-gate.h"
union _x22i_context_overlay
{
@@ -200,3 +203,4 @@ int scanhash_x22i( struct work* work, uint32_t max_nonce,
return 0;
}
#endif

View File

@@ -1,4 +1,7 @@
#include "x22i-gate.h"
#if !( defined(X25X_8WAY) || defined(X25X_4WAY) )
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#if defined(__AES__)
@@ -201,7 +204,7 @@ void x25x_hash( void *output, const void *input )
int scanhash_x25x( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -213,17 +216,19 @@ int scanhash_x25x( struct work* work, uint32_t max_nonce,
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
mm128_bswap32_80( edata, pdata );
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
be32enc(&edata[k], pdata[k]);
InitializeSWIFFTX();
do
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
be32enc( &edata[19], n );
x25x_hash( hash, endiandata );
x25x_hash( hash, edata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
@@ -234,3 +239,4 @@ int scanhash_x25x( struct work* work, uint32_t max_nonce,
return 0;
}
#endif

configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.8.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.9.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.11.8'
PACKAGE_STRING='cpuminer-opt 3.11.8'
PACKAGE_VERSION='3.11.9'
PACKAGE_STRING='cpuminer-opt 3.11.9'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.11.8 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.11.9 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.11.8:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.11.9:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.11.8
cpuminer-opt configure 3.11.9
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.11.8, which was
It was created by cpuminer-opt $as_me 3.11.9, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.11.8'
VERSION='3.11.9'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.11.8, which was
This file was extended by cpuminer-opt $as_me 3.11.9, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.11.8
cpuminer-opt config.status 3.11.9
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.11.8])
AC_INIT([cpuminer-opt], [3.11.9])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

Some files were not shown because too many files have changed in this diff.