mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.7.4
This commit is contained in:
56
Makefile.am
56
Makefile.am
@@ -22,24 +22,6 @@ cpuminer_SOURCES = \
|
||||
api.c \
|
||||
sysinfos.c \
|
||||
algo-gate-api.c\
|
||||
algo/groestl/sph_groestl.c \
|
||||
algo/bmw/sph_bmw.c \
|
||||
algo/shavite/sph_shavite.c \
|
||||
algo/shavite/shavite.c \
|
||||
algo/echo/sph_echo.c \
|
||||
algo/heavy/sph_hefty1.c \
|
||||
algo/luffa/sph_luffa.c \
|
||||
algo/cubehash/sph_cubehash.c \
|
||||
algo/simd/sph_simd.c \
|
||||
algo/hamsi/sph_hamsi.c \
|
||||
algo/fugue/sph_fugue.c \
|
||||
algo/gost/sph_gost.c \
|
||||
algo/jh/sph_jh.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/sm3/sm3.c \
|
||||
algo/whirlpool/sph_whirlpool.c\
|
||||
crypto/blake2s.c \
|
||||
crypto/oaes_lib.c \
|
||||
crypto/c_keccak.c \
|
||||
@@ -67,22 +49,34 @@ cpuminer_SOURCES = \
|
||||
algo/blake/blake2s.c \
|
||||
algo/blake/mod_blakecoin.c \
|
||||
algo/blake/blakecoin.c \
|
||||
algo/blake/decred-gate.c \
|
||||
algo/blake/decred.c \
|
||||
algo/blake/decred-4way.c \
|
||||
algo/blake/pentablake-gate.c \
|
||||
algo/blake/pentablake-4way.c \
|
||||
algo/blake/pentablake.c \
|
||||
algo/bmw/sph_bmw.c \
|
||||
algo/bmw/bmw256.c \
|
||||
algo/cubehash/sse2/cubehash_sse2.c\
|
||||
algo/cryptonight/cryptolight.c \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
algo/cryptonight/cryptonight-aesni.c\
|
||||
algo/cryptonight/cryptonight.c\
|
||||
algo/cubehash/sph_cubehash.c \
|
||||
algo/cubehash/sse2/cubehash_sse2.c\
|
||||
algo/drop.c \
|
||||
algo/echo/sph_echo.c \
|
||||
algo/echo/aes_ni/hash.c\
|
||||
algo/fresh.c \
|
||||
algo/gost/sph_gost.c \
|
||||
algo/groestl/sph_groestl.c \
|
||||
algo/groestl/groestl.c \
|
||||
algo/groestl/myr-groestl.c \
|
||||
algo/groestl/aes_ni/hash-groestl.c \
|
||||
algo/groestl/aes_ni/hash-groestl256.c \
|
||||
algo/fugue/sph_fugue.c \
|
||||
algo/hamsi/sph_hamsi.c \
|
||||
algo/haval/haval.c\
|
||||
algo/heavy/sph_hefty1.c \
|
||||
algo/heavy/heavy.c \
|
||||
algo/heavy/bastion.c \
|
||||
algo/hmq1725.c \
|
||||
@@ -91,6 +85,10 @@ cpuminer_SOURCES = \
|
||||
algo/hodl/hodl-wolf.c \
|
||||
algo/hodl/sha512_avx.c \
|
||||
algo/hodl/sha512_avx2.c \
|
||||
algo/jh/sph_jh.c \
|
||||
algo/jh/jh-hash-4way.c \
|
||||
algo/jh/jha-gate.c \
|
||||
algo/jh/jha-4way.c \
|
||||
algo/jh/jha.c \
|
||||
algo/keccak/sph_keccak.c \
|
||||
algo/keccak/keccak.c\
|
||||
@@ -99,6 +97,7 @@ cpuminer_SOURCES = \
|
||||
algo/keccak/keccak-gate.c \
|
||||
algo/keccak/sse2/keccak.c \
|
||||
algo/lbry.c \
|
||||
algo/luffa/sph_luffa.c \
|
||||
algo/luffa/luffa.c \
|
||||
algo/luffa/sse2/luffa_for_sse2.c \
|
||||
algo/lyra2/lyra2.c \
|
||||
@@ -109,7 +108,9 @@ cpuminer_SOURCES = \
|
||||
algo/lyra2/lyra2z330.c \
|
||||
algo/m7m.c \
|
||||
algo/neoscrypt.c \
|
||||
algo/nist5.c \
|
||||
algo/nist5/nist5-gate.c \
|
||||
algo/nist5/nist5-4way.c \
|
||||
algo/nist5/nist5.c \
|
||||
algo/pluck.c \
|
||||
algo/polytimos/polytimos-gate.c \
|
||||
algo/polytimos/polytimos.c \
|
||||
@@ -119,8 +120,14 @@ cpuminer_SOURCES = \
|
||||
algo/ripemd/sph_ripemd.c \
|
||||
algo/scrypt.c \
|
||||
algo/scryptjane/scrypt-jane.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/shavite/sph_shavite.c \
|
||||
algo/shavite/shavite.c \
|
||||
algo/simd/sph_simd.c \
|
||||
algo/simd/sse2/nist.c \
|
||||
algo/simd/sse2/vector.c \
|
||||
algo/skein/sph_skein.c \
|
||||
@@ -132,11 +139,18 @@ cpuminer_SOURCES = \
|
||||
algo/skein/skein2-4way.c \
|
||||
algo/skein/skein2-gate.c \
|
||||
algo/skunk.c \
|
||||
algo/sm3/sm3.c \
|
||||
algo/tiger/sph_tiger.c \
|
||||
algo/timetravel.c \
|
||||
algo/timetravel10.c \
|
||||
algo/tribus.c \
|
||||
algo/tribus/tribus-gate.c \
|
||||
algo/tribus/tribus.c \
|
||||
algo/tribus/tribus-4way.c \
|
||||
algo/veltor.c \
|
||||
algo/whirlpool/sph_whirlpool.c \
|
||||
algo/whirlpool/whirlpool-hash-4way.c \
|
||||
algo/whirlpool/whirlpool-gate.c \
|
||||
algo/whirlpool/whirlpool-4way.c \
|
||||
algo/whirlpool/whirlpool.c \
|
||||
algo/whirlpool/whirlpoolx.c \
|
||||
algo/x11/phi1612.c \
|
||||
|
18
README.txt
18
README.txt
@@ -17,13 +17,17 @@ supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Exe name Compile opts Arch name
|
||||
|
||||
cpuminer-sse2.exe -march=core2, Core2
|
||||
cpuminer-sse42.exe -march=corei7, Nehalem
|
||||
cpuminer-aes-sse42.exe -maes -msse4.2 Westmere
|
||||
cpuminer-aes-avx.exe -march=corei7-avx, Sandybridge, Ivybridge
|
||||
cpuminer-aes-avx2.exe -march=core-avx2, Haswell, Broadwell, Skylake, Kabylake
|
||||
Exe name Compile opts Arch name
|
||||
|
||||
cpuminer-sse2.exe -march=core2 Core2
|
||||
cpuminer-sse42.exe -march=corei7 Nehalem
|
||||
cpuminer-aes-sse42.exe -maes -msse4.2" Westmere
|
||||
cpuminer-aes-avx.exe -march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-aes-avx2.exe "-march=core-avx2" Haswell, Broadwell, Skylake, Kabylake
|
||||
cpuminer-4way.exe "-march=core-avx2 -DFOUR_WAY"
|
||||
|
||||
4way requires a CPU with AES and AVX2. It is still under development and
|
||||
only a few algos are supported. See change log in RELEASE_NOTES in source
|
||||
package for supported algos.
|
||||
|
||||
There is no binary support available for SHA on AMD Ryzen CPUs.
|
||||
|
@@ -67,6 +67,11 @@ have been due to AVX and AVX2 optimizations added to that version.
|
||||
Additional improvements are expected on Ryzen with openssl 1.1.
|
||||
"-march-znver1" or "-msha".
|
||||
|
||||
Additional instructions for static compilalation can be found here:
|
||||
https://lxadm.com/Static_compilation_of_cpuminer
|
||||
Static builds should only considered in a homogeneous HW and SW environment.
|
||||
Local builds will always have the best performance and compatibility.
|
||||
|
||||
Extract cpuminer source.
|
||||
|
||||
tar xvzf cpuminer-opt-x.y.z.tar.gz
|
||||
@@ -96,6 +101,11 @@ Start mining.
|
||||
|
||||
./cpuminer -a algo -o url -u username -p password
|
||||
|
||||
Windows
|
||||
|
||||
The following in how the Windows binary releases are built. It's old and
|
||||
not very good but it works, for me anyway.
|
||||
|
||||
Building on Windows prerequisites:
|
||||
|
||||
msys
|
||||
@@ -154,6 +164,12 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.7.4
|
||||
|
||||
Removed unnecessary build options.
|
||||
|
||||
Added 4way support for tribus and nist5.
|
||||
|
||||
v3.7.3
|
||||
|
||||
Added polytimos algo.
|
||||
|
@@ -1,11 +1,11 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "blake-gate.h"
|
||||
#include "sph_blake.h"
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (BLAKE_4WAY)
|
||||
|
||||
void blakehash_4way(void *state, const void *input)
|
||||
{
|
||||
@@ -41,7 +41,7 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found;
|
||||
int num_found = 0;
|
||||
|
||||
// if (opt_benchmark)
|
||||
// HTarget = 0x7f;
|
||||
@@ -55,7 +55,6 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
num_found = 0;
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +2, n+1 );
|
||||
be32enc( noncep +4, n+2 );
|
||||
|
@@ -8,11 +8,11 @@ int64_t blake_get_max64 ()
|
||||
bool register_blake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->get_max64 = (void*)&blake_get_max64;
|
||||
#if defined (__AVX2__) && defined (FOUR_WAY)
|
||||
//#if defined (__AVX2__) && defined (FOUR_WAY)
|
||||
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_blake_8way;
|
||||
// gate->hash = (void*)&blakehash_8way;
|
||||
#elif defined(__AVX__) && defined (FOUR_WAY)
|
||||
#if defined(BLAKE_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT;
|
||||
gate->scanhash = (void*)&scanhash_blake_4way;
|
||||
gate->hash = (void*)&blakehash_4way;
|
||||
|
@@ -4,13 +4,11 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined (__AVX2__)
|
||||
//void blakehash_84way(void *state, const void *input);
|
||||
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#define BLAKE_4WAY
|
||||
#endif
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (BLAKE_4WAY)
|
||||
void blakehash_4way(void *state, const void *input);
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
@@ -346,7 +346,7 @@ static const sph_u32 CS[16] = {
|
||||
#define CBF SPH_C64(0x636920D871574E69)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
|
||||
// not used
|
||||
static const sph_u64 CB[16] = {
|
||||
SPH_C64(0x243F6A8885A308D3), SPH_C64(0x13198A2E03707344),
|
||||
SPH_C64(0xA4093822299F31D0), SPH_C64(0x082EFA98EC4E6C89),
|
||||
@@ -428,7 +428,7 @@ do { \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
|
||||
// not used
|
||||
#define ROUND_B_4WAY(r) do { \
|
||||
GB_4WAY(M[sigma[r][0x0]], M[sigma[r][0x1]], \
|
||||
CB[sigma[r][0x0]], CB[sigma[r][0x1]], V0, V4, V8, VC); \
|
||||
@@ -449,7 +449,7 @@ do { \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
//current_impl
|
||||
#define ROUND_B_4WAY(r) do { \
|
||||
GB_4WAY(Mx(r, 0), Mx(r, 1), CBx(r, 0), CBx(r, 1), V0, V4, V8, VC); \
|
||||
GB_4WAY(Mx(r, 2), Mx(r, 3), CBx(r, 2), CBx(r, 3), V1, V5, V9, VD); \
|
||||
@@ -696,6 +696,7 @@ do { \
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
|
||||
// not used
|
||||
#define COMPRESS64_4WAY do { \
|
||||
__m256i M[16]; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
@@ -759,6 +760,8 @@ do { \
|
||||
|
||||
#else
|
||||
|
||||
//current impl
|
||||
|
||||
#define COMPRESS64_4WAY do { \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
@@ -986,7 +989,7 @@ blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
|
||||
size_t ptr;
|
||||
DECL_STATE64_4WAY
|
||||
|
||||
const int buf_size = 64; // sizeof/8
|
||||
const int buf_size = 128; // sizeof/8
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
@@ -1037,7 +1040,7 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
__m256i *out;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3) + n;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
z = 0x80 >> n;
|
||||
zz = ((ub & -z) | z) & 0xFF;
|
||||
u.buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
|
||||
@@ -1057,9 +1060,9 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
{
|
||||
sc->T0 -= 1024 - bit_len;
|
||||
}
|
||||
if ( ptr <= (96 >> 3) )
|
||||
if ( ptr <= 104 )
|
||||
{
|
||||
memset_zero_m256i( u.buf + (ptr>>3) + 1, (96-ptr) >> 3 );
|
||||
memset_zero_m256i( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
u.buf[(104>>3)] = _mm256_or_si256( u.buf[(104>>3)],
|
||||
_mm256_set_epi64x( 0x0100000000000000,
|
||||
@@ -1070,11 +1073,13 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(u.buf+(120>>3)) = mm256_byteswap_epi64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_m256i( u.buf + (ptr>>3) + 1, (127 - ptr) >> 3 );
|
||||
memset_zero_m256i( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
|
||||
|
||||
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
||||
@@ -1089,6 +1094,7 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(u.buf+(120>>3)) = mm256_byteswap_epi64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, u.buf, 128 );
|
||||
}
|
||||
out = (__m256i*)dst;
|
||||
|
@@ -62,9 +62,9 @@ extern "C"{
|
||||
#ifdef __AVX__
|
||||
typedef struct {
|
||||
__m128i buf[16] __attribute__ ((aligned (64)));
|
||||
size_t ptr;
|
||||
__m128i H[8];
|
||||
__m128i S[4];
|
||||
size_t ptr;
|
||||
sph_u32 T0, T1;
|
||||
} blake_4way_small_context;
|
||||
|
||||
@@ -82,13 +82,13 @@ void blake256_4way_addbits_and_close(
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
size_t ptr;
|
||||
__m256i H[8];
|
||||
__m256i S[4];
|
||||
size_t ptr;
|
||||
sph_u64 T0, T1;
|
||||
} blake_4way_big_context;
|
||||
|
||||
typedef blake_4way_big_context blake512_avx2_context;
|
||||
typedef blake_4way_big_context blake512_4way_context;
|
||||
|
||||
void blake512_4way_init(void *cc);
|
||||
void blake512_4way(void *cc, const void *data, size_t len);
|
||||
|
153
algo/blake/decred-4way.c
Normal file
153
algo/blake/decred-4way.c
Normal file
@@ -0,0 +1,153 @@
|
||||
#include "decred-gate.h"
|
||||
#include "sph_blake.h"
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
|
||||
static __thread blake256_4way_context blake_mid;
|
||||
static __thread bool ctx_midstate_done = false;
|
||||
|
||||
void decred_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
sph_blake256_context ctx2 __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16] __attribute__ ((aligned (64)));
|
||||
uint32_t sin0[45], sin1[45], sin2[45], sin3[45];
|
||||
m128_deinterleave_4x32( sin0, sin1, sin2, sin3, (uint32_t*)input, 180*8 );
|
||||
|
||||
void *tail = input + DECRED_MIDSTATE_LEN;
|
||||
int tail_len = 180 - DECRED_MIDSTATE_LEN;
|
||||
// #define MIDSTATE_LEN 128
|
||||
/*
|
||||
uint8_t *ending = (uint8_t*) input;
|
||||
ending += MIDSTATE_LEN;
|
||||
|
||||
if ( !ctx_midstate_done )
|
||||
{
|
||||
blake256_4way_init( &blake_mid );
|
||||
blake256_4way( &blake_mid, input, DECRED_MIDSTATE_LEN );
|
||||
ctx_midstate_done = true;
|
||||
}
|
||||
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
|
||||
|
||||
blake256_4way( &ctx, tail, tail_len );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
*/
|
||||
|
||||
|
||||
sph_blake256_init( &ctx2 );
|
||||
sph_blake256( &ctx2, sin0, 180 );
|
||||
sph_blake256_close( &ctx2, hash );
|
||||
|
||||
blake256_4way_init( &ctx );
|
||||
blake256_4way( &ctx, input, 180 );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
|
||||
m128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
/*
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( hash[i] != hash0[i] )
|
||||
printf(" hash mismatch, i = %u\n",i);
|
||||
|
||||
printf("hash: %08lx %08lx %08lx %08lx\n", *hash, *(hash+1),
|
||||
*(hash+2), *(hash+3) );
|
||||
printf("hash0: %08lx %08lx %08lx %08lx\n", *hash0, *(hash0+1),
|
||||
*(hash0+2), *(hash0+3) );
|
||||
printf("\n");
|
||||
*/
|
||||
|
||||
// memcpy( state, hash0, 32 );
|
||||
// memcpy( state+32, hash1, 32 );
|
||||
// memcpy( state+64, hash1, 32 );
|
||||
// memcpy( state+96, hash1, 32 );
|
||||
|
||||
memcpy( state, hash, 32 );
|
||||
|
||||
}
|
||||
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t vdata[45*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) endiandata[48];
|
||||
// uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
|
||||
// #define DCR_NONCE_OFT32 35
|
||||
|
||||
ctx_midstate_done = false;
|
||||
|
||||
// memcpy(endiandata, pdata, 180);
|
||||
|
||||
m128_interleave_4x32( vdata, pdata, pdata, pdata, pdata, 180*8 );
|
||||
|
||||
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
* noncep = n;
|
||||
*(noncep+2) = n+1;
|
||||
*(noncep+4) = n+2;
|
||||
*(noncep+6) = n+3;
|
||||
|
||||
decred_hash_4way( hash, vdata );
|
||||
|
||||
// endiandata[DCR_NONCE_OFT32] = n;
|
||||
// decred_hash(hash32, endiandata);
|
||||
|
||||
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
}
|
||||
/* if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n;
|
||||
}
|
||||
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n;
|
||||
}
|
||||
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n;
|
||||
}
|
||||
*/
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
176
algo/blake/decred-gate.c
Normal file
176
algo/blake/decred-gate.c
Normal file
@@ -0,0 +1,176 @@
|
||||
#include "decred-gate.h"
|
||||
#include <unistd.h>
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
|
||||
uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return &work_data[ DECRED_NONCE_INDEX ];
|
||||
}
|
||||
|
||||
double decred_calc_network_diff( struct work* work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
uint32_t nbits = work->data[ DECRED_NBITS_INDEX ];
|
||||
uint32_t bits = ( nbits & 0xffffff );
|
||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||
int m;
|
||||
double d = (double)0x0000ffff / (double)bits;
|
||||
|
||||
for ( m = shift; m < 29; m++ )
|
||||
d *= 256.0;
|
||||
for ( m = 29; m < shift; m++ )
|
||||
d /= 256.0;
|
||||
if ( shift == 28 )
|
||||
d *= 256.0; // testnet
|
||||
if ( opt_debug_diff )
|
||||
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d,
|
||||
shift, bits );
|
||||
return net_diff;
|
||||
}
|
||||
|
||||
void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
|
||||
{
|
||||
// some random extradata to make the work unique
|
||||
work->data[ DECRED_XNONCE_INDEX ] = (rand()*4);
|
||||
work->height = work->data[32];
|
||||
if (!have_longpoll && work->height > *net_blocks + 1)
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
if (opt_showdiff && net_diff > 0.)
|
||||
{
|
||||
if (net_diff != work->targetdiff)
|
||||
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
|
||||
work->targetdiff);
|
||||
else
|
||||
sprintf(netinfo, ", diff %.3f", net_diff);
|
||||
}
|
||||
applog(LOG_BLUE, "%s block %d%s", algo_names[opt_algo], work->height,
|
||||
netinfo);
|
||||
*net_blocks = work->height - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void decred_be_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
be32enc( &ntime, work->data[ DECRED_NTIME_INDEX ] );
|
||||
be32enc( &nonce, work->data[ DECRED_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( (char*)( &work->data[ DECRED_XNONCE_INDEX ] ),
|
||||
sctx->xnonce1_size );
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
#define min(a,b) (a>b ? (b) :(a))
|
||||
|
||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
uchar merkle_root[64] = { 0 };
|
||||
uint32_t extraheader[32] = { 0 };
|
||||
int headersize = 0;
|
||||
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
// getwork over stratum, getwork merkle + header passed in coinb1
|
||||
memcpy(merkle_root, sctx->job.coinbase, 32);
|
||||
headersize = min((int)sctx->job.coinbase_size - 32,
|
||||
sizeof(extraheader) );
|
||||
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
|
||||
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = swab32(
|
||||
le32dec( (uint32_t *) sctx->job.prevhash + i ) );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = swab32( be32dec( (uint32_t *) merkle_root + i ) );
|
||||
|
||||
// for ( i = 0; i < 8; i++ ) // prevhash
|
||||
// g_work->data[1 + i] = swab32( g_work->data[1 + i] );
|
||||
// for ( i = 0; i < 8; i++ ) // merkle
|
||||
// g_work->data[9 + i] = swab32( g_work->data[9 + i] );
|
||||
|
||||
for ( i = 0; i < headersize/4; i++ ) // header
|
||||
g_work->data[17 + i] = extraheader[i];
|
||||
// extradata
|
||||
|
||||
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
|
||||
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
|
||||
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
|
||||
g_work->data[i] = 0;
|
||||
g_work->data[37] = (rand()*4) << 8;
|
||||
// block header suffix from coinb2 (stake version)
|
||||
memcpy( &g_work->data[44],
|
||||
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
|
||||
sctx->bloc_height = g_work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
}
|
||||
|
||||
#undef min
|
||||
|
||||
bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
|
||||
// need to regen g_work..
|
||||
return false;
|
||||
if ( have_stratum && !work->data[0] && !opt_benchmark )
|
||||
{
|
||||
sleep(1);
|
||||
return false;
|
||||
}
|
||||
// extradata: prevent duplicates
|
||||
work->data[ DECRED_XNONCE_INDEX ] += 1;
|
||||
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(DECRED_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT;
|
||||
gate->scanhash = (void*)&scanhash_decred_4way;
|
||||
gate->hash = (void*)&decred_hash_4way;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_decred;
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
|
||||
// gate->optimizations = SSE2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_decred;
|
||||
// gate->hash = (void*)&decred_hash;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&decred_build_extraheader;
|
||||
gate->ready_to_mine = (void*)&decred_ready_to_mine;
|
||||
gate->nbits_index = DECRED_NBITS_INDEX;
|
||||
gate->ntime_index = DECRED_NTIME_INDEX;
|
||||
gate->nonce_index = DECRED_NONCE_INDEX;
|
||||
gate->work_data_size = DECRED_DATA_SIZE;
|
||||
gate->work_cmp_size = DECRED_WORK_COMPARE_SIZE;
|
||||
allow_mininginfo = false;
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
36
algo/blake/decred-gate.h
Normal file
36
algo/blake/decred-gate.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef __DECRED_GATE_H__
|
||||
#define __DECRED_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#define DECRED_NBITS_INDEX 29
|
||||
#define DECRED_NTIME_INDEX 34
|
||||
#define DECRED_NONCE_INDEX 35
|
||||
#define DECRED_XNONCE_INDEX 36
|
||||
#define DECRED_DATA_SIZE 192
|
||||
#define DECRED_WORK_COMPARE_SIZE 140
|
||||
#define DECRED_MIDSTATE_LEN 128
|
||||
|
||||
#if defined (__AVX2__)
|
||||
//void blakehash_84way(void *state, const void *input);
|
||||
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#define DECRED_4WAY
|
||||
#endif
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
void decred_hash_4way(void *state, const void *input);
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void decred_hash( void *state, const void *input );
|
||||
int scanhash_decred( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "decred-gate.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
#include <string.h>
|
||||
@@ -14,33 +14,33 @@
|
||||
#define max(a,b) (a<b ? b : a)
|
||||
#endif
|
||||
*/
|
||||
|
||||
/*
|
||||
#define DECRED_NBITS_INDEX 29
|
||||
#define DECRED_NTIME_INDEX 34
|
||||
#define DECRED_NONCE_INDEX 35
|
||||
#define DECRED_XNONCE_INDEX 36
|
||||
#define DECRED_DATA_SIZE 192
|
||||
#define DECRED_WORK_COMPARE_SIZE 140
|
||||
|
||||
*/
|
||||
static __thread sph_blake256_context blake_mid;
|
||||
static __thread bool ctx_midstate_done = false;
|
||||
|
||||
void decred_hash(void *state, const void *input)
|
||||
{
|
||||
#define MIDSTATE_LEN 128
|
||||
// #define MIDSTATE_LEN 128
|
||||
sph_blake256_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
uint8_t *ending = (uint8_t*) input;
|
||||
ending += MIDSTATE_LEN;
|
||||
ending += DECRED_MIDSTATE_LEN;
|
||||
|
||||
if (!ctx_midstate_done) {
|
||||
sph_blake256_init(&blake_mid);
|
||||
sph_blake256(&blake_mid, input, MIDSTATE_LEN);
|
||||
sph_blake256(&blake_mid, input, DECRED_MIDSTATE_LEN);
|
||||
ctx_midstate_done = true;
|
||||
}
|
||||
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
|
||||
|
||||
sph_blake256(&ctx, ending, (180 - MIDSTATE_LEN));
|
||||
sph_blake256(&ctx, ending, (180 - DECRED_MIDSTATE_LEN));
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
@@ -59,9 +59,9 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
#define DCR_NONCE_OFT32 35
|
||||
// #define DCR_NONCE_OFT32 35
|
||||
|
||||
const uint32_t first_nonce = pdata[DCR_NONCE_OFT32];
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
@@ -81,7 +81,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
|
||||
do {
|
||||
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
|
||||
endiandata[DCR_NONCE_OFT32] = n;
|
||||
endiandata[DECRED_NONCE_INDEX] = n;
|
||||
decred_hash(hash32, endiandata);
|
||||
|
||||
if (hash32[7] <= HTarget && fulltest(hash32, ptarget)) {
|
||||
@@ -92,7 +92,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
applog_hash(ptarget);
|
||||
applog_compare_hash(hash32, ptarget);
|
||||
#endif
|
||||
pdata[DCR_NONCE_OFT32] = n;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -101,10 +101,11 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[DCR_NONCE_OFT32] = n;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return &work_data[ DECRED_NONCE_INDEX ];
|
||||
@@ -172,7 +173,7 @@ void decred_be_build_stratum_request( char *req, struct work *work,
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
*/
|
||||
/*
|
||||
// data shared between gen_merkle_root and build_extraheader.
|
||||
__thread uint32_t decred_extraheader[32] = { 0 };
|
||||
@@ -188,7 +189,7 @@ void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
#define min(a,b) (a>b ? (b) :(a))
|
||||
|
||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
@@ -282,4 +283,4 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
*/
|
||||
|
206
algo/blake/pentablake-4way.c
Normal file
206
algo/blake/pentablake-4way.c
Normal file
@@ -0,0 +1,206 @@
|
||||
#include "pentablake-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
//#define DEBUG_ALGO
|
||||
|
||||
#ifdef PENTABLAKE_4WAY
|
||||
|
||||
extern void pentablakehash_4way( void *output, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(32) hash[128];
|
||||
// // same as uint32_t hashA[16], hashB[16];
|
||||
// #define hashB hash+64
|
||||
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake512_4way_context ctx;
|
||||
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, input, 80 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
uint64_t sin0[10], sin1[10], sin2[10], sin3[10];
|
||||
m256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
|
||||
sph_blake512_context ctx2_blake;
|
||||
sph_blake512_init(&ctx2_blake);
|
||||
sph_blake512(&ctx2_blake, sin0, 80);
|
||||
sph_blake512_close(&ctx2_blake, (void*) hash);
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
uint64_t* hash64 = (uint64_t*)hash;
|
||||
for( int i = 0; i < 8; i++ )
|
||||
{
|
||||
if ( hash0[i] != hash64[i] )
|
||||
printf("hash mismatch %u\n",i);
|
||||
}
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
|
||||
/*
|
||||
uint64_t sin0[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin1[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin2[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin3[10] __attribute__ ((aligned (64)));
|
||||
|
||||
sph_blake512_context ctx_blake;
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, input, 80);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[32] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
// uint32_t _ALIGN(32) hash64[8];
|
||||
// uint32_t _ALIGN(32) endiandata[32];
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
pentablakehash_4way( hash, vdata );
|
||||
|
||||
// return immediately on nonce found, only one submit
|
||||
if ( ( !(hash[7] & mask) ) && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
if ( (! ((hash+8)[7] & mask) ) && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) ) && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) ) && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
16
algo/blake/pentablake-gate.c
Normal file
16
algo/blake/pentablake-gate.c
Normal file
@@ -0,0 +1,16 @@
|
||||
#include "pentablake-gate.h"
|
||||
|
||||
bool register_pentablake_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (PENTABLAKE_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_pentablake_4way;
|
||||
gate->hash = (void*)&pentablakehash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_pentablake;
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
21
algo/blake/pentablake-gate.h
Normal file
21
algo/blake/pentablake-gate.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef __PENTABLAKE_GATE_H__
|
||||
#define __PENTABLAKE_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX__)
|
||||
#define PENTABLAKE_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(PENTABLAKE_4WAY)
|
||||
void pentablakehash_4way( void *state, const void *input );
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void pentablakehash( void *state, const void *input );
|
||||
int scanhash_pentablake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "pentablake-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -110,11 +110,3 @@ int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_pentablake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_pentablake;
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -983,9 +983,11 @@ blake64_close(sph_blake_big_context *sc,
|
||||
u.buf[111] |= 1;
|
||||
sph_enc64be_aligned(u.buf + 112, th);
|
||||
sph_enc64be_aligned(u.buf + 120, tl);
|
||||
|
||||
blake64(sc, u.buf + ptr, 128 - ptr);
|
||||
} else {
|
||||
memset(u.buf + ptr + 1, 0, 127 - ptr);
|
||||
|
||||
blake64(sc, u.buf + ptr, 128 - ptr);
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
||||
@@ -994,6 +996,7 @@ blake64_close(sph_blake_big_context *sc,
|
||||
u.buf[111] = 1;
|
||||
sph_enc64be_aligned(u.buf + 112, th);
|
||||
sph_enc64be_aligned(u.buf + 120, tl);
|
||||
|
||||
blake64(sc, u.buf, 128);
|
||||
}
|
||||
out = dst;
|
||||
|
639
algo/jh/jh-hash-4way.c
Normal file
639
algo/jh/jh-hash-4way.c
Normal file
@@ -0,0 +1,639 @@
|
||||
/* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
|
||||
/*
|
||||
* JH implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "jh-hash-4way.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
|
||||
#define SPH_SMALL_FOOTPRINT_JH 1
|
||||
#endif
|
||||
|
||||
#if !defined SPH_JH_64 && SPH_64_TRUE
|
||||
#define SPH_JH_64 1
|
||||
#endif
|
||||
|
||||
#if !SPH_64
|
||||
#undef SPH_JH_64
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The internal bitslice representation may use either big-endian or
|
||||
* little-endian (true bitslice operations do not care about the bit
|
||||
* ordering, and the bit-swapping linear operations in JH happen to
|
||||
* be invariant through endianness-swapping). The constants must be
|
||||
* defined according to the chosen endianness; we use some
|
||||
* byte-swapping macros for that.
|
||||
*/
|
||||
|
||||
#if SPH_LITTLE_ENDIAN
|
||||
|
||||
#if SPH_64
|
||||
#define C64e(x) ((SPH_C64(x) >> 56) \
|
||||
| ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
|
||||
| ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
|
||||
| ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
|
||||
| ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
|
||||
| ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
|
||||
| ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
|
||||
| ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
|
||||
#define dec64e_aligned sph_dec64le_aligned
|
||||
#define enc64e sph_enc64le
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if SPH_64
|
||||
#define C64e(x) SPH_C64(x)
|
||||
#define dec64e_aligned sph_dec64be_aligned
|
||||
#define enc64e sph_enc64be
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define Sb(x0, x1, x2, x3, c) \
|
||||
do { \
|
||||
__m256i cc = _mm256_set_epi64x( c, c, c, c ); \
|
||||
x3 = mm256_bitnot( x3 ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_and_si256( cc, mm256_bitnot( x2 ) ) ); \
|
||||
tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_and_si256( x2, x3 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, _mm256_and_si256( mm256_bitnot( x1 ), x2 ) ); \
|
||||
x1 = _mm256_xor_si256( x1, _mm256_and_si256( x0, x2 ) ); \
|
||||
x2 = _mm256_xor_si256( x2, _mm256_and_si256( x0, mm256_bitnot( x3 ) ) ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_or_si256( x1, x3 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, _mm256_and_si256( x1, x2 ) ); \
|
||||
x1 = _mm256_xor_si256( x1, _mm256_and_si256( tmp, x0 ) ); \
|
||||
x2 = _mm256_xor_si256( x2, tmp ); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
#define Sb(x0, x1, x2, x3, c) do { \
|
||||
x3 = ~x3; \
|
||||
x0 ^= (c) & ~x2; \
|
||||
tmp = (c) ^ (x0 & x1); \
|
||||
x0 ^= x2 & x3; \
|
||||
x3 ^= ~x1 & x2; \
|
||||
x1 ^= x0 & x2; \
|
||||
x2 ^= x0 & ~x3; \
|
||||
x0 ^= x1 | x3; \
|
||||
x3 ^= x1 & x2; \
|
||||
x1 ^= tmp & x0; \
|
||||
x2 ^= tmp; \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) \
|
||||
do { \
|
||||
x4 = _mm256_xor_si256( x4, x1 ); \
|
||||
x5 = _mm256_xor_si256( x5, x2 ); \
|
||||
x6 = _mm256_xor_si256( x6, _mm256_xor_si256( x3, x0 ) ); \
|
||||
x7 = _mm256_xor_si256( x7, x0 ); \
|
||||
x0 = _mm256_xor_si256( x0, x5 ); \
|
||||
x1 = _mm256_xor_si256( x1, x6 ); \
|
||||
x2 = _mm256_xor_si256( x2, _mm256_xor_si256( x7, x4 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, x4 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*
|
||||
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) do { \
|
||||
x4 ^= x1; \
|
||||
x5 ^= x2; \
|
||||
x6 ^= x3 ^ x0; \
|
||||
x7 ^= x0; \
|
||||
x0 ^= x5; \
|
||||
x1 ^= x6; \
|
||||
x2 ^= x7 ^ x4; \
|
||||
x3 ^= x4; \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
static const sph_u64 C[] = {
|
||||
C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
|
||||
C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
|
||||
C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
|
||||
C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
|
||||
C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
|
||||
C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
|
||||
C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
|
||||
C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
|
||||
C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
|
||||
C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
|
||||
C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
|
||||
C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
|
||||
C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
|
||||
C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
|
||||
C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
|
||||
C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
|
||||
C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
|
||||
C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
|
||||
C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
|
||||
C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
|
||||
C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
|
||||
C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
|
||||
C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
|
||||
C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
|
||||
C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
|
||||
C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
|
||||
C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
|
||||
C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
|
||||
C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
|
||||
C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
|
||||
C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
|
||||
C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
|
||||
C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
|
||||
C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
|
||||
C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
|
||||
C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
|
||||
C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
|
||||
C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
|
||||
C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
|
||||
C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
|
||||
C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
|
||||
C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
|
||||
C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
|
||||
C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
|
||||
C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
|
||||
C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
|
||||
C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
|
||||
C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
|
||||
C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
|
||||
C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
|
||||
C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
|
||||
C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
|
||||
C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
|
||||
C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
|
||||
C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
|
||||
C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
|
||||
C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
|
||||
C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
|
||||
C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
|
||||
C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
|
||||
C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
|
||||
C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
|
||||
C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
|
||||
C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
|
||||
C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
|
||||
C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
|
||||
C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
|
||||
C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
|
||||
C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
|
||||
C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
|
||||
C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
|
||||
C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
|
||||
C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
|
||||
C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
|
||||
C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
|
||||
C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
|
||||
C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
|
||||
C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
|
||||
C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
|
||||
C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
|
||||
C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
|
||||
C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
|
||||
C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
|
||||
C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
|
||||
};
|
||||
|
||||
#define Ceven_hi(r) (C[((r) << 2) + 0])
|
||||
#define Ceven_lo(r) (C[((r) << 2) + 1])
|
||||
#define Codd_hi(r) (C[((r) << 2) + 2])
|
||||
#define Codd_lo(r) (C[((r) << 2) + 3])
|
||||
|
||||
#define S(x0, x1, x2, x3, cb, r) do { \
|
||||
Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
|
||||
Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
|
||||
} while (0)
|
||||
|
||||
#define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
|
||||
Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
|
||||
x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
|
||||
Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
|
||||
x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define Wz(x, c, n) \
|
||||
do { \
|
||||
__m256i t = _mm256_slli_epi64( _mm256_and_si256(x ## h, (c)), (n) ); \
|
||||
x ## h = _mm256_or_si256( _mm256_and_si256( \
|
||||
_mm256_srli_epi64(x ## h, (n)), (c)), t ); \
|
||||
t = _mm256_slli_epi64( _mm256_and_si256(x ## l, (c)), (n) ); \
|
||||
x ## l = _mm256_or_si256( _mm256_and_si256((x ## l >> (n)), (c)), t ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*
|
||||
#define Wz(x, c, n) do { \
|
||||
sph_u64 t = (x ## h & (c)) << (n); \
|
||||
x ## h = ((x ## h >> (n)) & (c)) | t; \
|
||||
t = (x ## l & (c)) << (n); \
|
||||
x ## l = ((x ## l >> (n)) & (c)) | t; \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
#define W0(x) Wz(x, _mm256_set_epi64x( 0x5555555555555555, \
|
||||
0x5555555555555555, 0x5555555555555555, 0x5555555555555555 ), 1 )
|
||||
#define W1(x) Wz(x, _mm256_set_epi64x( 0x3333333333333333, \
|
||||
0x3333333333333333, 0x3333333333333333, 0x3333333333333333 ), 2 )
|
||||
#define W2(x) Wz(x, _mm256_set_epi64x( 0x0F0F0F0F0F0F0F0F, \
|
||||
0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F ), 4 )
|
||||
#define W3(x) Wz(x, _mm256_set_epi64x( 0x00FF00FF00FF00FF, \
|
||||
0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF ), 8 )
|
||||
#define W4(x) Wz(x, _mm256_set_epi64x( 0x0000FFFF0000FFFF, \
|
||||
0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF ), 16 )
|
||||
#define W5(x) Wz(x, _mm256_set_epi64x( 0x00000000FFFFFFFF, \
|
||||
0x00000000FFFFFFFF, 0x00000000FFFFFFFF, 0x00000000FFFFFFFF ), 32 )
|
||||
#define W6(x) \
|
||||
do { \
|
||||
__m256i t = x ## h; \
|
||||
x ## h = x ## l; \
|
||||
x ## l = t; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
#define W0(x) Wz(x, SPH_C64(0x5555555555555555), 1)
|
||||
#define W1(x) Wz(x, SPH_C64(0x3333333333333333), 2)
|
||||
#define W2(x) Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F), 4)
|
||||
#define W3(x) Wz(x, SPH_C64(0x00FF00FF00FF00FF), 8)
|
||||
#define W4(x) Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16)
|
||||
#define W5(x) Wz(x, SPH_C64(0x00000000FFFFFFFF), 32)
|
||||
#define W6(x) do { \
|
||||
sph_u64 t = x ## h; \
|
||||
x ## h = x ## l; \
|
||||
x ## l = t; \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
#define DECL_STATE \
|
||||
__m256i h0h, h1h, h2h, h3h, h4h, h5h, h6h, h7h; \
|
||||
__m256i h0l, h1l, h2l, h3l, h4l, h5l, h6l, h7l; \
|
||||
__m256i tmp;
|
||||
|
||||
#define READ_STATE(state) do { \
|
||||
h0h = (state)->H[ 0]; \
|
||||
h0l = (state)->H[ 1]; \
|
||||
h1h = (state)->H[ 2]; \
|
||||
h1l = (state)->H[ 3]; \
|
||||
h2h = (state)->H[ 4]; \
|
||||
h2l = (state)->H[ 5]; \
|
||||
h3h = (state)->H[ 6]; \
|
||||
h3l = (state)->H[ 7]; \
|
||||
h4h = (state)->H[ 8]; \
|
||||
h4l = (state)->H[ 9]; \
|
||||
h5h = (state)->H[10]; \
|
||||
h5l = (state)->H[11]; \
|
||||
h6h = (state)->H[12]; \
|
||||
h6l = (state)->H[13]; \
|
||||
h7h = (state)->H[14]; \
|
||||
h7l = (state)->H[15]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE(state) do { \
|
||||
(state)->H[ 0] = h0h; \
|
||||
(state)->H[ 1] = h0l; \
|
||||
(state)->H[ 2] = h1h; \
|
||||
(state)->H[ 3] = h1l; \
|
||||
(state)->H[ 4] = h2h; \
|
||||
(state)->H[ 5] = h2l; \
|
||||
(state)->H[ 6] = h3h; \
|
||||
(state)->H[ 7] = h3l; \
|
||||
(state)->H[ 8] = h4h; \
|
||||
(state)->H[ 9] = h4l; \
|
||||
(state)->H[10] = h5h; \
|
||||
(state)->H[11] = h5l; \
|
||||
(state)->H[12] = h6h; \
|
||||
(state)->H[13] = h6l; \
|
||||
(state)->H[14] = h7h; \
|
||||
(state)->H[15] = h7l; \
|
||||
} while (0)
|
||||
|
||||
#define INPUT_BUF1 \
|
||||
__m256i m0h = buf[0]; \
|
||||
__m256i m0l = buf[1]; \
|
||||
__m256i m1h = buf[2]; \
|
||||
__m256i m1l = buf[3]; \
|
||||
__m256i m2h = buf[4]; \
|
||||
__m256i m2l = buf[5]; \
|
||||
__m256i m3h = buf[6]; \
|
||||
__m256i m3l = buf[7]; \
|
||||
h0h = _mm256_xor_si256( h0h, m0h ); \
|
||||
h0l = _mm256_xor_si256( h0l, m0l ); \
|
||||
h1h = _mm256_xor_si256( h1h, m1h ); \
|
||||
h1l = _mm256_xor_si256( h1l, m1l ); \
|
||||
h2h = _mm256_xor_si256( h2h, m2h ); \
|
||||
h2l = _mm256_xor_si256( h2l, m2l ); \
|
||||
h3h = _mm256_xor_si256( h3h, m3h ); \
|
||||
h3l = _mm256_xor_si256( h3l, m3l ); \
|
||||
|
||||
#define INPUT_BUF2 \
|
||||
h4h = _mm256_xor_si256( h4h, m0h ); \
|
||||
h4l = _mm256_xor_si256( h4l, m0l ); \
|
||||
h5h = _mm256_xor_si256( h5h, m1h ); \
|
||||
h5l = _mm256_xor_si256( h5l, m1l ); \
|
||||
h6h = _mm256_xor_si256( h6h, m2h ); \
|
||||
h6l = _mm256_xor_si256( h6l, m2l ); \
|
||||
h7h = _mm256_xor_si256( h7h, m3h ); \
|
||||
h7l = _mm256_xor_si256( h7l, m3l ); \
|
||||
|
||||
static const sph_u64 IV256[] = {
|
||||
C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
|
||||
C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
|
||||
C64e(0xa4239e267726b945), C64e(0xe0fb1a48d41a9477),
|
||||
C64e(0xcdb5ab26026b177a), C64e(0x56f024420fff2fa8),
|
||||
C64e(0x71a396897f2e4d75), C64e(0x1d144908f77de262),
|
||||
C64e(0x277695f776248f94), C64e(0x87d5b6574780296c),
|
||||
C64e(0x5c5e272dac8e0d6c), C64e(0x518450c657057a0f),
|
||||
C64e(0x7be4d367702412ea), C64e(0x89e3ab13d31cd769)
|
||||
};
|
||||
|
||||
|
||||
static const sph_u64 IV512[] = {
|
||||
C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
|
||||
C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
|
||||
C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
|
||||
C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
|
||||
C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
|
||||
C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
|
||||
C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
|
||||
C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#define SL(ro) SLu(r + ro, ro)
|
||||
|
||||
#define SLu(r, ro) do { \
|
||||
S(h0, h2, h4, h6, Ceven_, r); \
|
||||
S(h1, h3, h5, h7, Codd_, r); \
|
||||
L(h0, h2, h4, h6, h1, h3, h5, h7); \
|
||||
W ## ro(h1); \
|
||||
W ## ro(h3); \
|
||||
W ## ro(h5); \
|
||||
W ## ro(h7); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_JH
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
/*
|
||||
* The "small footprint" 64-bit version just uses a partially unrolled
|
||||
* loop.
|
||||
*/
|
||||
|
||||
#define E8 do { \
|
||||
unsigned r; \
|
||||
for (r = 0; r < 42; r += 7) { \
|
||||
SL(0); \
|
||||
SL(1); \
|
||||
SL(2); \
|
||||
SL(3); \
|
||||
SL(4); \
|
||||
SL(5); \
|
||||
SL(6); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
/*
|
||||
* On a "true 64-bit" architecture, we can unroll at will.
|
||||
*/
|
||||
|
||||
#define E8 do { \
|
||||
SLu( 0, 0); \
|
||||
SLu( 1, 1); \
|
||||
SLu( 2, 2); \
|
||||
SLu( 3, 3); \
|
||||
SLu( 4, 4); \
|
||||
SLu( 5, 5); \
|
||||
SLu( 6, 6); \
|
||||
SLu( 7, 0); \
|
||||
SLu( 8, 1); \
|
||||
SLu( 9, 2); \
|
||||
SLu(10, 3); \
|
||||
SLu(11, 4); \
|
||||
SLu(12, 5); \
|
||||
SLu(13, 6); \
|
||||
SLu(14, 0); \
|
||||
SLu(15, 1); \
|
||||
SLu(16, 2); \
|
||||
SLu(17, 3); \
|
||||
SLu(18, 4); \
|
||||
SLu(19, 5); \
|
||||
SLu(20, 6); \
|
||||
SLu(21, 0); \
|
||||
SLu(22, 1); \
|
||||
SLu(23, 2); \
|
||||
SLu(24, 3); \
|
||||
SLu(25, 4); \
|
||||
SLu(26, 5); \
|
||||
SLu(27, 6); \
|
||||
SLu(28, 0); \
|
||||
SLu(29, 1); \
|
||||
SLu(30, 2); \
|
||||
SLu(31, 3); \
|
||||
SLu(32, 4); \
|
||||
SLu(33, 5); \
|
||||
SLu(34, 6); \
|
||||
SLu(35, 0); \
|
||||
SLu(36, 1); \
|
||||
SLu(37, 2); \
|
||||
SLu(38, 3); \
|
||||
SLu(39, 4); \
|
||||
SLu(40, 5); \
|
||||
SLu(41, 6); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
jh_4way_init( jh_4way_context *sc, const void *iv )
|
||||
{
|
||||
uint64_t *v = (uint64_t*)iv;
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
sc->H[i] = _mm256_set_epi64x( v[i], v[i], v[i], v[i] );
|
||||
sc->ptr = 0;
|
||||
sc->block_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
jh_4way_core( jh_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *buf;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
const int buf_size = 64; // 64 * _m256i
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
|
||||
if ( len < (buf_size - ptr) )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
INPUT_BUF1;
|
||||
E8;
|
||||
INPUT_BUF2;
|
||||
sc->block_count ++;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
size_t out_size_w32, const void *iv )
|
||||
{
|
||||
__m256i buf[16*4];
|
||||
__m256i *dst256 = (__m256i*)dst;
|
||||
size_t numz, u;
|
||||
sph_u64 l0, l1, l0e, l1e;
|
||||
|
||||
buf[0] = _mm256_set_epi64x( 0x80, 0x80, 0x80, 0x80 );
|
||||
|
||||
if ( sc->ptr == 0 )
|
||||
numz = 48;
|
||||
else
|
||||
numz = 112 - sc->ptr;
|
||||
|
||||
memset_zero_m256i( buf+1, (numz>>3) - 1 );
|
||||
|
||||
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
|
||||
l1 = SPH_T64(sc->block_count >> 55);
|
||||
sph_enc64be( &l0e, l0 );
|
||||
sph_enc64be( &l1e, l1 );
|
||||
*(buf + (numz>>3) ) = _mm256_set_epi64x( l1e, l1e, l1e, l1e );
|
||||
*(buf + (numz>>3) + 1) = _mm256_set_epi64x( l0e, l0e, l0e, l0e );
|
||||
|
||||
jh_4way_core( sc, buf, numz + 16 );
|
||||
|
||||
for ( u=0; u < 8; u++ )
|
||||
buf[u] = sc->H[u+8];
|
||||
|
||||
memcpy_m256i( dst256, buf, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
jh256_4way_init(void *cc)
|
||||
{
|
||||
jh_4way_init(cc, IV256);
|
||||
}
|
||||
|
||||
void
|
||||
jh256_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
jh_4way_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
jh256_4way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_4way_close(cc, 0, 0, dst, 8, IV256);
|
||||
}
|
||||
|
||||
void
|
||||
jh512_4way_init(void *cc)
|
||||
{
|
||||
jh_4way_init(cc, IV512);
|
||||
}
|
||||
|
||||
void
|
||||
jh512_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
jh_4way_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
jh512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_4way_close(cc, 0, 0, dst, 16, IV512);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
100
algo/jh/jh-hash-4way.h
Normal file
100
algo/jh/jh-hash-4way.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* JH interface. JH is a family of functions which differ by
|
||||
* their output size; this implementation defines JH for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_jh.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef JH_HASH_4WAY_H__
|
||||
#define JH_HASH_4WAY_H__
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#define SPH_SIZE_jh256 256
|
||||
|
||||
#define SPH_SIZE_jh512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for JH computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a JH computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running JH computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
__m256i buf[8] __attribute__ ((aligned (64)));
|
||||
__m256i H[16];
|
||||
size_t ptr;
|
||||
uint64_t block_count;
|
||||
/*
|
||||
unsigned char buf[64];
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u64 wide[16];
|
||||
} H;
|
||||
sph_u64 block_count;
|
||||
*/
|
||||
} jh_4way_context;
|
||||
|
||||
typedef jh_4way_context jh256_4way_context;
|
||||
|
||||
typedef jh_4way_context jh512_4way_context;
|
||||
|
||||
void jh256_4way_init(void *cc);
|
||||
|
||||
void jh256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void jh256_4way_close(void *cc, void *dst);
|
||||
|
||||
void jh512_4way_init(void *cc);
|
||||
|
||||
void jh512_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void jh512_4way_close(void *cc, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
228
algo/jh/jha-4way.c
Normal file
228
algo/jh/jha-4way.c
Normal file
@@ -0,0 +1,228 @@
|
||||
#if defined(JHA_4WAY)
|
||||
|
||||
#include "jha-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "avxdefs.h"
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
|
||||
//static __thread keccak512_4way_context jha_kec_mid
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void jha_hash_4way( void *output, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhasha[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashb[8*4] __attribute__ ((aligned (64)));
|
||||
__m256i mask;
|
||||
__m256i* vh256 = (__m256i*)vhash;
|
||||
__m256i* vha256 = (__m256i*)vhasha;
|
||||
__m256i* vhb256 = (__m256i*)vhashb;
|
||||
|
||||
blake512_4way_context ctx_blake;
|
||||
hashState_groestl ctx_groestl;
|
||||
jh512_4way_context ctx_jh;
|
||||
skein512_4way_context ctx_skein;
|
||||
keccak512_4way_context ctx_keccak;
|
||||
|
||||
keccak512_4way_init( &ctx_keccak );
|
||||
keccak512_4way( &ctx_keccak, input, 80 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
// memcpy( &ctx_keccak, &jha_kec_mid, sizeof jha_kec_mid );
|
||||
// keccak512_4way( &ctx_keccak, input+64, 16 );
|
||||
// keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
// Heavy & Light Pair Loop
|
||||
for ( int round = 0; round < 3; round++ )
|
||||
{
|
||||
memset_zero_m256i( vha256, 20 );
|
||||
memset_zero_m256i( vhb256, 20 );
|
||||
|
||||
mask = _mm256_sub_epi64( _mm256_and_si256( vh256[0],
|
||||
mm256_vec_epi64( 0x1 ) ), mm256_vec_epi64( 0x1 ) );
|
||||
|
||||
// groestl (serial) v skein
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
|
||||
m256_interleave_4x64( vhasha, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// skein
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx_skein, vhashb );
|
||||
|
||||
// merge vectored hash
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
vha256[i] = _mm256_maskload_epi64(
|
||||
vhasha + i*4, mm256_bitnot(mask ) );
|
||||
vhb256[i] = _mm256_maskload_epi64(
|
||||
vhashb + i*4, mask );
|
||||
vh256[i] = _mm256_or_si256( vha256[i], vhb256[i] );
|
||||
}
|
||||
|
||||
// blake v jh
|
||||
|
||||
blake512_4way_init( &ctx_blake );
|
||||
blake512_4way( &ctx_blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx_blake, vhasha );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
jh512_4way( &ctx_jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx_jh, vhashb );
|
||||
|
||||
// merge vectored hash
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
vha256[i] = _mm256_maskload_epi64(
|
||||
vhasha + i*4, mm256_bitnot(mask ) );
|
||||
vhb256[i] = _mm256_maskload_epi64(
|
||||
vhashb + i*4, mask );
|
||||
vh256[i] = _mm256_or_si256( vha256[i], vhb256[i] );
|
||||
}
|
||||
}
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
|
||||
}
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate for keccak
|
||||
// keccak512_4way_init( &jha_kec_mid );
|
||||
// keccak512_4way( &jha_kec_mid, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
jha_hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( !(hash[7] & mask) )
|
||||
&& fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( !((hash+8)[7] & mask) )
|
||||
&& fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) )
|
||||
&& fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) )
|
||||
&& fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
#endif
|
18
algo/jh/jha-gate.c
Normal file
18
algo/jh/jha-gate.c
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "jha-gate.h"
|
||||
|
||||
|
||||
bool register_jha_algo( algo_gate_t* gate )
|
||||
{
|
||||
//#if defined (JHA_4WAY)
|
||||
// gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_jha_4way;
|
||||
// gate->hash = (void*)&jha_hash_4way;
|
||||
//#else
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_jha;
|
||||
gate->hash = (void*)&jha_hash;
|
||||
//#endif
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
return true;
|
||||
};
|
||||
|
27
algo/jh/jha-gate.h
Normal file
27
algo/jh/jha-gate.h
Normal file
@@ -0,0 +1,27 @@
|
||||
#ifndef JHA_GATE_H__
|
||||
#define JHA_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__) && !defined(NO_AES_NI)
|
||||
#define JHA_4WAY
|
||||
#endif
|
||||
|
||||
//#if defined JHA_4WAY
|
||||
//void jha_hash_4way( void *state, const void *input );
|
||||
|
||||
//int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
//#else
|
||||
|
||||
void jha_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_jha( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
//#endif
|
||||
|
||||
#endif
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "jha-gate.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
@@ -38,7 +38,6 @@ void jha_hash(void *output, const void *input)
|
||||
sph_keccak512_context ctx_keccak;
|
||||
sph_skein512_context ctx_skein;
|
||||
|
||||
sph_keccak512_init(&ctx_keccak);
|
||||
memcpy( &ctx_keccak, &jha_kec_mid, sizeof jha_kec_mid );
|
||||
sph_keccak512(&ctx_keccak, input+64, 16 );
|
||||
sph_keccak512_close(&ctx_keccak, hash );
|
||||
@@ -154,12 +153,3 @@ int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *ha
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_jha_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_jha;
|
||||
gate->hash = (void*)&jha_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -914,6 +914,7 @@ jh_core(sph_jh_context *sc, const void *data, size_t len)
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
|
@@ -6,7 +6,7 @@
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
#ifdef __AVX2__
|
||||
#ifdef KECCAK_4WAY
|
||||
|
||||
void keccakhash_4way(void *state, const void *input)
|
||||
{
|
||||
@@ -21,7 +21,7 @@ void keccakhash_4way(void *state, const void *input)
|
||||
keccak256_4way( &ctx, input, 80 );
|
||||
keccak256_4way_close( &ctx, vhash );
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
@@ -33,16 +33,16 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
// const uint32_t Htarg = ptarget[7];
|
||||
uint32_t endiandata[20];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
@@ -52,11 +52,10 @@ int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
m256_interleave_4x64x( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
num_found = 0;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
|
@@ -12,7 +12,7 @@ bool register_keccak_algo( algo_gate_t* gate )
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccak_set_target;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
#if defined (FOUR_WAY) && defined (__AVX2__)
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
gate->hash = (void*)&keccakhash_4way;
|
||||
|
@@ -4,7 +4,11 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__)
|
||||
#define KECCAK_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(KECCAK_4WAY)
|
||||
|
||||
void keccakhash_4way( void *state, const void *input );
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
@@ -372,7 +372,7 @@ static const sph_u64 RC[] = {
|
||||
} while (0)
|
||||
|
||||
|
||||
static void keccak64_init( keccak64_ctx_m256i *kc, size_t lim )
|
||||
static void keccak64_init( keccak64_ctx_m256i *kc, unsigned out_size )
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 25; i ++)
|
||||
@@ -386,7 +386,7 @@ static void keccak64_init( keccak64_ctx_m256i *kc, size_t lim )
|
||||
kc->w[17] = mm256_neg1;
|
||||
kc->w[20] = mm256_neg1;
|
||||
kc->ptr = 0;
|
||||
kc->lim = 200 - (lim >> 2);
|
||||
kc->lim = 200 - (out_size >> 2);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -396,6 +396,7 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
__m256i *buf;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = kc->buf;
|
||||
ptr = kc->ptr;
|
||||
@@ -407,6 +408,7 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE( kc );
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
@@ -425,6 +427,7 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE( kc );
|
||||
kc->ptr = ptr;
|
||||
}
|
||||
|
||||
@@ -440,12 +443,11 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
size_t m256_len = byte_len >> 3;
|
||||
|
||||
eb = 0x100 >> 8;
|
||||
|
||||
if ( kc->ptr == (lim - 1) )
|
||||
if ( kc->ptr == (lim - 8) )
|
||||
{
|
||||
uint64_t t = eb | 0x80;
|
||||
uint64_t t = eb | 0x8000000000000000;
|
||||
u.tmp[0] = _mm256_set_epi64x( t, t, t, t );
|
||||
j = 1;
|
||||
j = 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -1,581 +0,0 @@
|
||||
#include <stddef.h>
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
static const sph_u64 RC[] = {
|
||||
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
|
||||
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
|
||||
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
|
||||
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
|
||||
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
|
||||
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
|
||||
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
|
||||
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
|
||||
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
|
||||
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
|
||||
};
|
||||
|
||||
|
||||
// change u.wide and u.narrow to just w, ie kc->w
|
||||
|
||||
#define a00 (kc->w[ 0])
|
||||
#define a10 (kc->w[ 1])
|
||||
#define a20 (kc->w[ 2])
|
||||
#define a30 (kc->w[ 3])
|
||||
#define a40 (kc->w[ 4])
|
||||
#define a01 (kc->w[ 5])
|
||||
#define a11 (kc->w[ 6])
|
||||
#define a21 (kc->w[ 7])
|
||||
#define a31 (kc->w[ 8])
|
||||
#define a41 (kc->w[ 9])
|
||||
#define a02 (kc->w[10])
|
||||
#define a12 (kc->w[11])
|
||||
#define a22 (kc->w[12])
|
||||
#define a32 (kc->w[13])
|
||||
#define a42 (kc->w[14])
|
||||
#define a03 (kc->w[15])
|
||||
#define a13 (kc->w[16])
|
||||
#define a23 (kc->w[17])
|
||||
#define a33 (kc->w[18])
|
||||
#define a43 (kc->w[19])
|
||||
#define a04 (kc->w[20])
|
||||
#define a14 (kc->w[21])
|
||||
#define a24 (kc->w[22])
|
||||
#define a34 (kc->w[23])
|
||||
#define a44 (kc->w[24])
|
||||
|
||||
// null when no copy
|
||||
#define DECL_STATE
|
||||
#define READ_STATE(sc)
|
||||
#define WRITE_STATE(sc)
|
||||
|
||||
#define INPUT_BUF(size) do { \
|
||||
size_t j; \
|
||||
for (j = 0; j < (size>>3); j++ ) \
|
||||
kc->w[j ] = _mm256_xor_si256( kc->w[j], buf[j] ); \
|
||||
} while (0)
|
||||
|
||||
// keccak256 4way uses 136 with 32 bit size
|
||||
// keccak256 8way and keccak512 4 way use 72 with 64 bit size
|
||||
//#define INPUT_BUF144 INPUT_BUF(144)
|
||||
//#define INPUT_BUF136 INPUT_BUF(136)
|
||||
//#define INPUT_BUF104 INPUT_BUF(104)
|
||||
//#define INPUT_BUF72 INPUT_BUF(72)
|
||||
|
||||
//simply redefine these macros to do simd
|
||||
#define mm256_neg1 \
|
||||
(_mm256_set_epi64x( 0xffffffffffffffff, 0xffffffffffffffff, \
|
||||
0xffffffffffffffff, 0xffffffffffffffff ) )
|
||||
|
||||
#define DECL64(x) __m256i x
|
||||
#define MOV64(d, s) (d = s)
|
||||
#define XOR64(d, a, b) (d = _mm256_xor_si256(a,b))
|
||||
#define AND64(d, a, b) (d = _mm256_and_si256(a,b))
|
||||
#define OR64(d, a, b) (d = _mm256_or_si256(a,b))
|
||||
#define NOT64(d, s) (d = _mm256_xor_si256(s,mm256_neg1))
|
||||
#define ROL64(d, v, n) (d = mm256_rotl_64(v, n))
|
||||
#define XOR64_IOTA XOR64
|
||||
|
||||
/*
|
||||
#define DECL64(x) sph_u64 x
|
||||
#define MOV64(d, s) (d = s)
|
||||
#define XOR64(d, a, b) (d = a ^ b)
|
||||
#define AND64(d, a, b) (d = a & b)
|
||||
#define OR64(d, a, b) (d = a | b)
|
||||
#define NOT64(d, s) (d = SPH_T64(~s))
|
||||
#define ROL64(d, v, n) (d = SPH_ROTL64(v, n))
|
||||
#define XOR64_IOTA XOR64
|
||||
*/
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
||||
DECL64(tt0); \
|
||||
DECL64(tt1); \
|
||||
DECL64(tt2); \
|
||||
DECL64(tt3); \
|
||||
uint64_t *ttx = (uint64_t*)&tt0; \
|
||||
uint64_t *d0x = (uint64_t*)&d0; \
|
||||
uint64_t *d1x = (uint64_t*)&d1; \
|
||||
uint64_t *d2x = (uint64_t*)&d2; \
|
||||
uint64_t *d3x = (uint64_t*)&d3; \
|
||||
uint64_t *d4x = (uint64_t*)&d4; \
|
||||
XOR64(tt0, d0, d1); \
|
||||
if (vtp) {printf("Velt0\n"); \
|
||||
printf("d0= %016llx\n",*d0x ); \
|
||||
printf("d1= %016llx\n",*d1x ); \
|
||||
printf("d2= %016llx\n",*d2x ); \
|
||||
printf("d3= %016llx\n",*d3x ); \
|
||||
printf("d4= %016llx\n",*d4x ); \
|
||||
printf("tt0= %016llx\n",*ttx );} \
|
||||
XOR64(tt1, d2, d3); \
|
||||
XOR64(tt0, tt0, d4); \
|
||||
XOR64(tt0, tt0, tt1); \
|
||||
if(vtp){\
|
||||
printf("tt0= %016llx\n",*ttx );} \
|
||||
ROL64(tt0, tt0, 1); \
|
||||
if(vtp){\
|
||||
printf("tt0= %016llx\n",*ttx );} \
|
||||
XOR64(tt2, c0, c1); \
|
||||
XOR64(tt3, c2, c3); \
|
||||
XOR64(tt0, tt0, c4); \
|
||||
XOR64(tt2, tt2, tt3); \
|
||||
XOR64(t, tt0, tt2); \
|
||||
} while (0)
|
||||
|
||||
int vtp = 0;
|
||||
|
||||
#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
||||
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
||||
b40, b41, b42, b43, b44) \
|
||||
do { \
|
||||
DECL64(t0); \
|
||||
DECL64(t1); \
|
||||
DECL64(t2); \
|
||||
DECL64(t3); \
|
||||
DECL64(t4); \
|
||||
TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
|
||||
if(vtp){printf("Velt0\n");\
|
||||
uint64_t *tx = (uint64_t*)&t0; \
|
||||
printf("t0= %016llx\n",tx );} \
|
||||
vtp=0; \
|
||||
TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
|
||||
TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
|
||||
TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
|
||||
TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
|
||||
XOR64(b00, b00, t0); \
|
||||
XOR64(b01, b01, t0); \
|
||||
XOR64(b02, b02, t0); \
|
||||
XOR64(b03, b03, t0); \
|
||||
XOR64(b04, b04, t0); \
|
||||
XOR64(b10, b10, t1); \
|
||||
XOR64(b11, b11, t1); \
|
||||
XOR64(b12, b12, t1); \
|
||||
XOR64(b13, b13, t1); \
|
||||
XOR64(b14, b14, t1); \
|
||||
XOR64(b20, b20, t2); \
|
||||
XOR64(b21, b21, t2); \
|
||||
XOR64(b22, b22, t2); \
|
||||
XOR64(b23, b23, t2); \
|
||||
XOR64(b24, b24, t2); \
|
||||
XOR64(b30, b30, t3); \
|
||||
XOR64(b31, b31, t3); \
|
||||
XOR64(b32, b32, t3); \
|
||||
XOR64(b33, b33, t3); \
|
||||
XOR64(b34, b34, t3); \
|
||||
XOR64(b40, b40, t4); \
|
||||
XOR64(b41, b41, t4); \
|
||||
XOR64(b42, b42, t4); \
|
||||
XOR64(b43, b43, t4); \
|
||||
XOR64(b44, b44, t4); \
|
||||
} while (0)
|
||||
|
||||
#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
||||
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
||||
b40, b41, b42, b43, b44) \
|
||||
do { \
|
||||
/* ROL64(b00, b00, 0); */ \
|
||||
ROL64(b01, b01, 36); \
|
||||
ROL64(b02, b02, 3); \
|
||||
ROL64(b03, b03, 41); \
|
||||
ROL64(b04, b04, 18); \
|
||||
ROL64(b10, b10, 1); \
|
||||
ROL64(b11, b11, 44); \
|
||||
ROL64(b12, b12, 10); \
|
||||
ROL64(b13, b13, 45); \
|
||||
ROL64(b14, b14, 2); \
|
||||
ROL64(b20, b20, 62); \
|
||||
ROL64(b21, b21, 6); \
|
||||
ROL64(b22, b22, 43); \
|
||||
ROL64(b23, b23, 15); \
|
||||
ROL64(b24, b24, 61); \
|
||||
ROL64(b30, b30, 28); \
|
||||
ROL64(b31, b31, 55); \
|
||||
ROL64(b32, b32, 25); \
|
||||
ROL64(b33, b33, 21); \
|
||||
ROL64(b34, b34, 56); \
|
||||
ROL64(b40, b40, 27); \
|
||||
ROL64(b41, b41, 20); \
|
||||
ROL64(b42, b42, 39); \
|
||||
ROL64(b43, b43, 8); \
|
||||
ROL64(b44, b44, 14); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* The KHI macro integrates the "lane complement" optimization. On input,
|
||||
* some words are complemented:
|
||||
* a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
|
||||
* On output, the following words are complemented:
|
||||
* a04 a10 a20 a22 a23 a31
|
||||
*
|
||||
* The (implicit) permutation and the theta expansion will bring back
|
||||
* the input mask for the next round.
|
||||
*/
|
||||
|
||||
#define KHI_XO(d, a, b, c) do { \
|
||||
DECL64(kt); \
|
||||
OR64(kt, b, c); \
|
||||
XOR64(d, a, kt); \
|
||||
} while (0)
|
||||
|
||||
#define KHI_XA(d, a, b, c) do { \
|
||||
DECL64(kt); \
|
||||
AND64(kt, b, c); \
|
||||
XOR64(d, a, kt); \
|
||||
} while (0)
|
||||
|
||||
#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
||||
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
||||
b40, b41, b42, b43, b44) \
|
||||
do { \
|
||||
DECL64(c0); \
|
||||
DECL64(c1); \
|
||||
DECL64(c2); \
|
||||
DECL64(c3); \
|
||||
DECL64(c4); \
|
||||
DECL64(bnn); \
|
||||
NOT64(bnn, b20); \
|
||||
KHI_XO(c0, b00, b10, b20); \
|
||||
KHI_XO(c1, b10, bnn, b30); \
|
||||
KHI_XA(c2, b20, b30, b40); \
|
||||
KHI_XO(c3, b30, b40, b00); \
|
||||
KHI_XA(c4, b40, b00, b10); \
|
||||
MOV64(b00, c0); \
|
||||
MOV64(b10, c1); \
|
||||
MOV64(b20, c2); \
|
||||
MOV64(b30, c3); \
|
||||
MOV64(b40, c4); \
|
||||
NOT64(bnn, b41); \
|
||||
KHI_XO(c0, b01, b11, b21); \
|
||||
KHI_XA(c1, b11, b21, b31); \
|
||||
KHI_XO(c2, b21, b31, bnn); \
|
||||
KHI_XO(c3, b31, b41, b01); \
|
||||
KHI_XA(c4, b41, b01, b11); \
|
||||
MOV64(b01, c0); \
|
||||
MOV64(b11, c1); \
|
||||
MOV64(b21, c2); \
|
||||
MOV64(b31, c3); \
|
||||
MOV64(b41, c4); \
|
||||
NOT64(bnn, b32); \
|
||||
KHI_XO(c0, b02, b12, b22); \
|
||||
KHI_XA(c1, b12, b22, b32); \
|
||||
KHI_XA(c2, b22, bnn, b42); \
|
||||
KHI_XO(c3, bnn, b42, b02); \
|
||||
KHI_XA(c4, b42, b02, b12); \
|
||||
MOV64(b02, c0); \
|
||||
MOV64(b12, c1); \
|
||||
MOV64(b22, c2); \
|
||||
MOV64(b32, c3); \
|
||||
MOV64(b42, c4); \
|
||||
NOT64(bnn, b33); \
|
||||
KHI_XA(c0, b03, b13, b23); \
|
||||
KHI_XO(c1, b13, b23, b33); \
|
||||
KHI_XO(c2, b23, bnn, b43); \
|
||||
KHI_XA(c3, bnn, b43, b03); \
|
||||
KHI_XO(c4, b43, b03, b13); \
|
||||
MOV64(b03, c0); \
|
||||
MOV64(b13, c1); \
|
||||
MOV64(b23, c2); \
|
||||
MOV64(b33, c3); \
|
||||
MOV64(b43, c4); \
|
||||
NOT64(bnn, b14); \
|
||||
KHI_XA(c0, b04, bnn, b24); \
|
||||
KHI_XO(c1, bnn, b24, b34); \
|
||||
KHI_XA(c2, b24, b34, b44); \
|
||||
KHI_XO(c3, b34, b44, b04); \
|
||||
KHI_XA(c4, b44, b04, b14); \
|
||||
MOV64(b04, c0); \
|
||||
MOV64(b14, c1); \
|
||||
MOV64(b24, c2); \
|
||||
MOV64(b34, c3); \
|
||||
MOV64(b44, c4); \
|
||||
} while (0)
|
||||
|
||||
#define IOTA(r) XOR64_IOTA(a00, a00, r)
|
||||
|
||||
#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
|
||||
a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
|
||||
#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
|
||||
a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
|
||||
#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
|
||||
a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
|
||||
#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
|
||||
a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
|
||||
#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
|
||||
a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
|
||||
#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
|
||||
a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
|
||||
#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
|
||||
a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
|
||||
#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
|
||||
a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
|
||||
#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
|
||||
a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
|
||||
#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
|
||||
a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
|
||||
#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
|
||||
a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
|
||||
#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
|
||||
a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
|
||||
#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
|
||||
a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
|
||||
#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
|
||||
a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
|
||||
#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
|
||||
a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
|
||||
#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
|
||||
a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
|
||||
#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
|
||||
a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
|
||||
#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
|
||||
a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
|
||||
#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
|
||||
a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
|
||||
#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
|
||||
a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
|
||||
#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
|
||||
a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
|
||||
#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
|
||||
a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
|
||||
#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
|
||||
a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
|
||||
#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
|
||||
a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
|
||||
|
||||
#define P8_TO_P0 do { \
|
||||
DECL64(t); \
|
||||
MOV64(t, a01); \
|
||||
MOV64(a01, a11); \
|
||||
MOV64(a11, a43); \
|
||||
MOV64(a43, t); \
|
||||
MOV64(t, a02); \
|
||||
MOV64(a02, a22); \
|
||||
MOV64(a22, a31); \
|
||||
MOV64(a31, t); \
|
||||
MOV64(t, a03); \
|
||||
MOV64(a03, a33); \
|
||||
MOV64(a33, a24); \
|
||||
MOV64(a24, t); \
|
||||
MOV64(t, a04); \
|
||||
MOV64(a04, a44); \
|
||||
MOV64(a44, a12); \
|
||||
MOV64(a12, t); \
|
||||
MOV64(t, a10); \
|
||||
MOV64(a10, a32); \
|
||||
MOV64(a32, a13); \
|
||||
MOV64(a13, t); \
|
||||
MOV64(t, a14); \
|
||||
MOV64(a14, a21); \
|
||||
MOV64(a21, a20); \
|
||||
MOV64(a20, t); \
|
||||
MOV64(t, a23); \
|
||||
MOV64(a23, a42); \
|
||||
MOV64(a42, a40); \
|
||||
MOV64(a40, t); \
|
||||
MOV64(t, a30); \
|
||||
MOV64(a30, a41); \
|
||||
MOV64(a41, a34); \
|
||||
MOV64(a34, t); \
|
||||
} while (0)
|
||||
|
||||
#define LPAR (
|
||||
#define RPAR )
|
||||
|
||||
#define KF_ELT(r, s, k) do { \
|
||||
if(r==0){ vtp=1; printf("Vtheo0\n");}\
|
||||
THETA LPAR P ## r RPAR; \
|
||||
if(vtp=1){ \
|
||||
uint64_t *W = (uint64_t*)(kc->w); \
|
||||
printf("w: %016x %016x %016x %016x\n", W[0], W[4], W[8], W[12] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[16], W[20], W[24], W[28] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[32], W[36], W[40], W[44] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[48], W[52], W[56], W[60] );} \
|
||||
vtp=0; \
|
||||
RHO LPAR P ## r RPAR; \
|
||||
if(r==0){ printf("Vrho0\n");\
|
||||
uint64_t *W = (uint64_t*)(kc->w); \
|
||||
printf("w: %016x %016x %016x %016x\n", W[0], W[4], W[8], W[12] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[16], W[20], W[24], W[28] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[32], W[36], W[40], W[44] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[48], W[52], W[56], W[60] );} \
|
||||
KHI LPAR P ## s RPAR; \
|
||||
if(r==0){ printf("Vkhi0\n");\
|
||||
uint64_t *W = (uint64_t*)(kc->w); \
|
||||
printf("w: %016x %016x %016x %016x\n", W[0], W[4], W[8], W[12] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[16], W[20], W[24], W[28] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[32], W[36], W[40], W[44] ); \
|
||||
printf(" %016x %016x %016x %016x\n", W[48], W[52], W[56], W[60] );} \
|
||||
IOTA(k); \
|
||||
} while (0)
|
||||
|
||||
#define DO(x) x
|
||||
|
||||
#define KECCAK_F_1600 DO(KECCAK_F_1600_)
|
||||
|
||||
#define KECCAK_F_1600_ do { \
|
||||
int j; \
|
||||
for (j = 0; j < 24; j += 8) \
|
||||
{ \
|
||||
KF_ELT( 0, 1, (_mm256_set_epi64x( RC[j + 0], RC[j + 0], \
|
||||
RC[j + 0], RC[j + 0])) ); \
|
||||
KF_ELT( 1, 2, (_mm256_set_epi64x( RC[j + 1], RC[j + 1], \
|
||||
RC[j + 1], RC[j + 1])) ); \
|
||||
KF_ELT( 2, 3, (_mm256_set_epi64x( RC[j + 2], RC[j + 2], \
|
||||
RC[j + 2], RC[j + 2])) ); \
|
||||
KF_ELT( 3, 4, (_mm256_set_epi64x( RC[j + 3], RC[j + 3], \
|
||||
RC[j + 3], RC[j + 3])) ); \
|
||||
KF_ELT( 4, 5, (_mm256_set_epi64x( RC[j + 4], RC[j + 4], \
|
||||
RC[j + 4], RC[j + 4])) ); \
|
||||
KF_ELT( 5, 6, (_mm256_set_epi64x( RC[j + 5], RC[j + 5], \
|
||||
RC[j + 5], RC[j + 5])) ); \
|
||||
KF_ELT( 6, 7, (_mm256_set_epi64x( RC[j + 6], RC[j + 6], \
|
||||
RC[j + 6], RC[j + 6])) ); \
|
||||
KF_ELT( 7, 8, (_mm256_set_epi64x( RC[j + 7], RC[j + 7], \
|
||||
RC[j + 7], RC[j + 7])) ); \
|
||||
P8_TO_P0; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
static void keccak64_init( keccak64_ctx_m256i *kc, size_t lim )
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 25; i ++)
|
||||
kc->w[i] = _mm256_setzero_si256();
|
||||
|
||||
// Initialization for the "lane complement".
|
||||
kc->w[ 1] = mm256_neg1;
|
||||
kc->w[ 2] = mm256_neg1;
|
||||
kc->w[ 8] = mm256_neg1;
|
||||
kc->w[12] = mm256_neg1;
|
||||
kc->w[17] = mm256_neg1;
|
||||
kc->w[20] = mm256_neg1;
|
||||
kc->ptr = 0;
|
||||
kc->lim = 200 - (lim >> 2);
|
||||
}
|
||||
|
||||
static void
|
||||
keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
size_t lim )
|
||||
{
|
||||
__m256i *buf;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
|
||||
buf = kc->buf;
|
||||
ptr = kc->ptr;
|
||||
uint64_t *W = (uint64_t*)(kc->w);
|
||||
|
||||
if ( len < (lim - ptr) )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
kc->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = (lim - ptr);
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == lim )
|
||||
{
|
||||
INPUT_BUF( lim );
|
||||
printf("Vtransform before ptr= %u, len= %u, lim= %u\n",ptr, len, lim);
|
||||
printf("w: %016x %016x %016x %016x\n", W[0], W[4], W[8], W[12] );
|
||||
printf(" %016x %016x %016x %016x\n", W[16], W[20], W[24], W[28] );
|
||||
printf(" %016x %016x %016x %016x\n", W[32], W[36], W[40], W[44] );
|
||||
printf(" %016x %016x %016x %016x\n", W[48], W[52], W[56], W[60] );
|
||||
KECCAK_F_1600;
|
||||
//printf("Vtransform after ptr= %u, len= %u, lim= %u\n",ptr, len, lim);
|
||||
//printf("w: %016x %016x %016x %016x\n", W[0], W[4], W[8], W[12] );
|
||||
//printf(" %016x %016x %016x %016x\n", W[16], W[20], W[24], W[28] );
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
kc->ptr = ptr;
|
||||
}
|
||||
|
||||
// keccak512 4way d=64 lim=72, keccak256 8way d=32 lim=136
|
||||
// keccak256 4way d=32 lim=136
|
||||
|
||||
// keccak512 d=64, lim=72, keccak256 d=32, lim=136
|
||||
static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t d,
|
||||
size_t lim )
|
||||
{
|
||||
unsigned eb;
|
||||
union {
|
||||
__m256i tmp[lim + 1];
|
||||
sph_u64 dummy; /* for alignment */
|
||||
} u;
|
||||
size_t j;
|
||||
// int d = 64;
|
||||
|
||||
eb = 0x100 >> 8;
|
||||
|
||||
if ( kc->ptr == (lim - 1) )
|
||||
{
|
||||
uint64_t t = eb | 0x80;
|
||||
u.tmp[0] = _mm256_set_epi64x( t, t, t, t );
|
||||
j = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
j = lim - kc->ptr;
|
||||
u.tmp[0] = _mm256_set_epi64x( eb, eb, eb, eb );
|
||||
memset_zero_m256i( u.tmp + 1, (j>>3) - 1 );
|
||||
u.tmp[j - 1] = _mm256_set_epi64x( 0x8000000000000000,
|
||||
0x8000000000000000, 0x8000000000000000, 0x8000000000000000);
|
||||
}
|
||||
keccak64_core( kc, u.tmp, j, lim );
|
||||
/* Finalize the "lane complement" */
|
||||
NOT64( kc->w[ 1], kc->w[ 1] );
|
||||
NOT64( kc->w[ 2], kc->w[ 2] );
|
||||
NOT64( kc->w[ 8], kc->w[ 8] );
|
||||
NOT64( kc->w[12], kc->w[12] );
|
||||
NOT64( kc->w[17], kc->w[17] );
|
||||
NOT64( kc->w[20], kc->w[20] );
|
||||
for ( j = 0; j < d; j += 8 )
|
||||
u.tmp[j] = kc->w[j>>3];
|
||||
memcpy_m256i( dst, u.tmp, d>>3 );
|
||||
}
|
||||
|
||||
void keccak256_4way_init( void *kc )
|
||||
{
|
||||
keccak64_init( kc, 256 );
|
||||
}
|
||||
|
||||
void
|
||||
keccak256_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
keccak64_core(cc, data, len, 136);
|
||||
}
|
||||
|
||||
void
|
||||
keccak256_4way_close(void *cc, void *dst)
|
||||
{
|
||||
keccak64_close(cc, dst, 32, 136);
|
||||
}
|
||||
|
||||
void keccak512_4way_init( void *kc )
|
||||
{
|
||||
keccak64_init( kc, 512 );
|
||||
}
|
||||
|
||||
void
|
||||
keccak512_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
keccak64_core(cc, data, len, 72);
|
||||
}
|
||||
|
||||
void
|
||||
keccak512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
keccak64_close(cc, dst, 64, 72);
|
||||
}
|
||||
|
@@ -40,6 +40,8 @@
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
@@ -62,8 +64,6 @@ extern "C"{
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[144*8]; /* first field, for alignment */
|
||||
__m256i w[25];
|
||||
|
File diff suppressed because it is too large
Load Diff
10
algo/m7m.c
10
algo/m7m.c
@@ -197,6 +197,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
sph_tiger( &ctx1.tiger, data, M7_MIDSTATE_LEN );
|
||||
sph_ripemd160( &ctx1.ripemd, data, M7_MIDSTATE_LEN );
|
||||
|
||||
// the following calculations can be performed once and the results shared
|
||||
mpz_t magipi, magisw, product, bns0, bns1;
|
||||
mpf_t magifpi, magifpi0, mpt1, mpt2, mptmp, mpten;
|
||||
|
||||
@@ -221,6 +222,9 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
|
||||
memcpy( &ctx2, &ctx1, sizeof(m7m_ctx) );
|
||||
|
||||
// with 4 way can a single midstate be shared among lanes?
|
||||
// do sinlge round of midstate and inyerleave for final
|
||||
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_Update( &ctx2.sha256, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
SHA256_Final( (unsigned char*) (bhash[0]), &ctx2.sha256 );
|
||||
@@ -249,6 +253,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
sph_ripemd160( &ctx2.ripemd, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
sph_ripemd160_close( &ctx2.ripemd, (void*)(bhash[6]) );
|
||||
|
||||
// 4 way serial
|
||||
mpz_import(bns0, a, -1, p, -1, 0, bhash[0]);
|
||||
mpz_set(bns1, bns0);
|
||||
mpz_set(product, bns0);
|
||||
@@ -274,6 +279,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
sph_sha256_close( &ctxf_sha256, (void*)(hash) );
|
||||
#endif
|
||||
|
||||
// do once and share
|
||||
digits=(int)((sqrt((double)(n/2))*(1.+EPS))/9000+75);
|
||||
mp_bitcnt_t prec = (long int)(digits*BITS_PER_DIGIT+16);
|
||||
mpf_set_prec_raw(magifpi, prec);
|
||||
@@ -296,7 +302,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
mpz_set_f(magipi, magifpi);
|
||||
mpz_add(magipi,magipi,magisw);
|
||||
mpz_add(product,product,magipi);
|
||||
|
||||
// share magipi, product and do serial
|
||||
mpz_import(bns0, b, -1, p, -1, 0, (void*)(hash));
|
||||
mpz_add(bns1, bns1, bns0);
|
||||
mpz_mul(product,product,bns1);
|
||||
@@ -317,6 +323,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
#endif
|
||||
}
|
||||
|
||||
// this is the scanhash part
|
||||
const unsigned char *hash_ = (const unsigned char *)hash;
|
||||
const unsigned char *target_ = (const unsigned char *)ptarget;
|
||||
for ( i = 31; i >= 0; i-- )
|
||||
@@ -346,6 +353,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
// do this in hashm7m
|
||||
out:
|
||||
mpf_set_prec_raw(magifpi, prec0);
|
||||
mpf_set_prec_raw(magifpi0, prec0);
|
||||
|
178
algo/nist5/nist5-4way.c
Normal file
178
algo/nist5/nist5-4way.c
Normal file
@@ -0,0 +1,178 @@
|
||||
#include "nist5-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(NIST5_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
|
||||
// no improvement with midstate
|
||||
//static __thread blake512_4way_context ctx_mid;
|
||||
|
||||
void nist5hash_4way( void *output, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake512_4way_context ctx_blake;
|
||||
hashState_groestl ctx_groestl;
|
||||
jh512_4way_context ctx_jh;
|
||||
skein512_4way_context ctx_skein;
|
||||
keccak512_4way_context ctx_keccak;
|
||||
|
||||
// memcpy( &ctx_blake, &ctx_mid, sizeof(ctx_mid) );
|
||||
// blake512_4way( &ctx_blake, input + (64<<2), 16 );
|
||||
|
||||
blake512_4way_init( &ctx_blake );
|
||||
blake512_4way( &ctx_blake, input, 80 );
|
||||
blake512_4way_close( &ctx_blake, vhash );
|
||||
|
||||
m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
(const char*)hash0, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash1,
|
||||
(const char*)hash1, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash2,
|
||||
(const char*)hash2, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
|
||||
m256_interleave_4x64x( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
jh512_4way( &ctx_jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx_jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx_keccak );
|
||||
keccak512_4way( &ctx_keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx_skein, vhash );
|
||||
|
||||
m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
|
||||
uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate
|
||||
// blake512_4way_init( &ctx_mid );
|
||||
// blake512_4way( &ctx_mid, vdata, 64 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
if (Htarg <= htmax[m])
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
nist5hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( !(hash[7] & mask) )
|
||||
&& fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( !((hash+8)[7] & mask) )
|
||||
&& fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) )
|
||||
&& fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) )
|
||||
&& fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
17
algo/nist5/nist5-gate.c
Normal file
17
algo/nist5/nist5-gate.c
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "nist5-gate.h"
|
||||
|
||||
bool register_nist5_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (NIST5_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_nist5_4way;
|
||||
gate->hash = (void*)&nist5hash_4way;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
init_nist5_ctx();
|
||||
gate->scanhash = (void*)&scanhash_nist5;
|
||||
gate->hash = (void*)&nist5hash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
26
algo/nist5/nist5-gate.h
Normal file
26
algo/nist5/nist5-gate.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#ifndef __NIST5_GATE_H__
|
||||
#define __NIST5_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__) && !defined(NO_AES_NI)
|
||||
#define NIST5_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(NIST5_4WAY)
|
||||
|
||||
void nist5hash_4way( void *state, const void *input );
|
||||
|
||||
int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#else
|
||||
|
||||
void nist5hash( void *state, const void *input );
|
||||
|
||||
int scanhash_nist5( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -1,4 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "nist5-gate.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
@@ -147,7 +147,7 @@ int scanhash_nist5(int thr_id, struct work *work,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
bool register_nist5_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
@@ -156,4 +156,4 @@ bool register_nist5_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&nist5hash;
|
||||
return true;
|
||||
};
|
||||
|
||||
*/
|
@@ -1,125 +0,0 @@
|
||||
#include "polytimos-gate.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/luffa/sph_luffa.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */
|
||||
typedef struct {
|
||||
sph_skein512_context skein;
|
||||
sph_luffa512_context luffa;
|
||||
// hashState_luffa luffa;
|
||||
//#ifdef NO_AES_NI
|
||||
sph_echo512_context echo;
|
||||
//#else
|
||||
// hashState_echo echo;
|
||||
//#endif
|
||||
sph_shabal512_context shabal;
|
||||
sph_fugue512_context fugue;
|
||||
sph_gost512_context gost;
|
||||
} poly_context_holder;
|
||||
|
||||
static __thread poly_context_holder poly_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_polytimos_context()
|
||||
{
|
||||
sph_skein512_init(&poly_ctx.skein);
|
||||
sph_shabal512_init(&poly_ctx.shabal);
|
||||
//#ifdef NO_AES_NI
|
||||
sph_echo512_init(&poly_ctx.echo);
|
||||
//#else
|
||||
// init_echo( &poly_ctx.echo, 512 );
|
||||
//#endif
|
||||
// init_luffa( &poly_ctx.luffa, 512 );
|
||||
sph_luffa512_init(&poly_ctx.luffa);
|
||||
sph_fugue512_init(&poly_ctx.fugue);
|
||||
sph_gost512_init(&poly_ctx.gost);
|
||||
}
|
||||
|
||||
void polytimos_hash(void *output, const void *input)
|
||||
{
|
||||
poly_context_holder ctx __attribute__ ((aligned (64)));
|
||||
uint32_t hashA[16]__attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &poly_ctx, sizeof(poly_ctx) );
|
||||
|
||||
sph_skein512(&ctx.skein, input, 80);
|
||||
sph_skein512_close(&ctx.skein, hashA);
|
||||
|
||||
sph_shabal512(&ctx.shabal, hashA, 64);
|
||||
sph_shabal512_close(&ctx.shabal, hashA);
|
||||
//#ifdef NO_AES_NI
|
||||
sph_echo512(&ctx.echo, hashA, 64);
|
||||
sph_echo512_close(&ctx.echo, hashA);
|
||||
//#else
|
||||
// update_final_echo ( &ctx.echo, (BitSequence *)hashA,
|
||||
// (const BitSequence *)hashA, 512 );
|
||||
//#endif
|
||||
|
||||
// update_and_final_luffa( &ctx.luffa, (BitSequence*)hashA,
|
||||
// (const BitSequence*)hashA, 64 );
|
||||
|
||||
sph_luffa512(&ctx.luffa, hashA, 64);
|
||||
sph_luffa512_close(&ctx.luffa, hashA);
|
||||
|
||||
sph_fugue512(&ctx.fugue, hashA, 64);
|
||||
sph_fugue512_close(&ctx.fugue, hashA);
|
||||
|
||||
sph_gost512(&ctx.gost, hashA, 64);
|
||||
sph_gost512_close(&ctx.gost, hashA);
|
||||
|
||||
memcpy(output, hashA, 32);
|
||||
}
|
||||
|
||||
int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
// we need bigendian data...
|
||||
for (int i=0; i < 19; i++) {
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
polytimos_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !(*restart));
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1,4 +1,3 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "skein-gate.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
@@ -60,7 +59,7 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// hash is returned deinterleaved
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found;
|
||||
int num_found = 0;
|
||||
|
||||
// data is 80 bytes, 20 u32 or 4 u64.
|
||||
|
||||
@@ -76,7 +75,6 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
num_found = 0;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
|
@@ -1,7 +1,4 @@
|
||||
#include "skein-gate.h"
|
||||
#include "algo-gate-api.h"
|
||||
//#include <string.h>
|
||||
//#include <stdint.h>
|
||||
#include "sph_skein.h"
|
||||
#include "skein-hash-4way.h"
|
||||
|
||||
@@ -9,7 +6,7 @@ int64_t skein_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_skein_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (FOUR_WAY) && defined (__AVX2__)
|
||||
#if defined (SKEIN_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_4way;
|
||||
gate->hash = (void*)&skeinhash_4way;
|
||||
|
@@ -1,15 +1,22 @@
|
||||
#ifndef __SKEIN_GATE_H__
|
||||
#define __SKEIN_GATE_H__
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__)
|
||||
#define SKEIN_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(SKEIN_4WAY)
|
||||
|
||||
void skeinhash_4way( void *output, const void *input );
|
||||
|
||||
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void skeinhash( void *output, const void *input );
|
||||
|
||||
int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
|
@@ -296,7 +296,6 @@ do { \
|
||||
w5 = _mm256_add_epi64( w5, _mm256_add_epi64( SKBI(k,s,5), \
|
||||
_mm256_set_epi64x( SKBT(t,s,0), SKBT(t,s,0), \
|
||||
SKBT(t,s,0), SKBT(t,s,0) ) ) ); \
|
||||
__m256i skbi6 = SKBI(k,s,6); \
|
||||
w6 = _mm256_add_epi64( w6, _mm256_add_epi64( SKBI(k,s,6), \
|
||||
_mm256_set_epi64x( SKBT(t,s,1), SKBT(t,s,1), \
|
||||
SKBT(t,s,1), SKBT(t,s,1) ) ) ); \
|
||||
@@ -458,20 +457,10 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
unsigned first;
|
||||
DECL_STATE_BIG_4WAY
|
||||
|
||||
// len is the array size, of data, ie 64 bytes
|
||||
// data points to start of 4 element buf
|
||||
// ptr is a len offset in bytes
|
||||
// buff is an array of 4 elements
|
||||
// buff_size is size of one array element
|
||||
// One element is 8 bytes (64 bits) scalar but 32 bytes (256 bits) 4way
|
||||
// To index buf using ptr it has to be scaled 8 to 1. the amounrt of
|
||||
// data to copy is 32 bytes per element instead of 8, or one m256
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
const int buf_size = 64; // 64 * _m256i
|
||||
|
||||
// 64 byte len, no part block
|
||||
if ( len <= buf_size - ptr )
|
||||
{
|
||||
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
|
||||
@@ -481,8 +470,6 @@ skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
|
||||
READ_STATE_BIG( sc );
|
||||
first = ( bcount == 0 ) << 7;
|
||||
// 64 byte len, only one block, no transform here.
|
||||
// 80 byte len, transform first 64 bytes.
|
||||
do {
|
||||
size_t clen;
|
||||
|
||||
@@ -512,19 +499,7 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
__m256i *buf;
|
||||
size_t ptr;
|
||||
unsigned et;
|
||||
int i;
|
||||
DECL_STATE_BIG_4WAY
|
||||
/*
|
||||
* Add bit padding if necessary.
|
||||
*/
|
||||
// if (n != 0) {
|
||||
// unsigned z;
|
||||
// unsigned char x;
|
||||
//
|
||||
// z = 0x80 >> n;
|
||||
// x = ((ub & -z) | z) & 0xFF;
|
||||
// skein_big_core(sc, &x, 1);
|
||||
// }
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
@@ -543,8 +518,6 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
* the encoding of "0", over 8 bytes, then padded with zeros).
|
||||
*/
|
||||
|
||||
// 64 byte len, process only block
|
||||
// 80 byte len, process last part block (16 bytes) padded.
|
||||
READ_STATE_BIG(sc);
|
||||
|
||||
memset_zero_m256i( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
@@ -555,28 +528,6 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
bcount = 0;
|
||||
UBI_BIG_4WAY( 510, 8 );
|
||||
|
||||
// for ( i = 0; i < 2; i ++ )
|
||||
// {
|
||||
// UBI_BIG_AVX2( et, ptr );
|
||||
// if (i == 0)
|
||||
// {
|
||||
// memset_zero_m256i( buf, buf_size >> 3 );
|
||||
// bcount = 0;
|
||||
// et = 510;
|
||||
// ptr = 8;
|
||||
// }
|
||||
// }
|
||||
|
||||
// Can LE be assumed? Should be ok SPH_LITTLE_ENDIAN is defined
|
||||
/* _mm256_enc64le( buf, h0 );
|
||||
_mm256_enc64le( buf + 32, h1 );
|
||||
_mm256_enc64le( buf + 64, h2 );
|
||||
_mm256_enc64le( buf + 96, h3 );
|
||||
_mm256_enc64le( buf + 128, h4 );
|
||||
_mm256_enc64le( buf + 160, h5 );
|
||||
_mm256_enc64le( buf + 192, h6 );
|
||||
_mm256_enc64le( buf + 224, h7 );
|
||||
*/
|
||||
buf[0] = h0;
|
||||
buf[1] = h1;
|
||||
buf[2] = h2;
|
||||
@@ -587,7 +538,6 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
buf[7] = h7;
|
||||
|
||||
memcpy_m256i( dst, buf, out_len >> 3 );
|
||||
// memcpy( dst, buf, out_len * 4 );
|
||||
}
|
||||
|
||||
static const sph_u64 IV256[] = {
|
||||
|
@@ -1,10 +1,9 @@
|
||||
#include "skein-gate.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "skein2-gate.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "skein-hash-4way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(SKEIN2_4WAY)
|
||||
|
||||
void skein2hash_4way( void *output, const void *input )
|
||||
{
|
||||
@@ -38,7 +37,7 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// hash is returned deinterleaved
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found;
|
||||
int num_found = 0;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
@@ -52,7 +51,6 @@ int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
num_found = 0;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
|
@@ -1,9 +1,6 @@
|
||||
#include "skein2-gate.h"
|
||||
#include "algo-gate-api.h"
|
||||
//#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_skein.h"
|
||||
//#include "skein-hash-avx2.h"
|
||||
|
||||
int64_t skein2_get_max64 ()
|
||||
{
|
||||
|
@@ -1,8 +1,13 @@
|
||||
#ifndef __SKEIN2GATE_H__
|
||||
#define __SKEIN2_GATE_H__
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__)
|
||||
#define SKEIN2_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(SKEIN2_4WAY)
|
||||
void skein2hash_4way( void *output, const void *input );
|
||||
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t* hashes_done );
|
||||
|
@@ -39,25 +39,6 @@
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
void dump_sph_context( sph_u64 ptr, sph_u64 bcount, uint64_t* buf,
|
||||
sph_u64 h0, sph_u64 h1, sph_u64 h2, sph_u64 h3, sph_u64 h4, sph_u64 h5,
|
||||
sph_u64 h6, sph_u64 h7 )
|
||||
{
|
||||
//scalar
|
||||
printf("sptr= %llu, bcount= %llu\n", ptr, bcount );
|
||||
|
||||
printf("sbuf: %016llx %016llx %016llx %016llx\n", *((uint64_t*)buf),
|
||||
*((uint64_t*)buf+1), *((uint64_t*)buf+2), *((uint64_t*)buf+3) );
|
||||
|
||||
printf(" %016llx %016llx %016llx %016llx\n", *((uint64_t*)buf+4),
|
||||
*((uint64_t*)buf+5), *((uint64_t*)buf+6), *((uint64_t*)buf+7) );
|
||||
|
||||
printf("sh:%016llx %016llx %016llx %016llx\n", h0, h1, h2, h3 );
|
||||
|
||||
printf(" %016llx %016llx %016llx %016llx\n", h4, h5, h6, h7 );
|
||||
|
||||
}
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SKEIN
|
||||
#define SPH_SMALL_FOOTPRINT_SKEIN 1
|
||||
#endif
|
||||
|
162
algo/tribus/tribus-4way.c
Normal file
162
algo/tribus/tribus-4way.c
Normal file
@@ -0,0 +1,162 @@
|
||||
#include "tribus-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__AVX2__) && !defined(NO_AES_NI)
|
||||
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
|
||||
static __thread jh512_4way_context ctx_mid;
|
||||
|
||||
void tribus_hash_4way(void *state, const void *input)
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
jh512_4way_context ctx_jh;
|
||||
keccak512_4way_context ctx_keccak;
|
||||
hashState_echo ctx_echo;
|
||||
|
||||
memcpy( &ctx_jh, &ctx_mid, sizeof(ctx_mid) );
|
||||
jh512_4way( &ctx_jh, input + (64<<2), 16 );
|
||||
jh512_4way_close( &ctx_jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx_keccak );
|
||||
keccak512_4way( &ctx_keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// hash echo serially
|
||||
init_echo( &ctx_echo, 512 );
|
||||
update_final_echo( &ctx_echo, (BitSequence *) hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
init_echo( &ctx_echo, 512 );
|
||||
update_final_echo( &ctx_echo, (BitSequence *) hash1,
|
||||
(const BitSequence *) hash1, 512 );
|
||||
init_echo( &ctx_echo, 512 );
|
||||
update_final_echo( &ctx_echo, (BitSequence *) hash2,
|
||||
(const BitSequence *) hash2, 512 );
|
||||
init_echo( &ctx_echo, 512 );
|
||||
update_final_echo( &ctx_echo, (BitSequence *) hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_tribus_4way(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
|
||||
uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
// we need bigendian data...
|
||||
for ( int i = 0; i < 20; i++ )
|
||||
{
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
}
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate
|
||||
// doing it one way then then interleaving would be faster but too
|
||||
// complicated tto interleave context.
|
||||
jh512_4way_init( &ctx_mid );
|
||||
jh512_4way( &ctx_mid, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
tribus_hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( !(hash[7] & mask) )
|
||||
&& fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio(work, hash);
|
||||
}
|
||||
if ( ( !((hash+8)[7] & mask) )
|
||||
&& fulltest (hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio(work, hash+8);
|
||||
}
|
||||
if ( ( !((hash+16)[7] & mask) )
|
||||
&& fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio(work, hash+16);
|
||||
}
|
||||
if ( ( !((hash+24)[7] & mask) )
|
||||
&& fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio(work, hash+24);
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
31
algo/tribus/tribus-gate.c
Normal file
31
algo/tribus/tribus-gate.c
Normal file
@@ -0,0 +1,31 @@
|
||||
#include "tribus-gate.h"
|
||||
/*
|
||||
bool tribus_thread_init()
|
||||
{
|
||||
sph_jh512_init( &tribus_ctx.jh );
|
||||
sph_keccak512_init( &tribus_ctx.keccak );
|
||||
#ifdef NO_AES_NI
|
||||
sph_echo512_init( &tribus_ctx.echo );
|
||||
#else
|
||||
init_echo( &tribus_ctx.echo, 512 );
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
bool register_tribus_algo( algo_gate_t* gate )
|
||||
{
|
||||
// gate->miner_thread_init = (void*)&tribus_thread_init;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ffff;
|
||||
#if defined (TRIBUS_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_tribus_4way;
|
||||
gate->hash = (void*)&tribus_hash_4way;
|
||||
#else
|
||||
gate->miner_thread_init = (void*)&tribus_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_tribus;
|
||||
gate->hash = (void*)&tribus_hash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
29
algo/tribus/tribus-gate.h
Normal file
29
algo/tribus/tribus-gate.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef TRIBUS_GATE_H__
|
||||
#define TRIBUS_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__) && !defined(NO_AES_NI)
|
||||
#define TRIBUS_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(TRIBUS_4WAY)
|
||||
|
||||
void tribus_hash_4way( void *state, const void *input );
|
||||
|
||||
int scanhash_tribus_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#else
|
||||
|
||||
void tribus_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_tribus( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
bool tribus_thread_init();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -1,4 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "tribus-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -25,6 +25,18 @@ typedef struct {
|
||||
|
||||
static __thread tribus_ctx_holder tribus_ctx;
|
||||
|
||||
bool tribus_thread_init()
|
||||
{
|
||||
sph_jh512_init( &tribus_ctx.jh );
|
||||
sph_keccak512_init( &tribus_ctx.keccak );
|
||||
#ifdef NO_AES_NI
|
||||
sph_echo512_init( &tribus_ctx.echo );
|
||||
#else
|
||||
init_echo( &tribus_ctx.echo, 512 );
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
void tribus_hash(void *state, const void *input)
|
||||
{
|
||||
unsigned char hash[128] __attribute__ ((aligned (32)));
|
||||
@@ -122,25 +134,4 @@ int scanhash_tribus(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool tribus_thread_init()
|
||||
{
|
||||
sph_jh512_init( &tribus_ctx.jh );
|
||||
sph_keccak512_init( &tribus_ctx.keccak );
|
||||
#ifdef NO_AES_NI
|
||||
sph_echo512_init( &tribus_ctx.echo );
|
||||
#else
|
||||
init_echo( &tribus_ctx.echo, 512 );
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool register_tribus_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->miner_thread_init = (void*)&tribus_thread_init;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ffff;
|
||||
gate->scanhash = (void*)&scanhash_tribus;
|
||||
gate->hash = (void*)&tribus_hash;
|
||||
return true;
|
||||
};
|
||||
|
291
algo/whirlpool/md-helper-4way.c
Normal file
291
algo/whirlpool/md-helper-4way.c
Normal file
@@ -0,0 +1,291 @@
|
||||
/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
|
||||
/*
|
||||
* This file contains some functions which implement the external data
|
||||
* handling and padding for Merkle-Damgard hash functions which follow
|
||||
* the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
|
||||
*
|
||||
* API: this file is meant to be included, not compiled as a stand-alone
|
||||
* file. Some macros must be defined:
|
||||
* RFUN name for the round function
|
||||
* HASH "short name" for the hash function
|
||||
* BE32 defined for big-endian, 32-bit based (e.g. SHA-1)
|
||||
* LE32 defined for little-endian, 32-bit based (e.g. MD5)
|
||||
* BE64 defined for big-endian, 64-bit based (e.g. SHA-512)
|
||||
* LE64 defined for little-endian, 64-bit based (no example yet)
|
||||
* PW01 if defined, append 0x01 instead of 0x80 (for Tiger)
|
||||
* BLEN if defined, length of a message block (in bytes)
|
||||
* PLW1 if defined, length is defined on one 64-bit word only (for Tiger)
|
||||
* PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL)
|
||||
* SVAL if defined, reference to the context state information
|
||||
*
|
||||
* BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
|
||||
* this is used for instance for Tiger, which works on 64-bit words but
|
||||
* uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
|
||||
* ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
|
||||
* set, then only one word (64 bits) will be used to encode the input
|
||||
* message length (in bits), otherwise two words will be used (as in
|
||||
* SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
|
||||
* not PLW1), four 64-bit words will be used to encode the message length
|
||||
* (in bits). Note that regardless of those settings, only 64-bit message
|
||||
* lengths are supported (in bits): messages longer than 2 Exabytes will be
|
||||
* improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
|
||||
* 2 millions Terabytes, which is huge).
|
||||
*
|
||||
* If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
|
||||
* function. This is used for Tiger2, which is identical to Tiger except
|
||||
* when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
|
||||
* of the 0x01 from original Tiger).
|
||||
*
|
||||
* The RFUN function is invoked with two arguments, the first pointing to
|
||||
* aligned data (as a "const void *"), the second being state information
|
||||
* from the context structure. By default, this state information is the
|
||||
* "val" field from the context, and this field is assumed to be an array
|
||||
* of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
|
||||
* from the context structure. The "val" field can have any type, except
|
||||
* for the output encoding which assumes that it is an array of "sph_u32"
|
||||
* values. By defining NO_OUTPUT, this last step is deactivated; the
|
||||
* includer code is then responsible for writing out the hash result. When
|
||||
* NO_OUTPUT is defined, the third parameter to the "close()" function is
|
||||
* ignored.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#undef SPH_XCAT
|
||||
#define SPH_XCAT(a, b) SPH_XCAT_(a, b)
|
||||
#undef SPH_XCAT_
|
||||
#define SPH_XCAT_(a, b) a ## b
|
||||
|
||||
#undef SPH_BLEN
|
||||
#undef SPH_WLEN
|
||||
#if defined BE64 || defined LE64
|
||||
#define SPH_BLEN 128U
|
||||
#define SPH_WLEN 8U
|
||||
#else
|
||||
#define SPH_BLEN 64U
|
||||
#define SPH_WLEN 4U
|
||||
#endif
|
||||
|
||||
#ifdef BLEN
|
||||
#undef SPH_BLEN
|
||||
#define SPH_BLEN BLEN
|
||||
#endif
|
||||
|
||||
#undef SPH_MAXPAD
|
||||
#if defined PLW1
|
||||
#define SPH_MAXPAD (SPH_BLEN - SPH_WLEN)
|
||||
#elif defined PLW4
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2))
|
||||
#else
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1))
|
||||
#endif
|
||||
|
||||
#undef SPH_VAL
|
||||
#undef SPH_NO_OUTPUT
|
||||
#ifdef SVAL
|
||||
#define SPH_VAL SVAL
|
||||
#define SPH_NO_OUTPUT 1
|
||||
#else
|
||||
#define SPH_VAL sc->val
|
||||
#endif
|
||||
|
||||
#ifndef CLOSE_ONLY
|
||||
|
||||
#ifdef SPH_UPTR
|
||||
static void
|
||||
SPH_XCAT(HASH, _short)( void *cc, const void *data, size_t len )
|
||||
#else
|
||||
void
|
||||
HASH ( void *cc, const void *data, size_t len )
|
||||
#endif
|
||||
{
|
||||
SPH_XCAT( HASH, _context ) *sc;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
|
||||
sc = cc;
|
||||
ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = SPH_BLEN - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( sc->buf + (ptr>>3), vdata, clen>>3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
if ( ptr == SPH_BLEN )
|
||||
{
|
||||
RFUN( sc->buf, SPH_VAL );
|
||||
ptr = 0;
|
||||
}
|
||||
sc->count += clen;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SPH_UPTR
|
||||
void
|
||||
HASH (void *cc, const void *data, size_t len)
|
||||
{
|
||||
SPH_XCAT(HASH, _context) *sc;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
unsigned ptr;
|
||||
|
||||
if ( len < (2 * SPH_BLEN) )
|
||||
{
|
||||
SPH_XCAT(HASH, _short)(cc, data, len);
|
||||
return;
|
||||
}
|
||||
sc = cc;
|
||||
ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
if ( ptr > 0 )
|
||||
{
|
||||
unsigned t;
|
||||
t = SPH_BLEN - ptr;
|
||||
SPH_XCAT( HASH, _short )( cc, data, t );
|
||||
vdata = vdata + (t>>3);
|
||||
len -= t;
|
||||
}
|
||||
SPH_XCAT( HASH, _short )( cc, data, len );
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Perform padding and produce result. The context is NOT reinitialized
|
||||
* by this function.
|
||||
*/
|
||||
static void
|
||||
SPH_XCAT( HASH, _addbits_and_close )(void *cc, unsigned ub, unsigned n,
|
||||
void *dst, unsigned rnum )
|
||||
{
|
||||
SPH_XCAT(HASH, _context) *sc;
|
||||
unsigned ptr, u;
|
||||
sc = cc;
|
||||
ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
|
||||
uint64_t *b= (uint64_t*)sc->buf;
|
||||
uint64_t *s= (uint64_t*)sc->state;
|
||||
//printf("Vptr 1= %u\n", ptr);
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[0], b[4], b[8], b[12] );
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[16], b[20], b[24], b[28] );
|
||||
|
||||
#ifdef PW01
|
||||
sc->buf[ptr>>3] = mm256_vec_epi64( 0x100 >> 8 );
|
||||
// sc->buf[ptr++] = 0x100 >> 8;
|
||||
#else
|
||||
// need to overwrite exactly one byte
|
||||
// sc->buf[ptr>>3] = _mm256_set_epi64x( 0, 0, 0, 0x80 );
|
||||
sc->buf[ptr>>3] = mm256_vec_epi64( 0x80 );
|
||||
// ptr++;
|
||||
#endif
|
||||
ptr += 8;
|
||||
|
||||
//printf("Vptr 2= %u\n", ptr);
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[0], b[4], b[8], b[12] );
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[16], b[20], b[24], b[28] );
|
||||
|
||||
if ( ptr > SPH_MAXPAD )
|
||||
{
|
||||
memset_zero_m256i( sc->buf + (ptr>>3), (SPH_BLEN - ptr) >> 3 );
|
||||
RFUN( sc->buf, SPH_VAL );
|
||||
memset_zero_m256i( sc->buf, SPH_MAXPAD >> 3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_m256i( sc->buf + (ptr>>3), (SPH_MAXPAD - ptr) >> 3 );
|
||||
}
|
||||
#if defined BE64
|
||||
#if defined PLW1
|
||||
sc->buf[ SPH_MAXPAD>>3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
#elif defined PLW4
|
||||
memset_zero_m256i( sc->buf + (SPH_MAXPAD>>3), ( 2 * SPH_WLEN ) >> 3 );
|
||||
sc->buf[ (SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count >> 61 ) );
|
||||
sc->buf[ (SPH_MAXPAD + 3 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
#else
|
||||
sc->buf[ ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count >> 61 ) );
|
||||
sc->buf[ ( SPH_MAXPAD + 3 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
#endif // PLW
|
||||
#else // LE64
|
||||
#if defined PLW1
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = mm256_vec_epi64( sc->count << 3 );
|
||||
#elif defined PLW4
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ ( SPH_MAXPAD + SPH_WLEN ) >> 3 ] =
|
||||
mm256_vec_epi64( c->count >> 61 );
|
||||
memset_zero_m256i( sc->buf + ( ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ),
|
||||
2 * SPH_WLEN );
|
||||
#else
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ ( SPH_MAXPAD + SPH_WLEN ) >> 3 ] =
|
||||
mm256_vec_epi64( sc->count >> 61 );
|
||||
#endif // PLW
|
||||
|
||||
#endif // LE64
|
||||
|
||||
//printf("Vptr 3= %u\n", ptr);
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[0], b[4], b[8], b[12] );
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[16], b[20], b[24], b[28] );
|
||||
RFUN( sc->buf, SPH_VAL );
|
||||
|
||||
//printf("Vptr after= %u\n", ptr);
|
||||
//printf("VState %016llx %016llx %016llx %016llx\n", s[0], s[4], s[8], s[12] );
|
||||
//printf("VState %016llx %016llx %016llx %016llx\n", s[16], s[20], s[24], s[28] );
|
||||
|
||||
#ifdef SPH_NO_OUTPUT
|
||||
(void)dst;
|
||||
(void)rnum;
|
||||
(void)u;
|
||||
#else
|
||||
for ( u = 0; u < rnum; u ++ )
|
||||
{
|
||||
#if defined BE64
|
||||
((__m256i*)dst)[u] = mm256_byteswap_epi64( sc->val[u] );
|
||||
#else // LE64
|
||||
((__m256i*)dst)[u] = sc->val[u];
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
SPH_XCAT( HASH, _mdclose )( void *cc, void *dst, unsigned rnum )
|
||||
{
|
||||
SPH_XCAT( HASH, _addbits_and_close )( cc, 0, 0, dst, rnum );
|
||||
}
|
369
algo/whirlpool/md_helper.c
Normal file
369
algo/whirlpool/md_helper.c
Normal file
@@ -0,0 +1,369 @@
|
||||
/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
|
||||
/*
|
||||
* This file contains some functions which implement the external data
|
||||
* handling and padding for Merkle-Damgard hash functions which follow
|
||||
* the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
|
||||
*
|
||||
* API: this file is meant to be included, not compiled as a stand-alone
|
||||
* file. Some macros must be defined:
|
||||
* RFUN name for the round function
|
||||
* HASH "short name" for the hash function
|
||||
* BE32 defined for big-endian, 32-bit based (e.g. SHA-1)
|
||||
* LE32 defined for little-endian, 32-bit based (e.g. MD5)
|
||||
* BE64 defined for big-endian, 64-bit based (e.g. SHA-512)
|
||||
* LE64 defined for little-endian, 64-bit based (no example yet)
|
||||
* PW01 if defined, append 0x01 instead of 0x80 (for Tiger)
|
||||
* BLEN if defined, length of a message block (in bytes)
|
||||
* PLW1 if defined, length is defined on one 64-bit word only (for Tiger)
|
||||
* PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL)
|
||||
* SVAL if defined, reference to the context state information
|
||||
*
|
||||
* BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
|
||||
* this is used for instance for Tiger, which works on 64-bit words but
|
||||
* uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
|
||||
* ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
|
||||
* set, then only one word (64 bits) will be used to encode the input
|
||||
* message length (in bits), otherwise two words will be used (as in
|
||||
* SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
|
||||
* not PLW1), four 64-bit words will be used to encode the message length
|
||||
* (in bits). Note that regardless of those settings, only 64-bit message
|
||||
* lengths are supported (in bits): messages longer than 2 Exabytes will be
|
||||
* improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
|
||||
* 2 millions Terabytes, which is huge).
|
||||
*
|
||||
* If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
|
||||
* function. This is used for Tiger2, which is identical to Tiger except
|
||||
* when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
|
||||
* of the 0x01 from original Tiger).
|
||||
*
|
||||
* The RFUN function is invoked with two arguments, the first pointing to
|
||||
* aligned data (as a "const void *"), the second being state information
|
||||
* from the context structure. By default, this state information is the
|
||||
* "val" field from the context, and this field is assumed to be an array
|
||||
* of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
|
||||
* from the context structure. The "val" field can have any type, except
|
||||
* for the output encoding which assumes that it is an array of "sph_u32"
|
||||
* values. By defining NO_OUTPUT, this last step is deactivated; the
|
||||
* includer code is then responsible for writing out the hash result. When
|
||||
* NO_OUTPUT is defined, the third parameter to the "close()" function is
|
||||
* ignored.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#undef SPH_XCAT
|
||||
#define SPH_XCAT(a, b) SPH_XCAT_(a, b)
|
||||
#undef SPH_XCAT_
|
||||
#define SPH_XCAT_(a, b) a ## b
|
||||
|
||||
#undef SPH_BLEN
|
||||
#undef SPH_WLEN
|
||||
#if defined BE64 || defined LE64
|
||||
#define SPH_BLEN 128U
|
||||
#define SPH_WLEN 8U
|
||||
#else
|
||||
#define SPH_BLEN 64U
|
||||
#define SPH_WLEN 4U
|
||||
#endif
|
||||
|
||||
#ifdef BLEN
|
||||
#undef SPH_BLEN
|
||||
#define SPH_BLEN BLEN
|
||||
#endif
|
||||
|
||||
#undef SPH_MAXPAD
|
||||
#if defined PLW1
|
||||
#define SPH_MAXPAD (SPH_BLEN - SPH_WLEN)
|
||||
#elif defined PLW4
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2))
|
||||
#else
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1))
|
||||
#endif
|
||||
|
||||
#undef SPH_VAL
|
||||
#undef SPH_NO_OUTPUT
|
||||
#ifdef SVAL
|
||||
#define SPH_VAL SVAL
|
||||
#define SPH_NO_OUTPUT 1
|
||||
#else
|
||||
#define SPH_VAL sc->val
|
||||
#endif
|
||||
|
||||
#ifndef CLOSE_ONLY
|
||||
|
||||
#ifdef SPH_UPTR
|
||||
static void
|
||||
SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
|
||||
#else
|
||||
void
|
||||
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
|
||||
#endif
|
||||
{
|
||||
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
|
||||
size_t current;
|
||||
|
||||
sc = cc;
|
||||
#if SPH_64
|
||||
current = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
#else
|
||||
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
|
||||
#endif
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
#if !SPH_64
|
||||
sph_u32 clow, clow2;
|
||||
#endif
|
||||
|
||||
clen = SPH_BLEN - current;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(sc->buf + current, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
current += clen;
|
||||
len -= clen;
|
||||
if (current == SPH_BLEN) {
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
current = 0;
|
||||
}
|
||||
#if SPH_64
|
||||
sc->count += clen;
|
||||
#else
|
||||
clow = sc->count_low;
|
||||
clow2 = SPH_T32(clow + clen);
|
||||
sc->count_low = clow2;
|
||||
if (clow2 < clow)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SPH_UPTR
|
||||
void
|
||||
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
|
||||
{
|
||||
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
|
||||
unsigned current;
|
||||
size_t orig_len;
|
||||
#if !SPH_64
|
||||
sph_u32 clow, clow2;
|
||||
#endif
|
||||
|
||||
if (len < (2 * SPH_BLEN)) {
|
||||
SPH_XCAT(HASH, _short)(cc, data, len);
|
||||
return;
|
||||
}
|
||||
sc = cc;
|
||||
#if SPH_64
|
||||
current = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
#else
|
||||
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
|
||||
#endif
|
||||
if (current > 0) {
|
||||
unsigned t;
|
||||
|
||||
t = SPH_BLEN - current;
|
||||
SPH_XCAT(HASH, _short)(cc, data, t);
|
||||
data = (const unsigned char *)data + t;
|
||||
len -= t;
|
||||
}
|
||||
#if !SPH_UNALIGNED
|
||||
if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) {
|
||||
SPH_XCAT(HASH, _short)(cc, data, len);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
orig_len = len;
|
||||
while (len >= SPH_BLEN) {
|
||||
RFUN(data, SPH_VAL);
|
||||
len -= SPH_BLEN;
|
||||
data = (const unsigned char *)data + SPH_BLEN;
|
||||
}
|
||||
if (len > 0)
|
||||
memcpy(sc->buf, data, len);
|
||||
#if SPH_64
|
||||
sc->count += (sph_u64)orig_len;
|
||||
#else
|
||||
clow = sc->count_low;
|
||||
clow2 = SPH_T32(clow + orig_len);
|
||||
sc->count_low = clow2;
|
||||
if (clow2 < clow)
|
||||
sc->count_high ++;
|
||||
/*
|
||||
* This code handles the improbable situation where "size_t" is
|
||||
* greater than 32 bits, and yet we do not have a 64-bit type.
|
||||
*/
|
||||
orig_len >>= 12;
|
||||
orig_len >>= 10;
|
||||
orig_len >>= 10;
|
||||
sc->count_high += orig_len;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Perform padding and produce result. The context is NOT reinitialized
|
||||
* by this function.
|
||||
*/
|
||||
static void
|
||||
SPH_XCAT(HASH, _addbits_and_close)(void *cc,
|
||||
unsigned ub, unsigned n, void *dst, unsigned rnum)
|
||||
{
|
||||
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
|
||||
unsigned current, u;
|
||||
#if !SPH_64
|
||||
sph_u32 low, high;
|
||||
#endif
|
||||
|
||||
sc = cc;
|
||||
#if SPH_64
|
||||
current = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
#else
|
||||
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
|
||||
#endif
|
||||
|
||||
uint64_t *b= (uint64_t*)sc->buf;
|
||||
uint64_t *s= (uint64_t*)sc->state;
|
||||
// printf("Sptr 1= %u\n",current);
|
||||
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] );
|
||||
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] );
|
||||
|
||||
#ifdef PW01
|
||||
sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
|
||||
#else
|
||||
{
|
||||
unsigned z;
|
||||
|
||||
z = 0x80 >> n;
|
||||
sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
|
||||
}
|
||||
#endif
|
||||
|
||||
// printf("Sptr 2= %u\n",current);
|
||||
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] );
|
||||
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] );
|
||||
|
||||
if (current > SPH_MAXPAD) {
|
||||
memset(sc->buf + current, 0, SPH_BLEN - current);
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
memset(sc->buf, 0, SPH_MAXPAD);
|
||||
} else {
|
||||
memset(sc->buf + current, 0, SPH_MAXPAD - current);
|
||||
}
|
||||
#if defined BE64
|
||||
#if defined PLW1
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#elif defined PLW4
|
||||
memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
|
||||
sc->count >> 61);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#else
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#endif
|
||||
#elif defined LE64
|
||||
#if defined PLW1
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
// BUG!! should be PLW4
|
||||
#elif defined PLW1
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
|
||||
memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
|
||||
#else
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
|
||||
#endif
|
||||
#else
|
||||
#if SPH_64
|
||||
#ifdef BE32
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#else
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#endif
|
||||
#else
|
||||
low = sc->count_low;
|
||||
high = SPH_T32((sc->count_high << 3) | (low >> 29));
|
||||
low = SPH_T32(low << 3) + (sph_u32)n;
|
||||
#ifdef BE32
|
||||
sph_enc32be(sc->buf + SPH_MAXPAD, high);
|
||||
sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
|
||||
#else
|
||||
sph_enc32le(sc->buf + SPH_MAXPAD, low);
|
||||
sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// printf("Sptr 3= %u\n",current);
|
||||
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[0], b[1], b[2], b[3] );
|
||||
// printf("SBuf %016llx %016llx %016llx %016llx\n", b[4], b[5], b[6], b[7] );
|
||||
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
|
||||
// printf("Sptr after= %u\n",current);
|
||||
// printf("SState %016llx %016llx %016llx %016llx\n", s[0], s[1], s[2], s[3] );
|
||||
// printf("SState %016llx %016llx %016llx %016llx\n", s[4], s[5], s[6], s[7] );
|
||||
|
||||
#ifdef SPH_NO_OUTPUT
|
||||
(void)dst;
|
||||
(void)rnum;
|
||||
(void)u;
|
||||
#else
|
||||
for (u = 0; u < rnum; u ++) {
|
||||
#if defined BE64
|
||||
sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
|
||||
#elif defined LE64
|
||||
sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
|
||||
#elif defined BE32
|
||||
sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
|
||||
#else
|
||||
sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
|
||||
{
|
||||
SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum);
|
||||
}
|
@@ -3441,19 +3441,19 @@ ROUND_FUN(whirlpool1, old1)
|
||||
|
||||
#define RFUN whirlpool_round
|
||||
#define HASH whirlpool
|
||||
#include "algo/sha/md_helper.c"
|
||||
#include "md_helper.c"
|
||||
#undef RFUN
|
||||
#undef HASH
|
||||
|
||||
#define RFUN whirlpool0_round
|
||||
#define HASH whirlpool0
|
||||
#include "algo/sha/md_helper.c"
|
||||
#include "md_helper.c"
|
||||
#undef RFUN
|
||||
#undef HASH
|
||||
|
||||
#define RFUN whirlpool1_round
|
||||
#define HASH whirlpool1
|
||||
#include "algo/sha/md_helper.c"
|
||||
#include "md_helper.c"
|
||||
#undef RFUN
|
||||
#undef HASH
|
||||
|
||||
@@ -3463,7 +3463,6 @@ sph_ ## name ## _close(void *cc, void *dst) \
|
||||
{ \
|
||||
sph_ ## name ## _context *sc; \
|
||||
int i; \
|
||||
\
|
||||
name ## _close(cc, dst, 0); \
|
||||
sc = cc; \
|
||||
for (i = 0; i < 8; i ++) \
|
||||
|
3481
algo/whirlpool/sph_whirlpool.c.bak
Normal file
3481
algo/whirlpool/sph_whirlpool.c.bak
Normal file
File diff suppressed because it is too large
Load Diff
209
algo/whirlpool/sph_whirlpool.h.bak
Normal file
209
algo/whirlpool/sph_whirlpool.h.bak
Normal file
@@ -0,0 +1,209 @@
|
||||
/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* WHIRLPOOL interface.
|
||||
*
|
||||
* WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
|
||||
* version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
|
||||
* (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
|
||||
* version, 2003, with a new diffusion matrix, also described as "plain
|
||||
* WHIRLPOOL"). All three variants are implemented here.
|
||||
*
|
||||
* The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
|
||||
* M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
|
||||
* NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
|
||||
*
|
||||
* The current WHIRLPOOL specification and a reference implementation
|
||||
* can be found on the WHIRLPOOL web page:
|
||||
* http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_whirlpool.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_WHIRLPOOL_H__
|
||||
#define SPH_WHIRLPOOL_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-0.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool0 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-1.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool1 512
|
||||
|
||||
/**
|
||||
* This structure is a context for WHIRLPOOL computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a WHIRLPOOL computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running WHIRLPOOL computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u64 state[8];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_whirlpool_context;
|
||||
|
||||
/**
|
||||
* Initialize a WHIRLPOOL context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool_context</code>)
|
||||
*/
|
||||
void sph_whirlpool_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* plain WHIRLPOOL algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool0_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-0 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool0_context</code>)
|
||||
*/
|
||||
void sph_whirlpool0_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool0_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-0 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool0(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-0 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-0 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool0_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-1 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool1_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-1 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool1_context</code>)
|
||||
*/
|
||||
void sph_whirlpool1_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool1_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-1 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool1(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-1 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-1 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool1_close(void *cc, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
131
algo/whirlpool/whirlpool-4way.c
Normal file
131
algo/whirlpool/whirlpool-4way.c
Normal file
@@ -0,0 +1,131 @@
|
||||
#include "whirlpool-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "sph_whirlpool.h"
|
||||
#include "whirlpool-hash-4way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
static __thread whirlpool_4way_context whirl_mid;
|
||||
|
||||
void whirlpool_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
const int midlen = 64;
|
||||
const int tail = 80 - midlen;
|
||||
whirlpool_4way_context ctx;
|
||||
|
||||
memcpy( &ctx, &whirl_mid, sizeof whirl_mid );
|
||||
whirlpool1_4way( &ctx, input + (midlen<<2), tail );
|
||||
whirlpool1_4way_close( &ctx, vhash);
|
||||
|
||||
// whirlpool1_4way_init( &ctx );
|
||||
// whirlpool1_4way( &ctx, input, 80 );
|
||||
// whirlpool1_4way_close( &ctx, vhash);
|
||||
|
||||
whirlpool1_4way_init( &ctx );
|
||||
whirlpool1_4way( &ctx, vhash, 64 );
|
||||
whirlpool1_4way_close( &ctx, vhash);
|
||||
|
||||
whirlpool1_4way_init( &ctx );
|
||||
whirlpool1_4way( &ctx, vhash, 64 );
|
||||
whirlpool1_4way_close( &ctx, vhash);
|
||||
|
||||
whirlpool1_4way_init( &ctx );
|
||||
whirlpool1_4way( &ctx, vhash, 64 );
|
||||
whirlpool1_4way_close( &ctx, vhash);
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( state , hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
unsigned long *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t* pdata = work->data;
|
||||
uint32_t* ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
// if (opt_benchmark)
|
||||
// ((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// midstate
|
||||
whirlpool1_4way_init( &whirl_mid );
|
||||
whirlpool1_4way( &whirl_mid, vdata, 64 );
|
||||
|
||||
do {
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
whirlpool_hash_4way( hash, vdata );
|
||||
|
||||
pdata[19] = n;
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio(work, hash);
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
15
algo/whirlpool/whirlpool-gate.c
Normal file
15
algo/whirlpool/whirlpool-gate.c
Normal file
@@ -0,0 +1,15 @@
|
||||
#include "whirlpool-gate.h"
|
||||
|
||||
bool register_whirlpool_algo( algo_gate_t* gate )
|
||||
{
|
||||
//#if defined (WHIRLPOOL_4WAY)
|
||||
// gate->scanhash = (void*)&scanhash_whirlpool_4way;
|
||||
// gate->hash = (void*)&whirlpool_hash_4way;
|
||||
//#else
|
||||
gate->scanhash = (void*)&scanhash_whirlpool;
|
||||
gate->hash = (void*)&whirlpool_hash;
|
||||
init_whirlpool_ctx();
|
||||
//#endif
|
||||
return true;
|
||||
};
|
||||
|
24
algo/whirlpool/whirlpool-gate.h
Normal file
24
algo/whirlpool/whirlpool-gate.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef WHIRLPOOL_GATE_H__
|
||||
#define WHIRLPOOL_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FOUR_WAY) && defined(__AVX2__)
|
||||
#define WHIRLPOOL_4WAY
|
||||
#endif
|
||||
|
||||
//#if defined (WHIRLPOOL_4WAY)
|
||||
|
||||
//void whirlpool_hash_4way(void *state, const void *input);
|
||||
|
||||
//int scanhash_whirlpool_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
//#endif
|
||||
|
||||
void whirlpool_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_whirlpool( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
3533
algo/whirlpool/whirlpool-hash-4way.c
Normal file
3533
algo/whirlpool/whirlpool-hash-4way.c
Normal file
File diff suppressed because it is too large
Load Diff
108
algo/whirlpool/whirlpool-hash-4way.h
Normal file
108
algo/whirlpool/whirlpool-hash-4way.h
Normal file
@@ -0,0 +1,108 @@
|
||||
/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* WHIRLPOOL interface.
|
||||
*
|
||||
* WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
|
||||
* version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
|
||||
* (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
|
||||
* version, 2003, with a new diffusion matrix, also described as "plain
|
||||
* WHIRLPOOL"). All three variants are implemented here.
|
||||
*
|
||||
* The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
|
||||
* M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
|
||||
* NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
|
||||
*
|
||||
* The current WHIRLPOOL specification and a reference implementation
|
||||
* can be found on the WHIRLPOOL web page:
|
||||
* http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_whirlpool.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef WHIRLPOOL_HASH_4WAY_H__
|
||||
#define WHIRLPOOL_HASH_4WAY_H__
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-0.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool0 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-1.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool1 512
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[8] __attribute__ ((aligned (64)));
|
||||
__m256i state[8];
|
||||
sph_u64 count;
|
||||
} whirlpool_4way_context;
|
||||
|
||||
void whirlpool_4way_init( void *cc );
|
||||
|
||||
void whirlpool_4way( void *cc, const void *data, size_t len );
|
||||
|
||||
void whirlpool_4way_close( void *cc, void *dst );
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef whirlpool_4way_context whirlpool0_4way_context;
|
||||
|
||||
#define whirlpool0_4way_init whirlpool_4way_init
|
||||
|
||||
void whirlpool0_4way( void *cc, const void *data, size_t len );
|
||||
|
||||
void whirlpool0_4way_close( void *cc, void *dst );
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-1 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef whirlpool_4way_context whirlpool1_4way_context;
|
||||
|
||||
#define whirlpool1_4way_init whirlpool_4way_init
|
||||
|
||||
void whirlpool1_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void whirlpool1_4way_close(void *cc, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -1,5 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include "whirlpool-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -58,7 +57,8 @@ void whirlpool_midstate( const void* input )
|
||||
}
|
||||
|
||||
|
||||
int scanhash_whirlpool(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
int scanhash_whirlpool( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t* pdata = work->data;
|
||||
@@ -66,8 +66,8 @@ int scanhash_whirlpool(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce - 1;
|
||||
|
||||
// if (opt_benchmark)
|
||||
// ((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
@@ -83,7 +83,7 @@ int scanhash_whirlpool(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
||||
|
||||
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
|
||||
{
|
||||
// work_set_target_ratio(work, vhash);
|
||||
work_set_target_ratio(work, vhash);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
@@ -95,11 +95,3 @@ int scanhash_whirlpool(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_whirlpool_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_whirlpool;
|
||||
gate->hash = (void*)&whirlpool_hash;
|
||||
init_whirlpool_ctx();
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -24,7 +24,8 @@ void whirlpoolx_hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_whirlpoolx(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
int scanhash_whirlpoolx( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t* pdata = work->data;
|
||||
@@ -32,8 +33,8 @@ int scanhash_whirlpoolx(int thr_id, struct work* work, uint32_t max_nonce, unsig
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce - 1;
|
||||
|
||||
// if (opt_benchmark)
|
||||
// ((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
@@ -47,7 +48,7 @@ int scanhash_whirlpoolx(int thr_id, struct work* work, uint32_t max_nonce, unsig
|
||||
|
||||
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
|
||||
{
|
||||
// work_set_target_ratio(work, vhash);
|
||||
work_set_target_ratio(work, vhash);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
|
93
avxdefs.h
93
avxdefs.h
@@ -1,3 +1,6 @@
|
||||
#ifndef AVXDEFS_H__
|
||||
#define AVXDEFS_H__
|
||||
|
||||
// Some tools to help using AVX and AVX2
|
||||
// AVX support is required to include this header file, AVX2 optional.
|
||||
|
||||
@@ -39,7 +42,23 @@ uint8_t v8 [16];
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// AVX2 replacements for vectorized data
|
||||
// AVX2 implementations of
|
||||
// vector versions of common scalar functions
|
||||
// vector handling, indexing, pointer arithmetic
|
||||
// vector scalar conversion
|
||||
// overlay (union) handling for all integer data types
|
||||
|
||||
// vectorize 64 bit data by replication.
|
||||
// don't need to repeat the val 4 times.
|
||||
inline __m256i mm256_vec_epi64( uint64_t val )
|
||||
{
|
||||
return _mm256_set_epi64x( val, val, val, val );
|
||||
}
|
||||
|
||||
inline __m256i mm256_vec_epi32( uint32_t val )
|
||||
{
|
||||
return _mm256_set_epi32( val, val, val, val, val, val, val, val );
|
||||
}
|
||||
|
||||
// n = number of __m256i (32 bytes)
|
||||
inline void memset_zero_m256i( __m256i *dst, int n )
|
||||
@@ -282,8 +301,8 @@ inline __m256i mm256_byteswap_epi32( __m256i x )
|
||||
_mm256_set_epi32( 0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000,
|
||||
0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000 ) );
|
||||
__m256i x0 = _mm256_slli_epi32( x, 24 ); // x0 = x << 24
|
||||
x1 = _mm256_slli_epi32( x1, 8 ); // x1 = mask(x) << 8
|
||||
x2 = _mm256_srli_epi32( x2, 8 ); // x2 = mask(x) >> 8
|
||||
x1 = _mm256_slli_epi32( x1, 8 ); // x1 = mask1(x) << 8
|
||||
x2 = _mm256_srli_epi32( x2, 8 ); // x2 = mask2(x) >> 8
|
||||
__m256i x3 = _mm256_srli_epi32( x, 24 ); // x3 = x >> 24
|
||||
return _mm256_or_si256( _mm256_or_si256( x0, x1 ),
|
||||
_mm256_or_si256( x2, x3 ) );
|
||||
@@ -318,6 +337,11 @@ inline __m256i mm256_byteswap_epi64( __m256i x )
|
||||
return x;
|
||||
}
|
||||
|
||||
// vectorized version of ~ operator
|
||||
#define mm256_bitnot( x ) \
|
||||
_mm256_xor_si256( (x), _mm256_set_epi64x( 0xffffffffffffffff, \
|
||||
0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff ) )
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
// AVX replacements for vectorized data
|
||||
@@ -614,12 +638,65 @@ inline void m256_deinterleave_4x64( uint64_t *dst0, uint64_t *dst1,
|
||||
for ( int i = 0; i < bit_len>>6; i++, s += 4 )
|
||||
{
|
||||
*(dst0+i) = *s;
|
||||
*(dst1+i) = *(s+1);
|
||||
*(dst2+i) = *(s+2);
|
||||
*(dst3+i) = *(s+3);
|
||||
*(dst1+i) = *(s+1);
|
||||
*(dst2+i) = *(s+2);
|
||||
*(dst3+i) = *(s+3);
|
||||
}
|
||||
}
|
||||
|
||||
// optimized versions using AVX2 code
|
||||
// only 512 bit and 640 bit length is supported
|
||||
// all data must be aligned to256 bits
|
||||
// 640 bit data needs padding for overrun to 768
|
||||
// no looping
|
||||
inline void m256_interleave_4x64x( uint64_t *dst, uint64_t *src0,
|
||||
uint64_t *src1, uint64_t *src2, uint64_t *src3, int bit_len )
|
||||
{
|
||||
__m256i* d = (__m256i*)dst;
|
||||
|
||||
d[0] = _mm256_set_epi64x( src3[0], src2[0], src1[0], src0[0] );
|
||||
d[1] = _mm256_set_epi64x( src3[1], src2[1], src1[1], src0[1] );
|
||||
d[2] = _mm256_set_epi64x( src3[2], src2[2], src1[2], src0[2] );
|
||||
d[3] = _mm256_set_epi64x( src3[3], src2[3], src1[3], src0[3] );
|
||||
|
||||
d[4] = _mm256_set_epi64x( src3[4], src2[4], src1[4], src0[4] );
|
||||
d[5] = _mm256_set_epi64x( src3[5], src2[5], src1[5], src0[5] );
|
||||
d[6] = _mm256_set_epi64x( src3[6], src2[6], src1[6], src0[6] );
|
||||
d[7] = _mm256_set_epi64x( src3[7], src2[7], src1[7], src0[7] );
|
||||
|
||||
if ( bit_len == 512 ) return;
|
||||
|
||||
d[8] = _mm256_set_epi64x( src3[8], src2[8], src1[8], src0[8] );
|
||||
d[9] = _mm256_set_epi64x( src3[9], src2[9], src1[9], src0[9] );
|
||||
}
|
||||
|
||||
inline void m256_deinterleave_4x64x( uint64_t *dst0, uint64_t *dst1,
|
||||
uint64_t *dst2, uint64_t *dst3, uint64_t *src, int bit_len )
|
||||
{
|
||||
__m256i* d0 = (__m256i*)dst0;
|
||||
__m256i* d1 = (__m256i*)dst1;
|
||||
__m256i* d2 = (__m256i*)dst2;
|
||||
__m256i* d3 = (__m256i*)dst3;
|
||||
|
||||
d0[0] = _mm256_set_epi64x( src[12], src[ 8], src[ 4], src[ 0] );
|
||||
d1[0] = _mm256_set_epi64x( src[13], src[ 9], src[ 5], src[ 1] );
|
||||
d2[0] = _mm256_set_epi64x( src[14], src[10], src[ 6], src[ 2] );
|
||||
d3[0] = _mm256_set_epi64x( src[15], src[11], src[ 7], src[ 3] );
|
||||
|
||||
d0[1] = _mm256_set_epi64x( src[28], src[24], src[20], src[16] );
|
||||
d1[1] = _mm256_set_epi64x( src[29], src[25], src[21], src[17] );
|
||||
d2[1] = _mm256_set_epi64x( src[30], src[26], src[22], src[18] );
|
||||
d3[1] = _mm256_set_epi64x( src[31], src[27], src[23], src[19] );
|
||||
|
||||
if ( bit_len == 512 ) return;
|
||||
|
||||
// null change to overrun area
|
||||
d0[2] = _mm256_set_epi64x( dst0[44], dst0[40], src[36], src[32] );
|
||||
d1[2] = _mm256_set_epi64x( dst1[45], dst1[41], src[37], src[33] );
|
||||
d2[2] = _mm256_set_epi64x( dst2[46], dst2[42], src[38], src[34] );
|
||||
d3[2] = _mm256_set_epi64x( dst3[47], dst3[43], src[39], src[35] );
|
||||
}
|
||||
|
||||
// interleave 8 arrays of 32 bit elements for AVX2 processing
|
||||
// bit_len must be multiple of 32
|
||||
inline void m256_interleave_8x32( uint32_t *dst, uint32_t *src0,
|
||||
@@ -718,7 +795,8 @@ inline void m128_interleave_4x32( uint32_t *dst, uint32_t *src0,
|
||||
// deinterleave 4 arrays into individual buffers for scalarm processing
|
||||
// bit_len must be multiple of 32
|
||||
inline void m128_deinterleave_4x32( uint32_t *dst0, uint32_t *dst1,
|
||||
uint32_t *dst2,uint32_t *dst3, uint32_t *src, int bit_len )
|
||||
uint32_t *dst2,uint32_t *dst3, uint32_t *src,
|
||||
int bit_len )
|
||||
{
|
||||
uint32_t *s = src;
|
||||
for ( int i = 0; i < bit_len >> 5; i++, s += 4 )
|
||||
@@ -730,4 +808,5 @@ inline void m128_deinterleave_4x32( uint32_t *dst0, uint32_t *dst1,
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -18,7 +18,8 @@ rm -f config.status
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
CFLAGS="-O3 -march=native -Wall -DFOUR_WAY" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=native -Wall -DFOUR_WAY" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall -DFOUR_WAY" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
|
3
build.sh
3
build.sh
@@ -18,7 +18,8 @@ rm -f config.status
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.3.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.4.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.7.3'
|
||||
PACKAGE_STRING='cpuminer-opt 3.7.3'
|
||||
PACKAGE_VERSION='3.7.4'
|
||||
PACKAGE_STRING='cpuminer-opt 3.7.4'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.7.3 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.7.4 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1392,7 +1392,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.7.3:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.7.4:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1497,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.7.3
|
||||
cpuminer-opt configure 3.7.4
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.7.3, which was
|
||||
It was created by cpuminer-opt $as_me 3.7.4, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2981,7 +2981,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.7.3'
|
||||
VERSION='3.7.4'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.7.3, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.7.4, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6743,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.7.3
|
||||
cpuminer-opt config.status 3.7.4
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.7.3])
|
||||
AC_INIT([cpuminer-opt], [3.7.4])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
21
cpu-miner.c
21
cpu-miner.c
@@ -1815,14 +1815,30 @@ static void *miner_thread( void *userdata )
|
||||
{
|
||||
*algo_gate.get_nonceptr( work.data ) = work.nonces[n];
|
||||
if ( !submit_work(mythr, &work) )
|
||||
{
|
||||
applog(LOG_WARNING, "Failed to submit share." );
|
||||
break;
|
||||
}
|
||||
num_submitted++;
|
||||
}
|
||||
// must be a ine way algo, nonce is already in work data
|
||||
#if FOUR_WAY
|
||||
if (num_submitted>1)
|
||||
applog(LOG_NOTICE, "4 WAY hash, %u nonces submitted," CL_MAG " BONUS!" CL_WHT, num_submitted);
|
||||
else
|
||||
applog(LOG_NOTICE, "4 WAY hash %u nonce submitted", num_submitted);
|
||||
#endif
|
||||
// must be a one way algo, nonce is already in work data
|
||||
if ( !num_submitted )
|
||||
{
|
||||
if ( !submit_work(mythr, &work) )
|
||||
{
|
||||
applog(LOG_WARNING, "Failed to submir share.");
|
||||
break;
|
||||
}
|
||||
#if FOUR_WAY
|
||||
applog(LOG_NOTICE, "1 WAY hash 1 nonce submitted");
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
// prevent stale work in solo
|
||||
@@ -1836,8 +1852,6 @@ static void *miner_thread( void *userdata )
|
||||
}
|
||||
}
|
||||
// display hashrate
|
||||
|
||||
|
||||
if (!opt_quiet)
|
||||
{
|
||||
char hc[16];
|
||||
@@ -1846,7 +1860,6 @@ static void *miner_thread( void *userdata )
|
||||
char hr_units[2] = {0,0};
|
||||
double hashcount = thr_hashcount[thr_id];
|
||||
double hashrate = thr_hashrates[thr_id];
|
||||
//printf("display count= %.3f, tcount= %.3f, rate= %03f trate= %03f\n", hashcount, thr_hashcount[thr_id], hashrate,thr_hashrates[thr_id] );
|
||||
if ( hashcount )
|
||||
{
|
||||
scale_hash_for_display( &hashcount, hc_units );
|
||||
|
6
miner.h
6
miner.h
@@ -673,19 +673,19 @@ Options:\n\
|
||||
argon2\n\
|
||||
axiom Shabal-256 MemoHash\n\
|
||||
bastion\n\
|
||||
blake Blake-256 (SFR)\n\
|
||||
blake blake256r14 (SFR)\n\
|
||||
blakecoin blake256r8\n\
|
||||
blake2s Blake-2 S\n\
|
||||
bmw BMW 256\n\
|
||||
c11 Chaincoin\n\
|
||||
cryptolight Cryptonight-light\n\
|
||||
cryptonight cryptonote, Monero (XMR)\n\
|
||||
decred Blake256r8dcr\n\
|
||||
decred Blake256r14dcr\n\
|
||||
deep Deepcoin (DCN)\n\
|
||||
dmd-gr Diamond\n\
|
||||
drop Dropcoin\n\
|
||||
fresh Fresh\n\
|
||||
groestl dmd-gr, Groestl coin\n\
|
||||
groestl Groestl coin\n\
|
||||
heavy Heavy\n\
|
||||
hmq1725 Espers\n\
|
||||
hodl Hodlcoin\n\
|
||||
|
@@ -3,7 +3,15 @@
|
||||
make distclean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=core-avx2 -Wall -DUSE_SPH_SHA" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=core-avx2 -Wall -DUSE_SPH_SHA -DFOUR_WAY" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-4way.exe
|
||||
|
||||
make clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=core-avx2 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-avx2.exe
|
||||
@@ -11,7 +19,7 @@ mv cpuminer.exe cpuminer-aes-avx2.exe
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=corei7-avx -Wall -DUSE_SPH_SHA" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=corei7-avx -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-avx.exe
|
||||
@@ -19,7 +27,7 @@ mv cpuminer.exe cpuminer-aes-avx.exe
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -maes -msse4.2 -Wall -DUSE_SPH_SHA" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -maes -msse4.2 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-sse42.exe
|
||||
@@ -27,7 +35,7 @@ mv cpuminer.exe cpuminer-aes-sse42.exe
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=corei7 -Wall -DUSE_SPH_SHA" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=corei7 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-sse42.exe
|
||||
@@ -35,7 +43,7 @@ mv cpuminer.exe cpuminer-sse42.exe
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=core2 -Wall -DUSE_SPH_SHA" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=core2 -Wall -DUSE_SPH_SHA" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-sse2.exe
|
||||
|
@@ -18,7 +18,8 @@ rm -f config.status
|
||||
# Debian 7.7 / Ubuntu 14.04 (gcc 4.7+)
|
||||
#extracflags="$extracflags -Ofast -flto -fuse-linker-plugin -ftree-loop-if-convert-stores"
|
||||
|
||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
#CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
|
||||
make -j 4
|
||||
|
||||
|
Reference in New Issue
Block a user