mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.8.2
This commit is contained in:
13
Makefile.am
13
Makefile.am
@@ -70,6 +70,8 @@ cpuminer_SOURCES = \
|
||||
algo/gost/sph_gost.c \
|
||||
algo/groestl/sph_groestl.c \
|
||||
algo/groestl/groestl.c \
|
||||
algo/groestl/myrgr-gate.c \
|
||||
algo/groestl/myrgr-4way.c \
|
||||
algo/groestl/myr-groestl.c \
|
||||
algo/groestl/aes_ni/hash-groestl.c \
|
||||
algo/groestl/aes_ni/hash-groestl256.c \
|
||||
@@ -97,7 +99,6 @@ cpuminer_SOURCES = \
|
||||
algo/keccak/keccak-4way.c\
|
||||
algo/keccak/keccak-gate.c \
|
||||
algo/keccak/sse2/keccak.c \
|
||||
algo/lbry.c \
|
||||
algo/luffa/sph_luffa.c \
|
||||
algo/luffa/luffa.c \
|
||||
algo/luffa/luffa_for_sse2.c \
|
||||
@@ -115,6 +116,9 @@ cpuminer_SOURCES = \
|
||||
algo/lyra2/lyra2h-gate.c \
|
||||
algo/lyra2/lyra2h.c \
|
||||
algo/lyra2/lyra2h-4way.c \
|
||||
algo/lyra2/allium-gate.c \
|
||||
algo/lyra2/allium-4way.c \
|
||||
algo/lyra2/allium.c \
|
||||
algo/m7m.c \
|
||||
algo/neoscrypt/neoscrypt.c \
|
||||
algo/nist5/nist5-gate.c \
|
||||
@@ -135,6 +139,10 @@ cpuminer_SOURCES = \
|
||||
algo/qubit/deep-2way.c \
|
||||
algo/qubit/deep.c \
|
||||
algo/ripemd/sph_ripemd.c \
|
||||
algo/ripemd/ripemd-hash-4way.c \
|
||||
algo/ripemd/lbry-gate.c \
|
||||
algo/ripemd/lbry.c \
|
||||
algo/ripemd/lbry-4way.c \
|
||||
algo/scrypt.c \
|
||||
algo/scryptjane/scrypt-jane.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
@@ -190,6 +198,9 @@ cpuminer_SOURCES = \
|
||||
algo/x11/x11evo.c \
|
||||
algo/x11/x11evo-4way.c \
|
||||
algo/x11/x11evo-gate.c \
|
||||
algo/x12/x12-gate.c \
|
||||
algo/x12/x12.c \
|
||||
algo/x12/x12-4way.c \
|
||||
algo/x13/x13-gate.c \
|
||||
algo/x13/x13.c \
|
||||
algo/x13/x13-4way.c \
|
||||
|
47
README.md
47
README.md
@@ -13,6 +13,29 @@ mailto://jayddee246@gmail.com
|
||||
|
||||
See file RELEASE_NOTES for change log and compile instructions.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
|
||||
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
|
||||
optimizations a CPU with AES_NI is required. This includes Intel Westbridge
|
||||
and newer and AMD equivalents. Further optimizations are available on some
|
||||
algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
|
||||
|
||||
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
|
||||
performance.
|
||||
|
||||
ARM CPUs are not supported.
|
||||
|
||||
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
|
||||
Centos are known to work and have all dependencies in their repositories.
|
||||
Others may work but may require more effort.
|
||||
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
|
||||
|
||||
MacOS, OSx is not supported.
|
||||
|
||||
3. Stratum pool. Some algos may work wallet mining using getwork.
|
||||
|
||||
Supported Algorithms
|
||||
--------------------
|
||||
|
||||
@@ -75,6 +98,7 @@ Supported Algorithms
|
||||
x11 Dash
|
||||
x11evo Revolvercoin
|
||||
x11gost sib (SibCoin)
|
||||
x12 Galaxie Cash (GCH)
|
||||
x13 X13
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
@@ -87,29 +111,6 @@ Supported Algorithms
|
||||
yescryptr16 Yenten (YTN)
|
||||
zr5 Ziftr
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
|
||||
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
|
||||
optimizations a CPU with AES_NI is required. This includes Intel Westbridge
|
||||
and newer and AMD equivalents. Further optimizations are available on some
|
||||
algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
|
||||
|
||||
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
|
||||
performance.
|
||||
|
||||
ARM CPUs are not supported.
|
||||
|
||||
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
|
||||
Centos are known to work and have all dependencies in their repositories.
|
||||
Others may work but may require more effort.
|
||||
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
|
||||
|
||||
MacOS, OSx is not supported.
|
||||
|
||||
3. Stratum pool. Some algos may work wallet mining using getwork.
|
||||
|
||||
Errata
|
||||
------
|
||||
|
||||
|
@@ -159,6 +159,13 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.2
|
||||
|
||||
Fixed and faster myr-gr.
|
||||
Added x12 algo (Galaxie Cash), allium algo (Garlicoin).
|
||||
Faster lyra2rev2, lbry, skein.
|
||||
Large reduction in compiler warnings.
|
||||
|
||||
v3.8.1.1
|
||||
|
||||
Fixed Windows AVX2 crash.
|
||||
|
@@ -155,6 +155,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
|
||||
switch (algo)
|
||||
{
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
@@ -213,6 +214,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
@@ -298,6 +300,7 @@ const char* const algo_alias_map[][2] =
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2zoin", "lyra2z330" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "phi", "phi1612" },
|
||||
|
@@ -96,7 +96,7 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( ( n >= max_nonce ) && ( *hashes_done < 10 ) )
|
||||
{
|
||||
*hashes_done = 0;
|
||||
sleep(1);
|
||||
// sleep(1);
|
||||
}
|
||||
|
||||
return num_found;
|
||||
|
@@ -12,11 +12,11 @@ static __thread blake256_4way_context blake_mid;
|
||||
void decred_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
|
||||
// uint32_t hash0[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
const void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
|
||||
int tail_len = 180 - DECRED_MIDSTATE_LEN;
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
|
@@ -49,13 +49,6 @@ extern "C"{
|
||||
|
||||
// BMW256
|
||||
|
||||
// BMW small has a bug not present in big. Lanes 0 & 2 produce valid hash
|
||||
// while lanes 1 & 3 produce invalid hash. The cause is not known.
|
||||
// Some things that could cause it are: using epi64 instead of epi32,
|
||||
// a memory write that is the wrong size, an attempt to index a vector
|
||||
// like an array (only works for 64 bit elements).
|
||||
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x40414243), SPH_C32(0x44454647),
|
||||
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
|
||||
@@ -123,16 +116,14 @@ static const sph_u64 IV512[] = {
|
||||
mm_rotl_32( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
|
||||
// The multiplication in this macro is a possible cause of the lane
|
||||
// corruption but a vectorized mullo did not help.
|
||||
#define add_elt_s( M, H, j ) \
|
||||
_mm_xor_si128( \
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_add_epi32( rol_off_32( M, j, 0 ), \
|
||||
rol_off_32( M, j, 3 ) ), \
|
||||
rol_off_32( M, j, 10 ) ), \
|
||||
_mm_set1_epi32( ( (j) + 16 ) * 0x05555555UL ) \
|
||||
), H[ ( (j)+7 ) & 0xF ] )
|
||||
_mm_add_epi32( \
|
||||
_mm_sub_epi32( _mm_add_epi32( rol_off_32( M, j, 0 ), \
|
||||
rol_off_32( M, j, 3 ) ), \
|
||||
rol_off_32( M, j, 10 ) ), \
|
||||
_mm_set1_epi32( ( (j)+16 ) * SPH_C32(0x05555555UL) ) ), \
|
||||
H[ ( (j)+7 ) & 0xF ] )
|
||||
|
||||
|
||||
#define expand1s( qt, M, H, i ) \
|
||||
@@ -449,22 +440,22 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
|
||||
{
|
||||
__m128i qt[32], xl, xh; \
|
||||
|
||||
qt[ 0] = ss0( Ws0 ) + H[ 1];
|
||||
qt[ 1] = ss1( Ws1 ) + H[ 2];
|
||||
qt[ 2] = ss2( Ws2 ) + H[ 3];
|
||||
qt[ 3] = ss3( Ws3 ) + H[ 4];
|
||||
qt[ 4] = ss4( Ws4 ) + H[ 5];
|
||||
qt[ 5] = ss0( Ws5 ) + H[ 6];
|
||||
qt[ 6] = ss1( Ws6 ) + H[ 7];
|
||||
qt[ 7] = ss2( Ws7 ) + H[ 8];
|
||||
qt[ 8] = ss3( Ws8 ) + H[ 9];
|
||||
qt[ 9] = ss4( Ws9 ) + H[10];
|
||||
qt[10] = ss0( Ws10) + H[11];
|
||||
qt[11] = ss1( Ws11) + H[12];
|
||||
qt[12] = ss2( Ws12) + H[13];
|
||||
qt[13] = ss3( Ws13) + H[14];
|
||||
qt[14] = ss4( Ws14) + H[15];
|
||||
qt[15] = ss0( Ws15) + H[ 0];
|
||||
qt[ 0] = _mm_add_epi32( ss0( Ws0 ), H[ 1] );
|
||||
qt[ 1] = _mm_add_epi32( ss1( Ws1 ), H[ 2] );
|
||||
qt[ 2] = _mm_add_epi32( ss2( Ws2 ), H[ 3] );
|
||||
qt[ 3] = _mm_add_epi32( ss3( Ws3 ), H[ 4] );
|
||||
qt[ 4] = _mm_add_epi32( ss4( Ws4 ), H[ 5] );
|
||||
qt[ 5] = _mm_add_epi32( ss0( Ws5 ), H[ 6] );
|
||||
qt[ 6] = _mm_add_epi32( ss1( Ws6 ), H[ 7] );
|
||||
qt[ 7] = _mm_add_epi32( ss2( Ws7 ), H[ 8] );
|
||||
qt[ 8] = _mm_add_epi32( ss3( Ws8 ), H[ 9] );
|
||||
qt[ 9] = _mm_add_epi32( ss4( Ws9 ), H[10] );
|
||||
qt[10] = _mm_add_epi32( ss0( Ws10), H[11] );
|
||||
qt[11] = _mm_add_epi32( ss1( Ws11), H[12] );
|
||||
qt[12] = _mm_add_epi32( ss2( Ws12), H[13] );
|
||||
qt[13] = _mm_add_epi32( ss3( Ws13), H[14] );
|
||||
qt[14] = _mm_add_epi32( ss4( Ws14), H[15] );
|
||||
qt[15] = _mm_add_epi32( ss0( Ws15), H[ 0] );
|
||||
qt[16] = expand1s( qt, M, H, 16 );
|
||||
qt[17] = expand1s( qt, M, H, 17 );
|
||||
qt[18] = expand2s( qt, M, H, 18 );
|
||||
@@ -740,24 +731,24 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
|
||||
|
||||
void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
{
|
||||
__m256i qt[32], xl, xh; \
|
||||
__m256i qt[32], xl, xh;
|
||||
|
||||
qt[ 0] = sb0( Wb0 ) + H[ 1];
|
||||
qt[ 1] = sb1( Wb1 ) + H[ 2];
|
||||
qt[ 2] = sb2( Wb2 ) + H[ 3];
|
||||
qt[ 3] = sb3( Wb3 ) + H[ 4];
|
||||
qt[ 4] = sb4( Wb4 ) + H[ 5];
|
||||
qt[ 5] = sb0( Wb5 ) + H[ 6];
|
||||
qt[ 6] = sb1( Wb6 ) + H[ 7];
|
||||
qt[ 7] = sb2( Wb7 ) + H[ 8];
|
||||
qt[ 8] = sb3( Wb8 ) + H[ 9];
|
||||
qt[ 9] = sb4( Wb9 ) + H[10];
|
||||
qt[10] = sb0( Wb10) + H[11];
|
||||
qt[11] = sb1( Wb11) + H[12];
|
||||
qt[12] = sb2( Wb12) + H[13];
|
||||
qt[13] = sb3( Wb13) + H[14];
|
||||
qt[14] = sb4( Wb14) + H[15];
|
||||
qt[15] = sb0( Wb15) + H[ 0];
|
||||
qt[ 0] = _mm256_add_epi64( sb0( Wb0 ), H[ 1] );
|
||||
qt[ 1] = _mm256_add_epi64( sb1( Wb1 ), H[ 2] );
|
||||
qt[ 2] = _mm256_add_epi64( sb2( Wb2 ), H[ 3] );
|
||||
qt[ 3] = _mm256_add_epi64( sb3( Wb3 ), H[ 4] );
|
||||
qt[ 4] = _mm256_add_epi64( sb4( Wb4 ), H[ 5] );
|
||||
qt[ 5] = _mm256_add_epi64( sb0( Wb5 ), H[ 6] );
|
||||
qt[ 6] = _mm256_add_epi64( sb1( Wb6 ), H[ 7] );
|
||||
qt[ 7] = _mm256_add_epi64( sb2( Wb7 ), H[ 8] );
|
||||
qt[ 8] = _mm256_add_epi64( sb3( Wb8 ), H[ 9] );
|
||||
qt[ 9] = _mm256_add_epi64( sb4( Wb9 ), H[10] );
|
||||
qt[10] = _mm256_add_epi64( sb0( Wb10), H[11] );
|
||||
qt[11] = _mm256_add_epi64( sb1( Wb11), H[12] );
|
||||
qt[12] = _mm256_add_epi64( sb2( Wb12), H[13] );
|
||||
qt[13] = _mm256_add_epi64( sb3( Wb13), H[14] );
|
||||
qt[14] = _mm256_add_epi64( sb4( Wb14), H[15] );
|
||||
qt[15] = _mm256_add_epi64( sb0( Wb15), H[ 0] );
|
||||
qt[16] = expand1b( qt, M, H, 16 );
|
||||
qt[17] = expand1b( qt, M, H, 17 );
|
||||
qt[18] = expand2b( qt, M, H, 18 );
|
||||
@@ -870,7 +861,7 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
}
|
||||
|
||||
// BMW256
|
||||
/*
|
||||
|
||||
static const uint32_t final_s[16][4] =
|
||||
{
|
||||
{ 0xaaaaaaa0, 0xaaaaaaa0, 0xaaaaaaa0, 0xaaaaaaa0 },
|
||||
@@ -890,7 +881,7 @@ static const uint32_t final_s[16][4] =
|
||||
{ 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae },
|
||||
{ 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf }
|
||||
};
|
||||
*/
|
||||
/*
|
||||
static const __m128i final_s[16] =
|
||||
{
|
||||
{ 0xaaaaaaa0aaaaaaa0, 0xaaaaaaa0aaaaaaa0 },
|
||||
@@ -910,7 +901,7 @@ static const __m128i final_s[16] =
|
||||
{ 0xaaaaaaaeaaaaaaae, 0xaaaaaaaeaaaaaaae },
|
||||
{ 0xaaaaaaafaaaaaaaf, 0xaaaaaaafaaaaaaaf }
|
||||
};
|
||||
|
||||
*/
|
||||
static void
|
||||
bmw32_4way_init(bmw_4way_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
|
1251
algo/bmw/bmw.test
Normal file
1251
algo/bmw/bmw.test
Normal file
File diff suppressed because it is too large
Load Diff
@@ -477,7 +477,7 @@ do { \
|
||||
for (u = 0; u < 16; u ++) \
|
||||
sph_enc64le_aligned(data + 8 * u, h2[u]); \
|
||||
dh = h1; \
|
||||
h = final_b; \
|
||||
h = (sph_u64*)final_b; \
|
||||
} \
|
||||
/* end wrapped for break loop */ \
|
||||
out = dst; \
|
||||
|
@@ -1,2 +0,0 @@
|
||||
amd64
|
||||
x86
|
@@ -14,18 +14,20 @@
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
#if defined(__AES__)
|
||||
|
||||
#include <memory.h>
|
||||
#include "miner.h"
|
||||
#include "hash_api.h"
|
||||
#include "vperm.h"
|
||||
|
||||
//#include "vperm.h"
|
||||
#include <immintrin.h>
|
||||
/*
|
||||
#ifndef NO_AES_NI
|
||||
#include <wmmintrin.h>
|
||||
#else
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
*/
|
||||
|
||||
MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
|
||||
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
|
||||
@@ -246,7 +248,8 @@ void DumpState(__m128i *ps)
|
||||
void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
|
||||
{
|
||||
unsigned int r, b, i, j;
|
||||
__m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
|
||||
// __m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
|
||||
__m128i t1, t2, s2, k1;
|
||||
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
|
||||
|
||||
|
||||
@@ -396,7 +399,7 @@ HashReturn init_echo(hashState_echo *ctx, int nHashSize)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
ctx->k = _mm_xor_si128(ctx->k, ctx->k);
|
||||
ctx->k = _mm_setzero_si128();
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
|
||||
@@ -742,4 +745,4 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -1 +0,0 @@
|
||||
Çağdaş Çalık
|
@@ -1,120 +0,0 @@
|
||||
/*
|
||||
* file : vperm.h
|
||||
* version : 1.0.208
|
||||
* date : 14.12.2010
|
||||
*
|
||||
* vperm implementation of AES s-box
|
||||
*
|
||||
* Credits: Adapted from Mike Hamburg's AES implementation, http://crypto.stanford.edu/vpaes/
|
||||
*
|
||||
* Cagdas Calik
|
||||
* ccalik@metu.edu.tr
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef VPERM_H
|
||||
#define VPERM_H
|
||||
|
||||
#include "algo/sha/sha3_common.h"
|
||||
#include <tmmintrin.h>
|
||||
|
||||
/*
|
||||
extern const unsigned int _k_s0F[];
|
||||
extern const unsigned int _k_ipt[];
|
||||
extern const unsigned int _k_opt[];
|
||||
extern const unsigned int _k_inv[];
|
||||
extern const unsigned int _k_sb1[];
|
||||
extern const unsigned int _k_sb2[];
|
||||
extern const unsigned int _k_sb3[];
|
||||
extern const unsigned int _k_sb4[];
|
||||
extern const unsigned int _k_sb5[];
|
||||
extern const unsigned int _k_sb7[];
|
||||
extern const unsigned int _k_sbo[];
|
||||
extern const unsigned int _k_h63[];
|
||||
extern const unsigned int _k_hc6[];
|
||||
extern const unsigned int _k_h5b[];
|
||||
extern const unsigned int _k_h4e[];
|
||||
extern const unsigned int _k_h0e[];
|
||||
extern const unsigned int _k_h15[];
|
||||
extern const unsigned int _k_aesmix1[];
|
||||
extern const unsigned int _k_aesmix2[];
|
||||
extern const unsigned int _k_aesmix3[];
|
||||
extern const unsigned int _k_aesmix4[];
|
||||
*/
|
||||
|
||||
// input: x, table
|
||||
// output: x
|
||||
#define TRANSFORM(x, table, t1, t2)\
|
||||
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
|
||||
t1 = _mm_srli_epi32(t1, 4);\
|
||||
x = _mm_and_si128(x, M128(_k_s0F));\
|
||||
t1 = _mm_shuffle_epi8(*((__m128i*)table + 1), t1);\
|
||||
x = _mm_shuffle_epi8(*((__m128i*)table + 0), x);\
|
||||
x = _mm_xor_si128(x, t1)
|
||||
|
||||
#if 0
|
||||
// compiled erroneously with 32-bit msc compiler
|
||||
t2 = _mm_shuffle_epi8(table[0], x);\
|
||||
x = _mm_shuffle_epi8(table[1], t1);\
|
||||
x = _mm_xor_si128(x, t2)
|
||||
#endif
|
||||
|
||||
// input: x
|
||||
// output: t2, t3
|
||||
#define SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4)\
|
||||
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
|
||||
t1 = _mm_srli_epi32(t1, 4);\
|
||||
x = _mm_and_si128(x, M128(_k_s0F));\
|
||||
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 1), x);\
|
||||
x = _mm_xor_si128(x, t1);\
|
||||
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t1);\
|
||||
t3 = _mm_xor_si128(t3, t2);\
|
||||
t4 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), x);\
|
||||
t4 = _mm_xor_si128(t4, t2);\
|
||||
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t3);\
|
||||
t2 = _mm_xor_si128(t2, x);\
|
||||
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t4);\
|
||||
t3 = _mm_xor_si128(t3, t1);\
|
||||
|
||||
|
||||
// input: x1, x2, table
|
||||
// output: y
|
||||
#define VPERM_LOOKUP(x1, x2, table, y, t)\
|
||||
t = _mm_shuffle_epi8(*((__m128i*)table + 0), x1);\
|
||||
y = _mm_shuffle_epi8(*((__m128i*)table + 1), x2);\
|
||||
y = _mm_xor_si128(y, t)
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define SUBSTITUTE_VPERM(x, t1, t2, t3, t4) \
|
||||
TRANSFORM(x, _k_ipt, t1, t2);\
|
||||
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sbo, x, t1);\
|
||||
x = _mm_xor_si128(x, M128(_k_h63))
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3) \
|
||||
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sb1, s1, t1);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sb2, s2, t1);\
|
||||
s3 = _mm_xor_si128(s1, s2);\
|
||||
x = _mm_shuffle_epi8(s2, M128(_k_aesmix1));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s3, M128(_k_aesmix2)));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix3)));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix4)));\
|
||||
x = _mm_xor_si128(x, M128(_k_h5b))
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define AES_ROUND_VPERM(x, t1, t2, t3, t4, s1, s2, s3) \
|
||||
TRANSFORM(x, _k_ipt, t1, t2);\
|
||||
AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3);\
|
||||
TRANSFORM(x, _k_opt, t1, t2)
|
||||
|
||||
#endif // VPERM_H
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "myrgr-gate.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -10,8 +10,6 @@
|
||||
#else
|
||||
#include "aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
#include <openssl/sha.h>
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
typedef struct {
|
||||
@@ -20,11 +18,7 @@ typedef struct {
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
//#ifndef USE_SPH_SHA
|
||||
// SHA256_CTX sha;
|
||||
//#else
|
||||
sph_sha256_context sha;
|
||||
//#endif
|
||||
sph_sha256_context sha;
|
||||
} myrgr_ctx_holder;
|
||||
|
||||
myrgr_ctx_holder myrgr_ctx;
|
||||
@@ -36,44 +30,37 @@ void init_myrgr_ctx()
|
||||
#else
|
||||
init_groestl (&myrgr_ctx.groestl, 64 );
|
||||
#endif
|
||||
//#ifndef USE_SPH_SHA
|
||||
// SHA256_Init( &myrgr_ctx.sha );
|
||||
//#else
|
||||
sph_sha256_init( &myrgr_ctx.sha );
|
||||
//#endif
|
||||
sph_sha256_init(&myrgr_ctx.sha);
|
||||
}
|
||||
|
||||
void myriadhash( void *output, const void *input )
|
||||
void myriad_hash(void *output, const void *input)
|
||||
{
|
||||
myrgr_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
|
||||
uint32_t hash[16] __attribute__ ((aligned (64)));
|
||||
myrgr_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
|
||||
|
||||
uint32_t _ALIGN(32) hash[16];
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512(&ctx.groestl, input, 80);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512(&ctx.groestl, input, 80);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_and_final_groestl( &ctx.groestl, (char*)input,
|
||||
(const char*)input, 640 );
|
||||
update_groestl( &ctx.groestl, (char*)input, 640 );
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
#endif
|
||||
|
||||
//#ifndef USE_SPH_SHA
|
||||
// SHA256_Update( &ctx.sha, hash, 64 );
|
||||
// SHA256_Final( (unsigned char*) hash, &ctx.sha );
|
||||
//#else
|
||||
sph_sha256(&ctx.sha, hash, 64);
|
||||
sph_sha256_close(&ctx.sha, hash);
|
||||
//#endif
|
||||
memcpy(output, hash, 32);
|
||||
sph_sha256(&ctx.sha, hash, 64);
|
||||
sph_sha256_close(&ctx.sha, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_myriad(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
@@ -84,9 +71,9 @@ int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
do {
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8];
|
||||
be32enc(&endiandata[19], nonce);
|
||||
myriadhash(hash, endiandata);
|
||||
myriad_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
@@ -101,14 +88,15 @@ int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
bool register_myriad_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
init_myrgr_ctx();
|
||||
gate->scanhash = (void*)&scanhash_myriad;
|
||||
gate->hash = (void*)&myriadhash;
|
||||
// gate->hash_alt = (void*)&myriadhash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
*/
|
||||
|
134
algo/groestl/myrgr-4way.c
Normal file
134
algo/groestl/myrgr-4way.c
Normal file
@@ -0,0 +1,134 @@
|
||||
#include "myrgr-gate.h"
|
||||
|
||||
#if defined(MYRGR_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "aes_ni/hash-groestl.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
|
||||
typedef struct {
|
||||
hashState_groestl groestl;
|
||||
sha256_4way_context sha;
|
||||
} myrgr_4way_ctx_holder;
|
||||
|
||||
myrgr_4way_ctx_holder myrgr_4way_ctx;
|
||||
|
||||
void init_myrgr_4way_ctx()
|
||||
{
|
||||
init_groestl (&myrgr_4way_ctx.groestl, 64 );
|
||||
sha256_4way_init( &myrgr_4way_ctx.sha );
|
||||
}
|
||||
|
||||
void myriad_4way_hash( void *output, const void *input )
|
||||
{
|
||||
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[20] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||
myrgr_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_4way_ctx, sizeof(myrgr_4way_ctx) );
|
||||
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 640 );
|
||||
|
||||
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
sha256_4way( &ctx.sha, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha, vhash );
|
||||
|
||||
mm_deinterleave_4x32( output, output+32, output+64, output+96,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
/*
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
*/
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
myriad_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
18
algo/groestl/myrgr-gate.c
Normal file
18
algo/groestl/myrgr-gate.c
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "myrgr-gate.h"
|
||||
|
||||
bool register_myriad_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (MYRGR_4WAY)
|
||||
init_myrgr_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_myriad_4way;
|
||||
gate->hash = (void*)&myriad_4way_hash;
|
||||
#else
|
||||
init_myrgr_ctx();
|
||||
gate->scanhash = (void*)&scanhash_myriad;
|
||||
gate->hash = (void*)&myriad_hash;
|
||||
#endif
|
||||
gate->optimizations = AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
30
algo/groestl/myrgr-gate.h
Normal file
30
algo/groestl/myrgr-gate.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef MYRGR_GATE_H__
|
||||
#define MYRGR_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define MYRGR_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(MYRGR_4WAY)
|
||||
|
||||
void myriad_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_myrgr_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void myriad_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_myrgr_ctx();
|
||||
|
||||
#endif
|
||||
|
@@ -1,940 +0,0 @@
|
||||
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
|
||||
/*
|
||||
* Hamsi implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_hamsi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
|
||||
#define SPH_SMALL_FOOTPRINT_HAMSI 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
|
||||
* table lookup during message expansion (1 to 8, inclusive). If we note
|
||||
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
|
||||
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
|
||||
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
|
||||
* then we will get t tables (where t=ceil(w/n)) of individual size
|
||||
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
|
||||
* n=5, there are 7 tables, but the last one uses only two bits on
|
||||
* input, not five).
|
||||
*
|
||||
* Also, we read t rows of r words from RAM. Words in a given row are
|
||||
* concatenated in RAM in that order, so most of the cost is about
|
||||
* reading the first row word; comparatively, cache misses are thus
|
||||
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
|
||||
*
|
||||
* When n=1, tables are "special" in that we omit the first entry of
|
||||
* each table (which always contains 0), so that total table size is
|
||||
* halved.
|
||||
*
|
||||
* We thus have the following (size1 is the cumulative table size of
|
||||
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
|
||||
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
|
||||
*
|
||||
* n size1 size2 t1 t2
|
||||
* ---------------------------------------
|
||||
* 1 1024 4096 32 64
|
||||
* 2 2048 8192 16 32
|
||||
* 3 2688 10880 11 22
|
||||
* 4 4096 16384 8 16
|
||||
* 5 6272 25600 7 13
|
||||
* 6 10368 41984 6 11
|
||||
* 7 16896 73856 5 10
|
||||
* 8 32768 131072 4 8
|
||||
*
|
||||
* So there is a trade-off: a lower n makes the tables fit better in
|
||||
* L1 cache, but increases the number of memory accesses. The optimal
|
||||
* value depends on the amount of available L1 cache and the relative
|
||||
* impact of a cache miss.
|
||||
*
|
||||
* Experimentally, in ideal benchmark conditions (which are not necessarily
|
||||
* realistic with regards to L1 cache contention), it seems that n=8 is
|
||||
* the best value on "big" architectures (those with 32 kB or more of L1
|
||||
* cache), while n=4 is better on "small" architectures. This was tested
|
||||
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
|
||||
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
|
||||
* (8 kB L1 cache).
|
||||
*
|
||||
* Note: with n=1, the 32 tables (actually implemented as one big table)
|
||||
* are read entirely and sequentially, regardless of the input data,
|
||||
* thus avoiding any data-dependent table access pattern.
|
||||
*/
|
||||
|
||||
#if !defined SPH_HAMSI_EXPAND_SMALL
|
||||
#if SPH_SMALL_FOOTPRINT_HAMSI
|
||||
#define SPH_HAMSI_EXPAND_SMALL 4
|
||||
#else
|
||||
#define SPH_HAMSI_EXPAND_SMALL 8
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined SPH_HAMSI_EXPAND_BIG
|
||||
#define SPH_HAMSI_EXPAND_BIG 8
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#include "sph_hamsi_helper.c"
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
|
||||
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
|
||||
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
|
||||
};
|
||||
|
||||
/*
|
||||
* This version is the one used in the Hamsi submission package for
|
||||
* round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
|
||||
* shall soon be corrected in the official Hamsi specification.
|
||||
*
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
|
||||
SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
|
||||
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
|
||||
};
|
||||
*/
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
|
||||
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
|
||||
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
|
||||
};
|
||||
|
||||
static const sph_u32 IV384[] = {
|
||||
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
|
||||
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
|
||||
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
|
||||
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
|
||||
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
|
||||
SPH_C32(0x2c204b61)
|
||||
};
|
||||
|
||||
static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
|
||||
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
|
||||
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
|
||||
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
|
||||
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
|
||||
SPH_C32(0x6769756d)
|
||||
};
|
||||
|
||||
static const sph_u32 alpha_n[] = {
|
||||
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
|
||||
};
|
||||
|
||||
static const sph_u32 alpha_f[] = {
|
||||
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
|
||||
};
|
||||
|
||||
#define DECL_STATE_SMALL \
|
||||
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
|
||||
#define READ_STATE_SMALL(sc) do { \
|
||||
c0 = sc->h[0x0]; \
|
||||
c1 = sc->h[0x1]; \
|
||||
c2 = sc->h[0x2]; \
|
||||
c3 = sc->h[0x3]; \
|
||||
c4 = sc->h[0x4]; \
|
||||
c5 = sc->h[0x5]; \
|
||||
c6 = sc->h[0x6]; \
|
||||
c7 = sc->h[0x7]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE_SMALL(sc) do { \
|
||||
sc->h[0x0] = c0; \
|
||||
sc->h[0x1] = c1; \
|
||||
sc->h[0x2] = c2; \
|
||||
sc->h[0x3] = c3; \
|
||||
sc->h[0x4] = c4; \
|
||||
sc->h[0x5] = c5; \
|
||||
sc->h[0x6] = c6; \
|
||||
sc->h[0x7] = c7; \
|
||||
} while (0)
|
||||
|
||||
#define s0 m0
|
||||
#define s1 m1
|
||||
#define s2 c0
|
||||
#define s3 c1
|
||||
#define s4 c2
|
||||
#define s5 c3
|
||||
#define s6 m2
|
||||
#define s7 m3
|
||||
#define s8 m4
|
||||
#define s9 m5
|
||||
#define sA c4
|
||||
#define sB c5
|
||||
#define sC c6
|
||||
#define sD c7
|
||||
#define sE m6
|
||||
#define sF m7
|
||||
|
||||
#define SBOX(a, b, c, d) do { \
|
||||
sph_u32 t; \
|
||||
t = (a); \
|
||||
(a) &= (c); \
|
||||
(a) ^= (d); \
|
||||
(c) ^= (b); \
|
||||
(c) ^= (a); \
|
||||
(d) |= t; \
|
||||
(d) ^= (b); \
|
||||
t ^= (c); \
|
||||
(b) = (d); \
|
||||
(d) |= t; \
|
||||
(d) ^= (a); \
|
||||
(a) &= (b); \
|
||||
t ^= (a); \
|
||||
(b) ^= (d); \
|
||||
(b) ^= t; \
|
||||
(a) = (c); \
|
||||
(c) = (b); \
|
||||
(b) = (d); \
|
||||
(d) = SPH_T32(~t); \
|
||||
} while (0)
|
||||
|
||||
#define L(a, b, c, d) do { \
|
||||
(a) = SPH_ROTL32(a, 13); \
|
||||
(c) = SPH_ROTL32(c, 3); \
|
||||
(b) ^= (a) ^ (c); \
|
||||
(d) ^= (c) ^ SPH_T32((a) << 3); \
|
||||
(b) = SPH_ROTL32(b, 1); \
|
||||
(d) = SPH_ROTL32(d, 7); \
|
||||
(a) ^= (b) ^ (d); \
|
||||
(c) ^= (d) ^ SPH_T32((b) << 7); \
|
||||
(a) = SPH_ROTL32(a, 5); \
|
||||
(c) = SPH_ROTL32(c, 22); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_SMALL(rc, alpha) do { \
|
||||
s0 ^= alpha[0x00]; \
|
||||
s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
|
||||
s2 ^= alpha[0x02]; \
|
||||
s3 ^= alpha[0x03]; \
|
||||
s4 ^= alpha[0x08]; \
|
||||
s5 ^= alpha[0x09]; \
|
||||
s6 ^= alpha[0x0A]; \
|
||||
s7 ^= alpha[0x0B]; \
|
||||
s8 ^= alpha[0x10]; \
|
||||
s9 ^= alpha[0x11]; \
|
||||
sA ^= alpha[0x12]; \
|
||||
sB ^= alpha[0x13]; \
|
||||
sC ^= alpha[0x18]; \
|
||||
sD ^= alpha[0x19]; \
|
||||
sE ^= alpha[0x1A]; \
|
||||
sF ^= alpha[0x1B]; \
|
||||
SBOX(s0, s4, s8, sC); \
|
||||
SBOX(s1, s5, s9, sD); \
|
||||
SBOX(s2, s6, sA, sE); \
|
||||
SBOX(s3, s7, sB, sF); \
|
||||
L(s0, s5, sA, sF); \
|
||||
L(s1, s6, sB, sC); \
|
||||
L(s2, s7, s8, sD); \
|
||||
L(s3, s4, s9, sE); \
|
||||
} while (0)
|
||||
|
||||
#define P_SMALL do { \
|
||||
ROUND_SMALL(0, alpha_n); \
|
||||
ROUND_SMALL(1, alpha_n); \
|
||||
ROUND_SMALL(2, alpha_n); \
|
||||
} while (0)
|
||||
|
||||
#define PF_SMALL do { \
|
||||
ROUND_SMALL(0, alpha_f); \
|
||||
ROUND_SMALL(1, alpha_f); \
|
||||
ROUND_SMALL(2, alpha_f); \
|
||||
ROUND_SMALL(3, alpha_f); \
|
||||
ROUND_SMALL(4, alpha_f); \
|
||||
ROUND_SMALL(5, alpha_f); \
|
||||
} while (0)
|
||||
|
||||
#define T_SMALL do { \
|
||||
/* order is important */ \
|
||||
c7 = (sc->h[7] ^= sB); \
|
||||
c6 = (sc->h[6] ^= sA); \
|
||||
c5 = (sc->h[5] ^= s9); \
|
||||
c4 = (sc->h[4] ^= s8); \
|
||||
c3 = (sc->h[3] ^= s3); \
|
||||
c2 = (sc->h[2] ^= s2); \
|
||||
c1 = (sc->h[1] ^= s1); \
|
||||
c0 = (sc->h[0] ^= s0); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
|
||||
{
|
||||
DECL_STATE_SMALL
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->count += (sph_u64)num << 5;
|
||||
#else
|
||||
tmp = SPH_T32((sph_u32)num << 5);
|
||||
sc->count_low = SPH_T32(sc->count_low + tmp);
|
||||
sc->count_high += (sph_u32)((num >> 13) >> 14);
|
||||
if (sc->count_low < tmp)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
READ_STATE_SMALL(sc);
|
||||
while (num -- > 0) {
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
|
||||
INPUT_SMALL;
|
||||
P_SMALL;
|
||||
T_SMALL;
|
||||
buf += 4;
|
||||
}
|
||||
WRITE_STATE_SMALL(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
|
||||
{
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
DECL_STATE_SMALL
|
||||
|
||||
READ_STATE_SMALL(sc);
|
||||
INPUT_SMALL;
|
||||
PF_SMALL;
|
||||
T_SMALL;
|
||||
WRITE_STATE_SMALL(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
memcpy(sc->h, iv, sizeof sc->h);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
if (sc->partial_len != 0) {
|
||||
size_t mlen;
|
||||
|
||||
mlen = 4 - sc->partial_len;
|
||||
if (len < mlen) {
|
||||
memcpy(sc->partial + sc->partial_len, data, len);
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
} else {
|
||||
memcpy(sc->partial + sc->partial_len, data, mlen);
|
||||
len -= mlen;
|
||||
data = (const unsigned char *)data + mlen;
|
||||
hamsi_small(sc, sc->partial, 1);
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
hamsi_small(sc, data, (len >> 2));
|
||||
data = (const unsigned char *)data + (len & ~(size_t)3);
|
||||
len &= (size_t)3;
|
||||
memcpy(sc->partial, data, len);
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_close(sph_hamsi_small_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char pad[12];
|
||||
size_t ptr, u;
|
||||
unsigned z;
|
||||
unsigned char *out;
|
||||
|
||||
ptr = sc->partial_len;
|
||||
memcpy(pad, sc->partial, ptr);
|
||||
#if SPH_64
|
||||
sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
|
||||
#else
|
||||
sph_enc32be(pad + 4, sc->count_high);
|
||||
sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
|
||||
#endif
|
||||
z = 0x80 >> n;
|
||||
pad[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
while (ptr < 4)
|
||||
pad[ptr ++] = 0;
|
||||
hamsi_small(sc, pad, 2);
|
||||
hamsi_small_final(sc, pad + 8);
|
||||
out = dst;
|
||||
for (u = 0; u < out_size_w32; u ++)
|
||||
sph_enc32be(out + (u << 2), sc->h[u]);
|
||||
}
|
||||
|
||||
#define DECL_STATE_BIG \
|
||||
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
|
||||
sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;
|
||||
|
||||
#define READ_STATE_BIG(sc) do { \
|
||||
c0 = sc->h[0x0]; \
|
||||
c1 = sc->h[0x1]; \
|
||||
c2 = sc->h[0x2]; \
|
||||
c3 = sc->h[0x3]; \
|
||||
c4 = sc->h[0x4]; \
|
||||
c5 = sc->h[0x5]; \
|
||||
c6 = sc->h[0x6]; \
|
||||
c7 = sc->h[0x7]; \
|
||||
c8 = sc->h[0x8]; \
|
||||
c9 = sc->h[0x9]; \
|
||||
cA = sc->h[0xA]; \
|
||||
cB = sc->h[0xB]; \
|
||||
cC = sc->h[0xC]; \
|
||||
cD = sc->h[0xD]; \
|
||||
cE = sc->h[0xE]; \
|
||||
cF = sc->h[0xF]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE_BIG(sc) do { \
|
||||
sc->h[0x0] = c0; \
|
||||
sc->h[0x1] = c1; \
|
||||
sc->h[0x2] = c2; \
|
||||
sc->h[0x3] = c3; \
|
||||
sc->h[0x4] = c4; \
|
||||
sc->h[0x5] = c5; \
|
||||
sc->h[0x6] = c6; \
|
||||
sc->h[0x7] = c7; \
|
||||
sc->h[0x8] = c8; \
|
||||
sc->h[0x9] = c9; \
|
||||
sc->h[0xA] = cA; \
|
||||
sc->h[0xB] = cB; \
|
||||
sc->h[0xC] = cC; \
|
||||
sc->h[0xD] = cD; \
|
||||
sc->h[0xE] = cE; \
|
||||
sc->h[0xF] = cF; \
|
||||
} while (0)
|
||||
|
||||
#define s00 m0
|
||||
#define s01 m1
|
||||
#define s02 c0
|
||||
#define s03 c1
|
||||
#define s04 m2
|
||||
#define s05 m3
|
||||
#define s06 c2
|
||||
#define s07 c3
|
||||
#define s08 c4
|
||||
#define s09 c5
|
||||
#define s0A m4
|
||||
#define s0B m5
|
||||
#define s0C c6
|
||||
#define s0D c7
|
||||
#define s0E m6
|
||||
#define s0F m7
|
||||
#define s10 m8
|
||||
#define s11 m9
|
||||
#define s12 c8
|
||||
#define s13 c9
|
||||
#define s14 mA
|
||||
#define s15 mB
|
||||
#define s16 cA
|
||||
#define s17 cB
|
||||
#define s18 cC
|
||||
#define s19 cD
|
||||
#define s1A mC
|
||||
#define s1B mD
|
||||
#define s1C cE
|
||||
#define s1D cF
|
||||
#define s1E mE
|
||||
#define s1F mF
|
||||
|
||||
#define ROUND_BIG(rc, alpha) do { \
|
||||
s00 ^= alpha[0x00]; \
|
||||
s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
|
||||
s02 ^= alpha[0x02]; \
|
||||
s03 ^= alpha[0x03]; \
|
||||
s04 ^= alpha[0x04]; \
|
||||
s05 ^= alpha[0x05]; \
|
||||
s06 ^= alpha[0x06]; \
|
||||
s07 ^= alpha[0x07]; \
|
||||
s08 ^= alpha[0x08]; \
|
||||
s09 ^= alpha[0x09]; \
|
||||
s0A ^= alpha[0x0A]; \
|
||||
s0B ^= alpha[0x0B]; \
|
||||
s0C ^= alpha[0x0C]; \
|
||||
s0D ^= alpha[0x0D]; \
|
||||
s0E ^= alpha[0x0E]; \
|
||||
s0F ^= alpha[0x0F]; \
|
||||
s10 ^= alpha[0x10]; \
|
||||
s11 ^= alpha[0x11]; \
|
||||
s12 ^= alpha[0x12]; \
|
||||
s13 ^= alpha[0x13]; \
|
||||
s14 ^= alpha[0x14]; \
|
||||
s15 ^= alpha[0x15]; \
|
||||
s16 ^= alpha[0x16]; \
|
||||
s17 ^= alpha[0x17]; \
|
||||
s18 ^= alpha[0x18]; \
|
||||
s19 ^= alpha[0x19]; \
|
||||
s1A ^= alpha[0x1A]; \
|
||||
s1B ^= alpha[0x1B]; \
|
||||
s1C ^= alpha[0x1C]; \
|
||||
s1D ^= alpha[0x1D]; \
|
||||
s1E ^= alpha[0x1E]; \
|
||||
s1F ^= alpha[0x1F]; \
|
||||
SBOX(s00, s08, s10, s18); \
|
||||
SBOX(s01, s09, s11, s19); \
|
||||
SBOX(s02, s0A, s12, s1A); \
|
||||
SBOX(s03, s0B, s13, s1B); \
|
||||
SBOX(s04, s0C, s14, s1C); \
|
||||
SBOX(s05, s0D, s15, s1D); \
|
||||
SBOX(s06, s0E, s16, s1E); \
|
||||
SBOX(s07, s0F, s17, s1F); \
|
||||
L(s00, s09, s12, s1B); \
|
||||
L(s01, s0A, s13, s1C); \
|
||||
L(s02, s0B, s14, s1D); \
|
||||
L(s03, s0C, s15, s1E); \
|
||||
L(s04, s0D, s16, s1F); \
|
||||
L(s05, s0E, s17, s18); \
|
||||
L(s06, s0F, s10, s19); \
|
||||
L(s07, s08, s11, s1A); \
|
||||
/*if (rc == 0 ) { \
|
||||
printf("S L5 post s10 %08lx s11 %08lx s12 %08lx s13 %08lx\n",s10,s11,s12,s13); \
|
||||
}*/ \
|
||||
L(s00, s02, s05, s07); \
|
||||
L(s10, s13, s15, s16); \
|
||||
/*if (rc == 0 ) { \
|
||||
printf("S L5 post s10 %08lx s11 %08lx s12 %08lx s13 %08lx\n",s10,s11,s12,s13); \
|
||||
}*/ \
|
||||
L(s09, s0B, s0C, s0E); \
|
||||
L(s19, s1A, s1C, s1F); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_HAMSI
|
||||
|
||||
#define P_BIG do { \
|
||||
unsigned r; \
|
||||
for (r = 0; r < 6; r ++) \
|
||||
ROUND_BIG(r, alpha_n); \
|
||||
} while (0)
|
||||
|
||||
#define PF_BIG do { \
|
||||
unsigned r; \
|
||||
for (r = 0; r < 12; r ++) \
|
||||
ROUND_BIG(r, alpha_f); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define P_BIG do { \
|
||||
ROUND_BIG(0, alpha_n); \
|
||||
/*printf("S R0 s00 %08lx s01 %08lx s02 %08lx s03 %08lx\n",s00,s01,s02,s03); \
|
||||
printf("S R0 s04 %08lx s05 %08lx s06 %08lx s07 %08lx\n",s04,s05,s06,s07); \
|
||||
printf("S R0 s08 %08lx s09 %08lx s0A %08lx s0B %08lx\n",s08,s09,s0A,s0B); \
|
||||
printf("S R0 s0C %08lx s0D %08lx s0E %08lx s0F %08lx\n",s0C,s0D,s0E,s0F); \
|
||||
printf("S R0 s10 %08lx s11 %08lx s12 %08lx s13 %08lx\n",s10,s11,s12,s13); \
|
||||
printf("S R0 s14 %08lx s15 %08lx s16 %08lx s17 %08lx\n",s14,s15,s16,s17); \
|
||||
printf("S R0 s18 %08lx s19 %08lx s1A %08lx s1B %08lx\n",s18,s19,s1A,s1B); \
|
||||
printf("S R0 s1C %08lx s1D %08lx s1E %08lx s1F %08lx\n",s1C,s1D,s1E,s1F); \
|
||||
*/\
|
||||
ROUND_BIG(1, alpha_n); \
|
||||
ROUND_BIG(2, alpha_n); \
|
||||
ROUND_BIG(3, alpha_n); \
|
||||
ROUND_BIG(4, alpha_n); \
|
||||
ROUND_BIG(5, alpha_n); \
|
||||
} while (0)
|
||||
|
||||
#define PF_BIG do { \
|
||||
ROUND_BIG(0, alpha_f); \
|
||||
ROUND_BIG(1, alpha_f); \
|
||||
ROUND_BIG(2, alpha_f); \
|
||||
ROUND_BIG(3, alpha_f); \
|
||||
ROUND_BIG(4, alpha_f); \
|
||||
ROUND_BIG(5, alpha_f); \
|
||||
ROUND_BIG(6, alpha_f); \
|
||||
ROUND_BIG(7, alpha_f); \
|
||||
ROUND_BIG(8, alpha_f); \
|
||||
ROUND_BIG(9, alpha_f); \
|
||||
ROUND_BIG(10, alpha_f); \
|
||||
ROUND_BIG(11, alpha_f); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#define T_BIG do { \
|
||||
/* order is important */ \
|
||||
cF = (sc->h[0xF] ^= s17); \
|
||||
cE = (sc->h[0xE] ^= s16); \
|
||||
cD = (sc->h[0xD] ^= s15); \
|
||||
cC = (sc->h[0xC] ^= s14); \
|
||||
cB = (sc->h[0xB] ^= s13); \
|
||||
cA = (sc->h[0xA] ^= s12); \
|
||||
c9 = (sc->h[0x9] ^= s11); \
|
||||
c8 = (sc->h[0x8] ^= s10); \
|
||||
c7 = (sc->h[0x7] ^= s07); \
|
||||
c6 = (sc->h[0x6] ^= s06); \
|
||||
c5 = (sc->h[0x5] ^= s05); \
|
||||
c4 = (sc->h[0x4] ^= s04); \
|
||||
c3 = (sc->h[0x3] ^= s03); \
|
||||
c2 = (sc->h[0x2] ^= s02); \
|
||||
c1 = (sc->h[0x1] ^= s01); \
|
||||
c0 = (sc->h[0x0] ^= s00); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
|
||||
{
|
||||
DECL_STATE_BIG
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->count += (sph_u64)num << 6;
|
||||
#else
|
||||
tmp = SPH_T32((sph_u32)num << 6);
|
||||
sc->count_low = SPH_T32(sc->count_low + tmp);
|
||||
sc->count_high += (sph_u32)((num >> 13) >> 13);
|
||||
if (sc->count_low < tmp)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
READ_STATE_BIG(sc);
|
||||
/*
|
||||
uint32_t* b = (uint32_t*)buf;
|
||||
//printf("S s64: %016llx\n",*ss);
|
||||
//printf("S buf: %08lx %08lx\n",b[0], b[1]);
|
||||
|
||||
int n1 = 1;
|
||||
int n2 = 1;
|
||||
*/
|
||||
while (num -- > 0) {
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
|
||||
INPUT_BIG;
|
||||
/*if ( n1 )
|
||||
{
|
||||
n1 = 0;
|
||||
printf("S INPUT m: %08lx %08lx %08lx %08lx\n",m0,m1,m2,m3 );
|
||||
printf("S INPUT m: %08lx %08lx %08lx %08lx\n",m4,m5,m6,m7);
|
||||
printf("S INPUT m: %08lx %08lx %08lx %08lx\n",m8,m9,mA,mB );
|
||||
printf("S INPUT m: %08lx %08lx %08lx %08lx\n",mC,mD,mE,mF);
|
||||
}
|
||||
*/
|
||||
|
||||
P_BIG;
|
||||
|
||||
/*if ( n2 )
|
||||
{
|
||||
n2 = 0;
|
||||
printf("S P_BIG s: %08lx %08lx %08lx %08lx\n",s00,s01,s02,s03 );
|
||||
printf("S P_BIG s: %08lx %08lx %08lx %08lx\n",s04,s05,s07,s07);
|
||||
printf("S P_BIG s: %08lx %08lx %08lx %08lx\n",s08,s09,s0A,s0B );
|
||||
printf("S P_BIG s: %08lx %08lx %08lx %08lx\n",s0C,s0D,s0E,s0F);
|
||||
}
|
||||
*/
|
||||
|
||||
T_BIG;
|
||||
buf += 8;
|
||||
}
|
||||
WRITE_STATE_BIG(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
|
||||
{
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
DECL_STATE_BIG
|
||||
|
||||
READ_STATE_BIG(sc);
|
||||
INPUT_BIG;
|
||||
PF_BIG;
|
||||
T_BIG;
|
||||
WRITE_STATE_BIG(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
memcpy(sc->h, iv, sizeof sc->h);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
|
||||
{
|
||||
uint64_t* d = (uint64_t*)data;
|
||||
uint64_t* h = (uint64_t*)sc->h;
|
||||
/*
|
||||
printf("S core1 len = %d\n",len);
|
||||
printf("S data: %016llx %016llx %016llx %016llx\n",d[0],d[1],d[2],d[3]);
|
||||
printf("S data: %016llx %016llx %016llx %016llx\n",d[4],d[5],d[6],d[7]);
|
||||
printf("S H: %016llx %016llx %016llx %016llx\n",h[0],h[1],h[2],h[3]);
|
||||
*/
|
||||
if (sc->partial_len != 0) {
|
||||
//printf("WARNING partial_len != 0\n");
|
||||
|
||||
size_t mlen;
|
||||
|
||||
mlen = 8 - sc->partial_len;
|
||||
if (len < mlen) {
|
||||
memcpy(sc->partial + sc->partial_len, data, len);
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
} else {
|
||||
memcpy(sc->partial + sc->partial_len, data, mlen);
|
||||
len -= mlen;
|
||||
data = (const unsigned char *)data + mlen;
|
||||
hamsi_big(sc, sc->partial, 1);
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
hamsi_big(sc, data, (len >> 3));
|
||||
/*
|
||||
printf("S core2\n");
|
||||
printf("S H: %016llx %016llx %016llx %016llx\n",h[0],h[1],h[2],h[3]);
|
||||
*/
|
||||
data = (const unsigned char *)data + (len & ~(size_t)7);
|
||||
len &= (size_t)7;
|
||||
memcpy(sc->partial, data, len);
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_close(sph_hamsi_big_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char pad[8];
|
||||
size_t ptr, u;
|
||||
unsigned z;
|
||||
unsigned char *out;
|
||||
//uint64_t* h = (uint64_t*)sc->h;
|
||||
|
||||
ptr = sc->partial_len;
|
||||
#if SPH_64
|
||||
sph_enc64be(pad, sc->count + (ptr << 3) + n);
|
||||
#else
|
||||
sph_enc32be(pad, sc->count_high);
|
||||
sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
|
||||
#endif
|
||||
z = 0x80 >> n;
|
||||
sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
while (ptr < 8)
|
||||
sc->partial[ptr ++] = 0;
|
||||
|
||||
//printf("S close1\n");
|
||||
//printf("S H: %016llx %016llx %016llx %016llx\n",h[0],h[1],h[2],h[3]);
|
||||
|
||||
hamsi_big(sc, sc->partial, 1);
|
||||
|
||||
//printf("S close2\n");
|
||||
//printf("S H: %016llx %016llx %016llx %016llx\n",h[0],h[1],h[2],h[3]);
|
||||
|
||||
|
||||
hamsi_big_final(sc, pad);
|
||||
|
||||
//printf("S close3\n");
|
||||
//printf("S H: %016llx %016llx %016llx %016llx\n",h[0],h[1],h[2],h[3]);
|
||||
|
||||
|
||||
out = dst;
|
||||
if (out_size_w32 == 12) {
|
||||
sph_enc32be(out + 0, sc->h[ 0]);
|
||||
sph_enc32be(out + 4, sc->h[ 1]);
|
||||
sph_enc32be(out + 8, sc->h[ 3]);
|
||||
sph_enc32be(out + 12, sc->h[ 4]);
|
||||
sph_enc32be(out + 16, sc->h[ 5]);
|
||||
sph_enc32be(out + 20, sc->h[ 6]);
|
||||
sph_enc32be(out + 24, sc->h[ 8]);
|
||||
sph_enc32be(out + 28, sc->h[ 9]);
|
||||
sph_enc32be(out + 32, sc->h[10]);
|
||||
sph_enc32be(out + 36, sc->h[12]);
|
||||
sph_enc32be(out + 40, sc->h[13]);
|
||||
sph_enc32be(out + 44, sc->h[15]);
|
||||
} else {
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc32be(out + (u << 2), sc->h[u]);
|
||||
}
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224_init(void *cc)
|
||||
{
|
||||
hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_small_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, 0, 0, dst, 7);
|
||||
// hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, ub, n, dst, 7);
|
||||
// hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256_init(void *cc)
|
||||
{
|
||||
hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_small_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, 0, 0, dst, 8);
|
||||
// hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, ub, n, dst, 8);
|
||||
// hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384_init(void *cc)
|
||||
{
|
||||
hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_big_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, 0, 0, dst, 12);
|
||||
// hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, ub, n, dst, 12);
|
||||
// hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512_init(void *cc)
|
||||
{
|
||||
hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_big_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, 0, 0, dst, 16);
|
||||
// hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, ub, n, dst, 16);
|
||||
// hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
@@ -95,7 +95,7 @@ int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
GenRandomGarbage( hodl_scratchbuf, work->data, thr_id );
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, thr_id );
|
||||
pthread_barrier_wait( &hodl_barrier );
|
||||
return scanhash_hodl_wolf( thr_id, work, max_nonce, hashes_done );
|
||||
#endif
|
||||
|
@@ -339,13 +339,13 @@ do { \
|
||||
jhSbuffer[53] = 0x00, \
|
||||
jhSbuffer[54] = 0x00, \
|
||||
jhSbuffer[55] = 0x00; \
|
||||
jhSbuffer[56] = ((64*8) >> 56) & 0xff, \
|
||||
jhSbuffer[57] = ((64*8) >> 48) & 0xff, \
|
||||
jhSbuffer[58] = ((64*8) >> 40) & 0xff, \
|
||||
jhSbuffer[59] = ((64*8) >> 32) & 0xff, \
|
||||
jhSbuffer[60] = ((64*8) >> 24) & 0xff, \
|
||||
jhSbuffer[61] = ((64*8) >> 16) & 0xff, \
|
||||
jhSbuffer[62] = ((64*8) >> 8) & 0xff, \
|
||||
jhSbuffer[56] = ((char)((uint64_t)(64*8) >> 56)) & 0xff, \
|
||||
jhSbuffer[57] = ((char)((uint64_t)(64*8) >> 48)) & 0xff, \
|
||||
jhSbuffer[58] = ((char)((uint64_t)(64*8) >> 40)) & 0xff, \
|
||||
jhSbuffer[59] = ((char)((uint64_t)(64*8) >> 32)) & 0xff, \
|
||||
jhSbuffer[60] = ((char)((uint64_t)(64*8) >> 24)) & 0xff, \
|
||||
jhSbuffer[61] = ((char)((uint64_t)(64*8) >> 16)) & 0xff, \
|
||||
jhSbuffer[62] = ((char)((uint64_t)(64*8) >> 8)) & 0xff, \
|
||||
jhSbuffer[63] = (64*8) & 0xff; \
|
||||
b = true; \
|
||||
} \
|
||||
|
@@ -775,10 +775,8 @@ static const sph_u64 RC[] = {
|
||||
KF_ELT( 5, 6, RC[j + 5]); \
|
||||
KF_ELT( 6, 7, RC[j + 6]); \
|
||||
KF_ELT( 7, 8, RC[j + 7]); \
|
||||
*/
|
||||
|
||||
//kekDECL_STATE \
|
||||
|
||||
kekDECL_STATE \
|
||||
*/
|
||||
#define DECL_KEC
|
||||
|
||||
|
||||
|
@@ -35,7 +35,7 @@
|
||||
|
||||
#define MULT2(a0,a1) \
|
||||
do { \
|
||||
__m256i b = _mm256_xor_si256( a0, \
|
||||
register __m256i b = _mm256_xor_si256( a0, \
|
||||
_mm256_shuffle_epi32( _mm256_and_si256(a1,MASK), 16 ) ); \
|
||||
a0 = _mm256_or_si256( _mm256_srli_si256(b,4), _mm256_slli_si256(a1,12) ); \
|
||||
a1 = _mm256_or_si256( _mm256_srli_si256(a1,4), _mm256_slli_si256(b,12) ); \
|
||||
@@ -253,42 +253,42 @@ __m256i CNS[32];
|
||||
|
||||
void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
{
|
||||
__m256i t[2];
|
||||
__m256i t0, t1;
|
||||
__m256i *chainv = state->chainv;
|
||||
__m256i msg0, msg1;
|
||||
__m256i tmp[2];
|
||||
__m256i x[8];
|
||||
|
||||
t[0] = chainv[0];
|
||||
t[1] = chainv[1];
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[2] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[3] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[4] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[5] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[6] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[7] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[8] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[9] );
|
||||
t0 = _mm256_xor_si256( t0, chainv[2] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[3] );
|
||||
t0 = _mm256_xor_si256( t0, chainv[4] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[5] );
|
||||
t0 = _mm256_xor_si256( t0, chainv[6] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[7] );
|
||||
t0 = _mm256_xor_si256( t0, chainv[8] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[9] );
|
||||
|
||||
MULT2( t[0], t[1] );
|
||||
MULT2( t0, t1 );
|
||||
|
||||
msg0 = _mm256_shuffle_epi32( msg[0], 27 );
|
||||
msg1 = _mm256_shuffle_epi32( msg[1], 27 );
|
||||
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], t[0] );
|
||||
chainv[1] = _mm256_xor_si256( chainv[1], t[1] );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], t[0] );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], t[1] );
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], t[0] );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], t[1] );
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], t[0] );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], t[1] );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t[0] );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t[1] );
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], t0 );
|
||||
chainv[1] = _mm256_xor_si256( chainv[1], t1 );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], t0 );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], t1 );
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], t0 );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], t1 );
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], t0 );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], t1 );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t1 );
|
||||
|
||||
t[0] = chainv[0];
|
||||
t[1] = chainv[1];
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
|
||||
MULT2( chainv[0], chainv[1]);
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], chainv[2] );
|
||||
@@ -307,11 +307,11 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
chainv[7] = _mm256_xor_si256(chainv[7], chainv[9]);
|
||||
|
||||
MULT2( chainv[8], chainv[9]);
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t[0] );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t[1] );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t1 );
|
||||
|
||||
t[0] = chainv[8];
|
||||
t[1] = chainv[9];
|
||||
t0 = chainv[8];
|
||||
t1 = chainv[9];
|
||||
|
||||
MULT2( chainv[8], chainv[9]);
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], chainv[6] );
|
||||
@@ -330,8 +330,8 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], chainv[1] );
|
||||
|
||||
MULT2( chainv[0], chainv[1] );
|
||||
chainv[0] = _mm256_xor_si256( _mm256_xor_si256( chainv[0], t[0] ), msg0 );
|
||||
chainv[1] = _mm256_xor_si256( _mm256_xor_si256( chainv[1], t[1] ), msg1 );
|
||||
chainv[0] = _mm256_xor_si256( _mm256_xor_si256( chainv[0], t0 ), msg0 );
|
||||
chainv[1] = _mm256_xor_si256( _mm256_xor_si256( chainv[1], t1 ), msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], msg0 );
|
||||
@@ -380,22 +380,21 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
chainv[1],chainv[3],chainv[5],chainv[7]);
|
||||
|
||||
/* Process last 256-bit block */
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[16], CNS[17],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[16], CNS[17],
|
||||
tmp[0], tmp[1] );
|
||||
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[18], CNS[19],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[18], CNS[19],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[20], CNS[21],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[20], CNS[21],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[22], CNS[23],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[22], CNS[23],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[24], CNS[25],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[24], CNS[25],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[26], CNS[27],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[26], CNS[27],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[28], CNS[29],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[28], CNS[29],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS[30], CNS[31],
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[30], CNS[31],
|
||||
tmp[0], tmp[1] );
|
||||
}
|
||||
|
||||
|
163
algo/lyra2/allium-4way.c
Normal file
163
algo/lyra2/allium-4way.c
Normal file
@@ -0,0 +1,163 @@
|
||||
#include "allium-gate.h"
|
||||
#include <memory.h>
|
||||
|
||||
#if defined (ALLIUM_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
hashState_groestl256 groestl;
|
||||
|
||||
} allium_4way_ctx_holder;
|
||||
|
||||
static allium_4way_ctx_holder allium_4way_ctx;
|
||||
|
||||
void init_allium_4way_ctx()
|
||||
{
|
||||
keccak256_4way_init( &allium_4way_ctx.keccak );
|
||||
cubehashInit( &allium_4way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &allium_4way_ctx.skein );
|
||||
init_groestl256( &allium_4way_ctx.groestl, 32 );
|
||||
}
|
||||
|
||||
void allium_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
uint32_t vhash32[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash64[8*4] __attribute__ ((aligned (64)));
|
||||
allium_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &allium_4way_ctx, sizeof(allium_4way_ctx) );
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash32 );
|
||||
|
||||
mm256_reinterleave_4x64( vhash64, vhash32, 256 );
|
||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*)hash1, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*)hash2, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, 32 );
|
||||
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
mm256_interleave_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 76; // 19*4
|
||||
uint32_t *noncep1 = vdata + 77;
|
||||
uint32_t *noncep2 = vdata + 78;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
blake256_4way_init( &allium_4way_ctx.blake );
|
||||
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
allium_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
22
algo/lyra2/allium-gate.c
Normal file
22
algo/lyra2/allium-gate.c
Normal file
@@ -0,0 +1,22 @@
|
||||
#include "allium-gate.h"
|
||||
|
||||
int64_t get_max64_0xFFFFLL() { return 0xFFFFLL; }
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ALLIUM_4WAY)
|
||||
init_allium_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_allium_4way;
|
||||
gate->hash = (void*)&allium_4way_hash;
|
||||
#else
|
||||
init_allium_ctx();
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0xFFFFLL;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
29
algo/lyra2/allium-gate.h
Normal file
29
algo/lyra2/allium-gate.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef ALLIUM_GATE_H__
|
||||
#define ALLIUM_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
#include "lyra2.h"
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define ALLIUM_4WAY
|
||||
#endif
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(ALLIUM_4WAY)
|
||||
|
||||
void allium_4way_hash( void *state, const void *input );
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
void init_allium_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void allium_hash( void *state, const void *input );
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
void init_allium_ctx();
|
||||
|
||||
#endif
|
||||
|
111
algo/lyra2/allium.c
Normal file
111
algo/lyra2/allium.c
Normal file
@@ -0,0 +1,111 @@
|
||||
#include "allium-gate.h"
|
||||
#include <memory.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#endif
|
||||
#include "lyra2.h"
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
sph_blake256_context blake;
|
||||
sph_keccak256_context keccak;
|
||||
sph_skein256_context skein;
|
||||
#if defined (__AES__)
|
||||
hashState_groestl256 groestl;
|
||||
#else
|
||||
sph_groestl256_context groestl;
|
||||
#endif
|
||||
} allium_ctx_holder;
|
||||
|
||||
static allium_ctx_holder allium_ctx;
|
||||
|
||||
void init_allium_ctx()
|
||||
{
|
||||
sph_keccak256_init( &allium_ctx.keccak );
|
||||
cubehashInit( &allium_ctx.cube, 256, 16, 32 );
|
||||
sph_skein256_init( &allium_ctx.skein );
|
||||
#if defined (__AES__)
|
||||
init_groestl256( &allium_ctx.groestl, 32 );
|
||||
#else
|
||||
sph_groestl256_init( &allium_ctx.groestl );
|
||||
#endif
|
||||
}
|
||||
|
||||
void allium_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
allium_ctx_holder ctx __attribute__ ((aligned (32)));
|
||||
|
||||
memcpy( &ctx, &allium_ctx, sizeof(allium_ctx) );
|
||||
sph_blake256( &ctx.blake, input + 64, 16 );
|
||||
sph_blake256_close( &ctx.blake, hash );
|
||||
|
||||
sph_keccak256( &ctx.keccak, hash, 32 );
|
||||
sph_keccak256_close( &ctx.keccak, hash );
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash, (const byte*)hash, 32 );
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
sph_skein256( &ctx.skein, hash, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash );
|
||||
|
||||
#if defined (__AES__)
|
||||
update_and_final_groestl256( &ctx.groestl, hash, hash, 256 );
|
||||
#else
|
||||
sph_groestl256( &ctx.groestl, hash, 32 );
|
||||
sph_groestl256_close( &ctx.groestl, hash );
|
||||
#endif
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x3ffff;
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
sph_blake256_init( &allium_ctx.blake );
|
||||
sph_blake256( &allium_ctx.blake, endiandata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], nonce );
|
||||
allium_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
123
algo/lyra2/allium.c.broke
Normal file
123
algo/lyra2/allium.c.broke
Normal file
@@ -0,0 +1,123 @@
|
||||
#include "allium-gate.h"
|
||||
#include <memory.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
sph_blake256_context blake;
|
||||
sph_keccak256_context keccak;
|
||||
sph_skein256_context skein;
|
||||
#if defined (__AES__)
|
||||
hashState_groestl256 groestl;
|
||||
#else
|
||||
sph_groestl256_context groestl;
|
||||
#endif
|
||||
} allium_ctx_holder;
|
||||
|
||||
static allium_ctx_holder allium_ctx;
|
||||
static __thread sph_blake256_context allium_blake_mid;
|
||||
|
||||
void init_allium_ctx()
|
||||
{
|
||||
cubehashInit( &allium_ctx.cube, 256, 16, 32 );
|
||||
sph_blake256_init( &allium_ctx.blake );
|
||||
sph_keccak256_init( &allium_ctx.keccak );
|
||||
sph_skein256_init( &allium_ctx.skein );
|
||||
#if defined (__AES__)
|
||||
init_groestl256( &allium_ctx.groestl, 32 );
|
||||
#else
|
||||
sph_groestl256_init( &allium_ctx.groestl );
|
||||
#endif
|
||||
}
|
||||
|
||||
void allium_blake256_midstate( const void* input )
|
||||
{
|
||||
memcpy( &allium_blake_mid, &allium_ctx.blake, sizeof allium_blake_mid );
|
||||
sph_blake256( &allium_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void allium_hash( void *state, const void *input )
|
||||
{
|
||||
allium_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &allium_ctx, sizeof(allium_ctx) );
|
||||
uint8_t hash[128] __attribute__ ((aligned (64)));
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
memcpy( &ctx.blake, &allium_blake_mid, sizeof allium_blake_mid );
|
||||
sph_blake256( &ctx.blake, (uint8_t*)input + midlen, tail );
|
||||
sph_blake256_close( &ctx.blake, hash );
|
||||
|
||||
sph_keccak256( &ctx.keccak, hash, 32 );
|
||||
sph_keccak256_close(&ctx.keccak, hash);
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
// LYRA2REV2( allium_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash, (const byte*)hash, 32 );
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
// LYRA2REV2( allium_wholeMatrix, hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
sph_skein256( &ctx.skein, hash, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash );
|
||||
|
||||
#if defined (__AES__)
|
||||
update_and_final_groestl256( &ctx.groestl, hash, hash, 256 );
|
||||
#else
|
||||
sph_groestl256( &ctx.skein, hash, 32 );
|
||||
sph_groestl256_close( &ctx.skein, hash );
|
||||
#endif
|
||||
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
allium_blake256_midstate( endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
allium_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
{
|
||||
if( fulltest(hash, ptarget) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -47,8 +47,9 @@
|
||||
*/
|
||||
|
||||
int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, const uint64_t nRows, const uint64_t nCols )
|
||||
const uint64_t pwdlen, const void *salt, const uint64_t saltlen,
|
||||
const uint64_t timeCost, const uint64_t nRows,
|
||||
const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
@@ -67,12 +68,14 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
//Tries to allocate enough space for the whole memory matrix
|
||||
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
// for Lyra2REv2, nCols = 4, v1 was using 8
|
||||
const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64
|
||||
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//=== Getting the password + salt + basil padded with 10*1 ==========//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
@@ -209,8 +212,9 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
}
|
||||
|
||||
int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, uint64_t nRows, uint64_t nCols )
|
||||
const uint64_t pwdlen, const void *salt, const uint64_t saltlen,
|
||||
const uint64_t timeCost, const uint64_t nRows,
|
||||
const uint64_t nCols )
|
||||
{
|
||||
//========================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
@@ -228,7 +232,9 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
//Tries to allocate enough space for the whole memory matrix
|
||||
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//==== Getting the password + salt + basil padded with 10*1 ============//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
@@ -347,9 +353,9 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
}
|
||||
|
||||
// Lyra2RE doesn't like the new wholeMatrix implementation
|
||||
int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, const uint64_t nRows, const uint64_t nCols )
|
||||
int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
||||
const void *salt, const uint64_t saltlen, const uint64_t timeCost,
|
||||
const uint64_t nRows, const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
@@ -374,18 +380,19 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
|
||||
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
|
||||
|
||||
i = (int64_t)ROW_LEN_BYTES * nRows;
|
||||
uint64_t *wholeMatrix = _mm_malloc( i, 64 );
|
||||
uint64_t *wholeMatrix = _mm_malloc( i, 32 );
|
||||
// uint64_t *wholeMatrix = _mm_malloc( i, 64 );
|
||||
if (wholeMatrix == NULL)
|
||||
return -1;
|
||||
/*
|
||||
#if defined (__AVX2__)
|
||||
memset_zero_m256i( (__m256i*)wholeMatrix, i/32 );
|
||||
#elif defined(__AVX__)
|
||||
memset_zero_m128i( (__m128i*)wholeMatrix, i/16 );
|
||||
#else
|
||||
|
||||
//#if defined (__AVX2__)
|
||||
// memset_zero_m256i( (__m256i*)wholeMatrix, i<<5 );
|
||||
//#elif defined(__AVX__)
|
||||
// memset_zero_m128i( (__m128i*)wholeMatrix, i<<4 );
|
||||
//#else
|
||||
memset(wholeMatrix, 0, i);
|
||||
#endif
|
||||
*/
|
||||
//#endif
|
||||
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
//=== Getting the password + salt + basil padded with 10*1 ==========//
|
||||
|
@@ -54,4 +54,6 @@ int LYRA2Z( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols);
|
||||
|
||||
#endif /* LYRA2_H_ */
|
||||
|
@@ -7,9 +7,6 @@
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
//#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
|
||||
typedef struct {
|
||||
@@ -18,19 +15,16 @@ typedef struct {
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
bmw256_4way_context bmw;
|
||||
// sph_bmw256_context bmw;
|
||||
} lyra2v2_4way_ctx_holder;
|
||||
|
||||
static lyra2v2_4way_ctx_holder l2v2_4way_ctx;
|
||||
|
||||
void init_lyra2rev2_4way_ctx()
|
||||
{
|
||||
// blake256_4way_init( &l2v2_4way_ctx.blake );
|
||||
keccak256_4way_init( &l2v2_4way_ctx.keccak );
|
||||
cubehashInit( &l2v2_4way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &l2v2_4way_ctx.skein );
|
||||
bmw256_4way_init( &l2v2_4way_ctx.bmw );
|
||||
// sph_bmw256_init( &l2v2_4way_ctx.bmw );
|
||||
}
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
@@ -45,7 +39,6 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
memcpy( &ctx, &l2v2_4way_ctx, sizeof(l2v2_4way_ctx) );
|
||||
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
// blake256_4way( &ctx.blake, input, 80 );
|
||||
blake256_4way_close( &ctx.blake, vhash );
|
||||
|
||||
mm256_reinterleave_4x64( vhash64, vhash, 256 );
|
||||
@@ -54,11 +47,11 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
@@ -71,36 +64,20 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
|
||||
memcpy( &ctx.cube, &l2v2_4way_ctx.cube, sizeof ctx.cube );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||
|
||||
|
||||
// BMW256 4way has a lane corruption problem, only lanes 0 & 2 produce
|
||||
// good hash. As a result this ugly workaround of running bmw256-4way
|
||||
// twice with data shuffled to get all 4 lanes of good hash.
|
||||
// The hash is then shuffled back into the appropriate lanes for output.
|
||||
// Not as fast but still faster than using sph serially.
|
||||
|
||||
// shift lane 1 data to lane 2.
|
||||
mm_interleave_4x32( vhash, hash0, hash0, hash1, hash1, 256 );
|
||||
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
bmw256_4way( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4way_close( &ctx.bmw, vhash );
|
||||
uint32_t trash[8] __attribute__ ((aligned (32)));
|
||||
// extract lane 0 as usual and lane2 containing lane 1 hash
|
||||
mm_deinterleave_4x32( state, trash, state+32, trash, vhash, 256 );
|
||||
// shift lane2 data to lane 0 and lane 3 data to lane 2
|
||||
mm_interleave_4x32( vhash, hash2, hash2, hash3, hash3, 256 );
|
||||
bmw256_4way_init( &ctx.bmw );
|
||||
bmw256_4way( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4way_close( &ctx.bmw, vhash );
|
||||
// extract lane 2 hash from lane 0 and lane 3 hash from lane 2.
|
||||
mm_deinterleave_4x32( state+64, trash, state+96, trash, vhash, 256 );
|
||||
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
@@ -144,7 +121,6 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
//printf("found0\n");
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[19] = n;
|
||||
@@ -152,7 +128,6 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
//printf("found1\n");
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
@@ -160,7 +135,6 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
//printf("found2\n");
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
@@ -168,7 +142,6 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
//printf("found3\n");
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
|
@@ -85,12 +85,12 @@ typedef unsigned int uint;
|
||||
U32TO8_BE((p) + 4, (uint32_t)((v) ));
|
||||
|
||||
|
||||
typedef uint8_t hash_digest[SCRYPT_HASH_DIGEST_SIZE];
|
||||
typedef uint8_t hash_digest[SCRYPT_HASH_DIGEST_SIZE] __attribute__ ((aligned (16)));
|
||||
|
||||
|
||||
/* SHA-256 */
|
||||
|
||||
static const uint32_t sha256_constants[64] = {
|
||||
static const uint32_t sha256_constants[64] __attribute__ ((aligned (16))) = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
@@ -123,10 +123,10 @@ static const uint32_t sha256_constants[64] = {
|
||||
|
||||
|
||||
typedef struct sha256_hash_state_t {
|
||||
uint32_t H[8];
|
||||
uint32_t H[8] __attribute__ ((aligned (16)));
|
||||
uint64_t T;
|
||||
uint32_t leftover;
|
||||
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
|
||||
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE] __attribute__ ((aligned (16)));
|
||||
} sha256_hash_state;
|
||||
|
||||
|
||||
@@ -242,7 +242,7 @@ typedef struct sha256_hmac_state_t {
|
||||
} sha256_hmac_state;
|
||||
|
||||
static void neoscrypt_hmac_init_sha256(sha256_hmac_state *st, const uint8_t *key, size_t keylen) {
|
||||
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
|
||||
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] __attribute__ ((aligned (16))) = {0};
|
||||
size_t i;
|
||||
|
||||
neoscrypt_hash_init_sha256(&st->inner);
|
||||
@@ -570,17 +570,17 @@ typedef struct blake2s_param_t {
|
||||
|
||||
/* State block of 180 bytes */
|
||||
typedef struct blake2s_state_t {
|
||||
uint h[8];
|
||||
uint h[8] __attribute__ ((aligned (16)));
|
||||
uint t[2];
|
||||
uint f[2];
|
||||
uchar buf[2 * BLAKE2S_BLOCK_SIZE];
|
||||
uchar buf[2 * BLAKE2S_BLOCK_SIZE] __attribute__ ((aligned (16)));
|
||||
uint buflen;
|
||||
} blake2s_state;
|
||||
|
||||
static void blake2s_compress(blake2s_state *S, const void *buf) {
|
||||
uint i;
|
||||
uint m[16];
|
||||
uint v[16];
|
||||
uint m[16] __attribute__ ((aligned (16)));
|
||||
uint v[16] __attribute__ ((aligned (16)));
|
||||
|
||||
neoscrypt_copy(m, buf, 64);
|
||||
neoscrypt_copy(v, S, 32);
|
||||
@@ -1082,6 +1082,7 @@ void neoscrypt_wait_for_diff( struct stratum_ctx *stratum )
|
||||
|
||||
bool register_neoscrypt_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_neoscrypt;
|
||||
gate->hash = (void*)&neoscrypt;
|
||||
gate->get_max64 = (void*)&get_neoscrypt_max64;
|
||||
|
@@ -21,6 +21,7 @@ void nist5hash( void *state, const void *input );
|
||||
|
||||
int scanhash_nist5( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
void init_nist5_ctx();
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
138
algo/ripemd/lbry-4way.c
Normal file
138
algo/ripemd/lbry-4way.c
Normal file
@@ -0,0 +1,138 @@
|
||||
#include "lbry-gate.h"
|
||||
|
||||
#if defined(LBRY_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "ripemd-hash-4way.h"
|
||||
|
||||
static __thread sha256_4way_context sha256_mid;
|
||||
|
||||
void lbry_4way_hash( void* output, const void* input )
|
||||
{
|
||||
sha256_4way_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
sha512_4way_context ctx_sha512;
|
||||
ripemd160_4way_context ctx_ripemd;
|
||||
uint32_t _ALIGN(64) vhashA[16<<2];
|
||||
uint32_t _ALIGN(64) vhashB[16<<2];
|
||||
uint32_t _ALIGN(64) vhashC[16<<2];
|
||||
|
||||
memcpy( &ctx_sha256, &sha256_mid, sizeof(ctx_sha256) );
|
||||
sha256_4way( &ctx_sha256, input+(64<<2), 48 );
|
||||
sha256_4way_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way( &ctx_sha256, vhashA, 32 );
|
||||
sha256_4way_close( &ctx_sha256, vhashA );
|
||||
|
||||
// sha512 64 bit data, 64 byte output
|
||||
mm256_reinterleave_4x64( vhashB, vhashA, 256 );
|
||||
sha512_4way_init( &ctx_sha512 );
|
||||
sha512_4way( &ctx_sha512, vhashB, 32 );
|
||||
sha512_4way_close( &ctx_sha512, vhashB );
|
||||
mm256_reinterleave_4x32( vhashA, vhashB, 512 );
|
||||
|
||||
ripemd160_4way_init( &ctx_ripemd );
|
||||
ripemd160_4way( &ctx_ripemd, vhashA, 32 );
|
||||
ripemd160_4way_close( &ctx_ripemd, vhashB );
|
||||
|
||||
ripemd160_4way_init( &ctx_ripemd );
|
||||
ripemd160_4way( &ctx_ripemd, vhashA+(8<<2), 32 );
|
||||
ripemd160_4way_close( &ctx_ripemd, vhashC );
|
||||
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way( &ctx_sha256, vhashB, 20 );
|
||||
sha256_4way( &ctx_sha256, vhashC, 20 );
|
||||
sha256_4way_close( &ctx_sha256, vhashA );
|
||||
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way( &ctx_sha256, vhashA, 32 );
|
||||
sha256_4way_close( &ctx_sha256, vhashA );
|
||||
|
||||
mm_deinterleave_4x32( output, output+32, output+64, output+96, vhashA, 256 );
|
||||
}
|
||||
|
||||
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[32*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[27];
|
||||
const uint32_t first_nonce = pdata[27];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t edata[32] __attribute__ ((aligned (64)));
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 108; // 27*4
|
||||
uint32_t *noncep1 = vdata + 109;
|
||||
uint32_t *noncep2 = vdata + 110;
|
||||
uint32_t *noncep3 = vdata + 111;
|
||||
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( edata, pdata, 32 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 1024 );
|
||||
sha256_4way_init( &sha256_mid );
|
||||
sha256_4way( &sha256_mid, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
lbry_4way_hash( hash, vdata );
|
||||
|
||||
if ( !( hash[7] & mask ) && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = pdata[27] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( !( (hash+16)[7] & mask ) && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( !( (hash+24)[7] & mask ) && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n+=4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
94
algo/ripemd/lbry-gate.c
Normal file
94
algo/ripemd/lbry-gate.c
Normal file
@@ -0,0 +1,94 @@
|
||||
#include "lbry-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
double lbry_calc_network_diff( struct work *work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
|
||||
uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
|
||||
uint32_t bits = (nbits & 0xffffff);
|
||||
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
||||
double d = (double)0x0000ffff / (double)bits;
|
||||
|
||||
for (int m=shift; m < 29; m++) d *= 256.0;
|
||||
for (int m=29; m < shift; m++) d /= 256.0;
|
||||
if (opt_debug_diff)
|
||||
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
// std_le should work but it doesn't
|
||||
void lbry_le_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
le32enc( &ntime, work->data[ LBRY_NTIME_INDEX ] );
|
||||
le32enc( &nonce, work->data[ LBRY_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( work->xnonce2, work->xnonce2_len);
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
unsigned char merkle_root[64] = { 0 };
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
algo_gate.gen_merkle_root( merkle_root, sctx );
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
|
||||
g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
|
||||
g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
|
||||
void lbry_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
int64_t lbry_get_max64() { return 0x1ffffLL; }
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
|
||||
#if defined (LBRY_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_lbry_4way;
|
||||
gate->hash = (void*)&lbry_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_lbry;
|
||||
gate->hash = (void*)&lbry_hash;
|
||||
#endif
|
||||
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
|
||||
gate->get_max64 = (void*)&lbry_get_max64;
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
gate->build_extraheader = (void*)&lbry_build_extraheader;
|
||||
gate->set_target = (void*)&lbry_set_target;
|
||||
gate->ntime_index = LBRY_NTIME_INDEX;
|
||||
gate->nbits_index = LBRY_NBITS_INDEX;
|
||||
gate->nonce_index = LBRY_NONCE_INDEX;
|
||||
gate->work_data_size = LBRY_WORK_DATA_SIZE;
|
||||
return true;
|
||||
}
|
||||
|
30
algo/ripemd/lbry-gate.h
Normal file
30
algo/ripemd/lbry-gate.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef LBRY_GATE_H__
|
||||
#define LBRY_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define LBRY_4WAY
|
||||
#endif
|
||||
|
||||
#define LBRY_NTIME_INDEX 25
|
||||
#define LBRY_NBITS_INDEX 26
|
||||
#define LBRY_NONCE_INDEX 27
|
||||
#define LBRY_WORK_DATA_SIZE 192
|
||||
#define LBRY_WORK_CMP_SIZE 76 // same as default
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(LBRY_4WAY)
|
||||
|
||||
void lbry_4way_hash( void *state, const void *input );
|
||||
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void lbry_hash( void *state, const void *input );
|
||||
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
@@ -1,19 +1,12 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "lbry-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "ripemd/sph_ripemd.h"
|
||||
#include "sha/sph_sha2.h"
|
||||
#include "sph_ripemd.h"
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#define LBRY_NTIME_INDEX 25
|
||||
#define LBRY_NBITS_INDEX 26
|
||||
#define LBRY_NONCE_INDEX 27
|
||||
#define LBRY_WORK_DATA_SIZE 192
|
||||
#define LBRY_WORK_CMP_SIZE 76 // same as default
|
||||
|
||||
|
||||
void lbry_hash(void* output, const void* input)
|
||||
{
|
||||
#ifndef USE_SPH_SHA
|
||||
@@ -151,88 +144,3 @@ int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
pdata[27] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
double lbry_calc_network_diff( struct work *work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
|
||||
uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
|
||||
uint32_t bits = (nbits & 0xffffff);
|
||||
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
||||
double d = (double)0x0000ffff / (double)bits;
|
||||
|
||||
for (int m=shift; m < 29; m++) d *= 256.0;
|
||||
for (int m=29; m < shift; m++) d /= 256.0;
|
||||
if (opt_debug_diff)
|
||||
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
// std_le should work but it doesn't
|
||||
void lbry_le_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
le32enc( &ntime, work->data[ LBRY_NTIME_INDEX ] );
|
||||
le32enc( &nonce, work->data[ LBRY_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( work->xnonce2, work->xnonce2_len);
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
unsigned char merkle_root[64] = { 0 };
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
algo_gate.gen_merkle_root( merkle_root, sctx );
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
|
||||
g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
|
||||
g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
|
||||
void lbry_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
int64_t lbry_get_max64() { return 0x1ffffLL; }
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_lbry;
|
||||
gate->hash = (void*)&lbry_hash;
|
||||
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
|
||||
gate->get_max64 = (void*)&lbry_get_max64;
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
gate->build_extraheader = (void*)&lbry_build_extraheader;
|
||||
gate->set_target = (void*)&lbry_set_target;
|
||||
gate->ntime_index = LBRY_NTIME_INDEX;
|
||||
gate->nbits_index = LBRY_NBITS_INDEX;
|
||||
gate->nonce_index = LBRY_NONCE_INDEX;
|
||||
gate->work_data_size = LBRY_WORK_DATA_SIZE;
|
||||
return true;
|
||||
}
|
||||
|
323
algo/ripemd/ripemd-hash-4way.c
Normal file
323
algo/ripemd/ripemd-hash-4way.c
Normal file
@@ -0,0 +1,323 @@
|
||||
#include "ripemd-hash-4way.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Round functions for RIPEMD-128 and RIPEMD-160.
|
||||
*/
|
||||
#define F1(x, y, z) \
|
||||
_mm_xor_si128( _mm_xor_si128( x, y ), z )
|
||||
|
||||
#define F2(x, y, z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( y, z ), x ), z )
|
||||
|
||||
#define F3(x, y, z) \
|
||||
_mm_xor_si128( _mm_or_si128( x, mm_not( y ) ), z )
|
||||
|
||||
#define F4(x, y, z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( x, y ), z ), y )
|
||||
|
||||
#define F5(x, y, z) \
|
||||
_mm_xor_si128( x, _mm_or_si128( y, mm_not( z ) ) )
|
||||
|
||||
|
||||
static const uint32_t IV[5] =
|
||||
{ 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };
|
||||
|
||||
/*
|
||||
* Round constants for RIPEMD-160.
|
||||
*/
|
||||
#define K11 0x00000000
|
||||
#define K12 0x5A827999
|
||||
#define K13 0x6ED9EBA1
|
||||
#define K14 0x8F1BBCDC
|
||||
#define K15 0xA953FD4E
|
||||
|
||||
#define K21 0x50A28BE6
|
||||
#define K22 0x5C4DD124
|
||||
#define K23 0x6D703EF3
|
||||
#define K24 0x7A6D76E9
|
||||
#define K25 0x00000000
|
||||
|
||||
#define RR(a, b, c, d, e, f, s, r, k) \
|
||||
do{ \
|
||||
a = _mm_add_epi32( mm_rotl_32( _mm_add_epi32( _mm_add_epi32( \
|
||||
_mm_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||
_mm_set1_epi32( k ) ), s ), e ); \
|
||||
c = mm_rotl_32( c, 10 );\
|
||||
} while (0)
|
||||
|
||||
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
||||
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
|
||||
|
||||
#define ROUND2(a, b, c, d, e, f, s, r, k) \
|
||||
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
||||
|
||||
static void ripemd160_4way_round( ripemd160_4way_context *sc )
|
||||
{
|
||||
const __m128i *in = (__m128i*)sc->buf;
|
||||
__m128i *h = (__m128i*)sc->val;
|
||||
register __m128i A1, B1, C1, D1, E1;
|
||||
register __m128i A2, B2, C2, D2, E2;
|
||||
__m128i tmp;
|
||||
|
||||
A1 = A2 = h[0];
|
||||
B1 = B2 = h[1];
|
||||
C1 = C2 = h[2];
|
||||
D1 = D2 = h[3];
|
||||
E1 = E2 = h[4];
|
||||
|
||||
ROUND1( A, B, C, D, E, F1, 11, in[ 0], 1 );
|
||||
ROUND1( E, A, B, C, D, F1, 14, in[ 1], 1 );
|
||||
ROUND1( D, E, A, B, C, F1, 15, in[ 2], 1 );
|
||||
ROUND1( C, D, E, A, B, F1, 12, in[ 3], 1 );
|
||||
ROUND1( B, C, D, E, A, F1, 5, in[ 4], 1 );
|
||||
ROUND1( A, B, C, D, E, F1, 8, in[ 5], 1 );
|
||||
ROUND1( E, A, B, C, D, F1, 7, in[ 6], 1 );
|
||||
ROUND1( D, E, A, B, C, F1, 9, in[ 7], 1 );
|
||||
ROUND1( C, D, E, A, B, F1, 11, in[ 8], 1 );
|
||||
ROUND1( B, C, D, E, A, F1, 13, in[ 9], 1 );
|
||||
ROUND1( A, B, C, D, E, F1, 14, in[10], 1 );
|
||||
ROUND1( E, A, B, C, D, F1, 15, in[11], 1 );
|
||||
ROUND1( D, E, A, B, C, F1, 6, in[12], 1 );
|
||||
ROUND1( C, D, E, A, B, F1, 7, in[13], 1 );
|
||||
ROUND1( B, C, D, E, A, F1, 9, in[14], 1 );
|
||||
ROUND1( A, B, C, D, E, F1, 8, in[15], 1 );
|
||||
|
||||
ROUND1( E, A, B, C, D, F2, 7, in[ 7], 2 );
|
||||
ROUND1( D, E, A, B, C, F2, 6, in[ 4], 2 );
|
||||
ROUND1( C, D, E, A, B, F2, 8, in[13], 2 );
|
||||
ROUND1( B, C, D, E, A, F2, 13, in[ 1], 2 );
|
||||
ROUND1( A, B, C, D, E, F2, 11, in[10], 2 );
|
||||
ROUND1( E, A, B, C, D, F2, 9, in[ 6], 2 );
|
||||
ROUND1( D, E, A, B, C, F2, 7, in[15], 2 );
|
||||
ROUND1( C, D, E, A, B, F2, 15, in[ 3], 2 );
|
||||
ROUND1( B, C, D, E, A, F2, 7, in[12], 2 );
|
||||
ROUND1( A, B, C, D, E, F2, 12, in[ 0], 2 );
|
||||
ROUND1( E, A, B, C, D, F2, 15, in[ 9], 2 );
|
||||
ROUND1( D, E, A, B, C, F2, 9, in[ 5], 2 );
|
||||
ROUND1( C, D, E, A, B, F2, 11, in[ 2], 2 );
|
||||
ROUND1( B, C, D, E, A, F2, 7, in[14], 2 );
|
||||
ROUND1( A, B, C, D, E, F2, 13, in[11], 2 );
|
||||
ROUND1( E, A, B, C, D, F2, 12, in[ 8], 2 );
|
||||
|
||||
ROUND1( D, E, A, B, C, F3, 11, in[ 3], 3 );
|
||||
ROUND1( C, D, E, A, B, F3, 13, in[10], 3 );
|
||||
ROUND1( B, C, D, E, A, F3, 6, in[14], 3 );
|
||||
ROUND1( A, B, C, D, E, F3, 7, in[ 4], 3 );
|
||||
ROUND1( E, A, B, C, D, F3, 14, in[ 9], 3 );
|
||||
ROUND1( D, E, A, B, C, F3, 9, in[15], 3 );
|
||||
ROUND1( C, D, E, A, B, F3, 13, in[ 8], 3 );
|
||||
ROUND1( B, C, D, E, A, F3, 15, in[ 1], 3 );
|
||||
ROUND1( A, B, C, D, E, F3, 14, in[ 2], 3 );
|
||||
ROUND1( E, A, B, C, D, F3, 8, in[ 7], 3 );
|
||||
ROUND1( D, E, A, B, C, F3, 13, in[ 0], 3 );
|
||||
ROUND1( C, D, E, A, B, F3, 6, in[ 6], 3 );
|
||||
ROUND1( B, C, D, E, A, F3, 5, in[13], 3 );
|
||||
ROUND1( A, B, C, D, E, F3, 12, in[11], 3 );
|
||||
ROUND1( E, A, B, C, D, F3, 7, in[ 5], 3 );
|
||||
ROUND1( D, E, A, B, C, F3, 5, in[12], 3 );
|
||||
|
||||
ROUND1( C, D, E, A, B, F4, 11, in[ 1], 4 );
|
||||
ROUND1( B, C, D, E, A, F4, 12, in[ 9], 4 );
|
||||
ROUND1( A, B, C, D, E, F4, 14, in[11], 4 );
|
||||
ROUND1( E, A, B, C, D, F4, 15, in[10], 4 );
|
||||
ROUND1( D, E, A, B, C, F4, 14, in[ 0], 4 );
|
||||
ROUND1( C, D, E, A, B, F4, 15, in[ 8], 4 );
|
||||
ROUND1( B, C, D, E, A, F4, 9, in[12], 4 );
|
||||
ROUND1( A, B, C, D, E, F4, 8, in[ 4], 4 );
|
||||
ROUND1( E, A, B, C, D, F4, 9, in[13], 4 );
|
||||
ROUND1( D, E, A, B, C, F4, 14, in[ 3], 4 );
|
||||
ROUND1( C, D, E, A, B, F4, 5, in[ 7], 4 );
|
||||
ROUND1( B, C, D, E, A, F4, 6, in[15], 4 );
|
||||
ROUND1( A, B, C, D, E, F4, 8, in[14], 4 );
|
||||
ROUND1( E, A, B, C, D, F4, 6, in[ 5], 4 );
|
||||
ROUND1( D, E, A, B, C, F4, 5, in[ 6], 4 );
|
||||
ROUND1( C, D, E, A, B, F4, 12, in[ 2], 4 );
|
||||
|
||||
ROUND1( B, C, D, E, A, F5, 9, in[ 4], 5 );
|
||||
ROUND1( A, B, C, D, E, F5, 15, in[ 0], 5 );
|
||||
ROUND1( E, A, B, C, D, F5, 5, in[ 5], 5 );
|
||||
ROUND1( D, E, A, B, C, F5, 11, in[ 9], 5 );
|
||||
ROUND1( C, D, E, A, B, F5, 6, in[ 7], 5 );
|
||||
ROUND1( B, C, D, E, A, F5, 8, in[12], 5 );
|
||||
ROUND1( A, B, C, D, E, F5, 13, in[ 2], 5 );
|
||||
ROUND1( E, A, B, C, D, F5, 12, in[10], 5 );
|
||||
ROUND1( D, E, A, B, C, F5, 5, in[14], 5 );
|
||||
ROUND1( C, D, E, A, B, F5, 12, in[ 1], 5 );
|
||||
ROUND1( B, C, D, E, A, F5, 13, in[ 3], 5 );
|
||||
ROUND1( A, B, C, D, E, F5, 14, in[ 8], 5 );
|
||||
ROUND1( E, A, B, C, D, F5, 11, in[11], 5 );
|
||||
ROUND1( D, E, A, B, C, F5, 8, in[ 6], 5 );
|
||||
ROUND1( C, D, E, A, B, F5, 5, in[15], 5 );
|
||||
ROUND1( B, C, D, E, A, F5, 6, in[13], 5 );
|
||||
|
||||
ROUND2( A, B, C, D, E, F5, 8, in[ 5], 1 );
|
||||
ROUND2( E, A, B, C, D, F5, 9, in[14], 1 );
|
||||
ROUND2( D, E, A, B, C, F5, 9, in[ 7], 1 );
|
||||
ROUND2( C, D, E, A, B, F5, 11, in[ 0], 1 );
|
||||
ROUND2( B, C, D, E, A, F5, 13, in[ 9], 1 );
|
||||
ROUND2( A, B, C, D, E, F5, 15, in[ 2], 1 );
|
||||
ROUND2( E, A, B, C, D, F5, 15, in[11], 1 );
|
||||
ROUND2( D, E, A, B, C, F5, 5, in[ 4], 1 );
|
||||
ROUND2( C, D, E, A, B, F5, 7, in[13], 1 );
|
||||
ROUND2( B, C, D, E, A, F5, 7, in[ 6], 1 );
|
||||
ROUND2( A, B, C, D, E, F5, 8, in[15], 1 );
|
||||
ROUND2( E, A, B, C, D, F5, 11, in[ 8], 1 );
|
||||
ROUND2( D, E, A, B, C, F5, 14, in[ 1], 1 );
|
||||
ROUND2( C, D, E, A, B, F5, 14, in[10], 1 );
|
||||
ROUND2( B, C, D, E, A, F5, 12, in[ 3], 1 );
|
||||
ROUND2( A, B, C, D, E, F5, 6, in[12], 1 );
|
||||
|
||||
ROUND2( E, A, B, C, D, F4, 9, in[ 6], 2 );
|
||||
ROUND2( D, E, A, B, C, F4, 13, in[11], 2 );
|
||||
ROUND2( C, D, E, A, B, F4, 15, in[ 3], 2 );
|
||||
ROUND2( B, C, D, E, A, F4, 7, in[ 7], 2 );
|
||||
ROUND2( A, B, C, D, E, F4, 12, in[ 0], 2 );
|
||||
ROUND2( E, A, B, C, D, F4, 8, in[13], 2 );
|
||||
ROUND2( D, E, A, B, C, F4, 9, in[ 5], 2 );
|
||||
ROUND2( C, D, E, A, B, F4, 11, in[10], 2 );
|
||||
ROUND2( B, C, D, E, A, F4, 7, in[14], 2 );
|
||||
ROUND2( A, B, C, D, E, F4, 7, in[15], 2 );
|
||||
ROUND2( E, A, B, C, D, F4, 12, in[ 8], 2 );
|
||||
ROUND2( D, E, A, B, C, F4, 7, in[12], 2 );
|
||||
ROUND2( C, D, E, A, B, F4, 6, in[ 4], 2 );
|
||||
ROUND2( B, C, D, E, A, F4, 15, in[ 9], 2 );
|
||||
ROUND2( A, B, C, D, E, F4, 13, in[ 1], 2 );
|
||||
ROUND2( E, A, B, C, D, F4, 11, in[ 2], 2 );
|
||||
|
||||
ROUND2( D, E, A, B, C, F3, 9, in[15], 3 );
|
||||
ROUND2( C, D, E, A, B, F3, 7, in[ 5], 3 );
|
||||
ROUND2( B, C, D, E, A, F3, 15, in[ 1], 3 );
|
||||
ROUND2( A, B, C, D, E, F3, 11, in[ 3], 3 );
|
||||
ROUND2( E, A, B, C, D, F3, 8, in[ 7], 3 );
|
||||
ROUND2( D, E, A, B, C, F3, 6, in[14], 3 );
|
||||
ROUND2( C, D, E, A, B, F3, 6, in[ 6], 3 );
|
||||
ROUND2( B, C, D, E, A, F3, 14, in[ 9], 3 );
|
||||
ROUND2( A, B, C, D, E, F3, 12, in[11], 3 );
|
||||
ROUND2( E, A, B, C, D, F3, 13, in[ 8], 3 );
|
||||
ROUND2( D, E, A, B, C, F3, 5, in[12], 3 );
|
||||
ROUND2( C, D, E, A, B, F3, 14, in[ 2], 3 );
|
||||
ROUND2( B, C, D, E, A, F3, 13, in[10], 3 );
|
||||
ROUND2( A, B, C, D, E, F3, 13, in[ 0], 3 );
|
||||
ROUND2( E, A, B, C, D, F3, 7, in[ 4], 3 );
|
||||
ROUND2( D, E, A, B, C, F3, 5, in[13], 3 );
|
||||
|
||||
ROUND2( C, D, E, A, B, F2, 15, in[ 8], 4 );
|
||||
ROUND2( B, C, D, E, A, F2, 5, in[ 6], 4 );
|
||||
ROUND2( A, B, C, D, E, F2, 8, in[ 4], 4 );
|
||||
ROUND2( E, A, B, C, D, F2, 11, in[ 1], 4 );
|
||||
ROUND2( D, E, A, B, C, F2, 14, in[ 3], 4 );
|
||||
ROUND2( C, D, E, A, B, F2, 14, in[11], 4 );
|
||||
ROUND2( B, C, D, E, A, F2, 6, in[15], 4 );
|
||||
ROUND2( A, B, C, D, E, F2, 14, in[ 0], 4 );
|
||||
ROUND2( E, A, B, C, D, F2, 6, in[ 5], 4 );
|
||||
ROUND2( D, E, A, B, C, F2, 9, in[12], 4 );
|
||||
ROUND2( C, D, E, A, B, F2, 12, in[ 2], 4 );
|
||||
ROUND2( B, C, D, E, A, F2, 9, in[13], 4 );
|
||||
ROUND2( A, B, C, D, E, F2, 12, in[ 9], 4 );
|
||||
ROUND2( E, A, B, C, D, F2, 5, in[ 7], 4 );
|
||||
ROUND2( D, E, A, B, C, F2, 15, in[10], 4 );
|
||||
ROUND2( C, D, E, A, B, F2, 8, in[14], 4 );
|
||||
|
||||
ROUND2( B, C, D, E, A, F1, 8, in[12], 5 );
|
||||
ROUND2( A, B, C, D, E, F1, 5, in[15], 5 );
|
||||
ROUND2( E, A, B, C, D, F1, 12, in[10], 5 );
|
||||
ROUND2( D, E, A, B, C, F1, 9, in[ 4], 5 );
|
||||
ROUND2( C, D, E, A, B, F1, 12, in[ 1], 5 );
|
||||
ROUND2( B, C, D, E, A, F1, 5, in[ 5], 5 );
|
||||
ROUND2( A, B, C, D, E, F1, 14, in[ 8], 5 );
|
||||
ROUND2( E, A, B, C, D, F1, 6, in[ 7], 5 );
|
||||
ROUND2( D, E, A, B, C, F1, 8, in[ 6], 5 );
|
||||
ROUND2( C, D, E, A, B, F1, 13, in[ 2], 5 );
|
||||
ROUND2( B, C, D, E, A, F1, 6, in[13], 5 );
|
||||
ROUND2( A, B, C, D, E, F1, 5, in[14], 5 );
|
||||
ROUND2( E, A, B, C, D, F1, 15, in[ 0], 5 );
|
||||
ROUND2( D, E, A, B, C, F1, 13, in[ 3], 5 );
|
||||
ROUND2( C, D, E, A, B, F1, 11, in[ 9], 5 );
|
||||
ROUND2( B, C, D, E, A, F1, 11, in[11], 5 );
|
||||
|
||||
tmp = _mm_add_epi32( _mm_add_epi32( h[1], C1 ), D2 );
|
||||
h[1] = _mm_add_epi32( _mm_add_epi32( h[2], D1 ), E2 );
|
||||
h[2] = _mm_add_epi32( _mm_add_epi32( h[3], E1 ), A2 );
|
||||
h[3] = _mm_add_epi32( _mm_add_epi32( h[4], A1 ), B2 );
|
||||
h[4] = _mm_add_epi32( _mm_add_epi32( h[0], B1 ), C2 );
|
||||
h[0] = tmp;
|
||||
}
|
||||
|
||||
void ripemd160_4way_init( ripemd160_4way_context *sc )
|
||||
{
|
||||
sc->val[0] = _mm_set1_epi32( IV[0] );
|
||||
sc->val[1] = _mm_set1_epi32( IV[1] );
|
||||
sc->val[2] = _mm_set1_epi32( IV[2] );
|
||||
sc->val[3] = _mm_set1_epi32( IV[3] );
|
||||
sc->val[4] = _mm_set1_epi32( IV[4] );
|
||||
sc->count_high = sc->count_low = 0;
|
||||
}
|
||||
|
||||
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
size_t ptr;
|
||||
const int block_size = 64;
|
||||
|
||||
ptr = (unsigned)sc->count_low & (block_size - 1U);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
uint32_t clow, clow2;
|
||||
|
||||
clen = block_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_128( sc->buf + (ptr>>2), vdata, clen>>2 );
|
||||
vdata = vdata + (clen>>2);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
if ( ptr == block_size )
|
||||
{
|
||||
ripemd160_4way_round( sc );
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
clow2 = clow + clen;
|
||||
sc->count_low = clow2;
|
||||
if ( clow2 < clow )
|
||||
sc->count_high++;
|
||||
}
|
||||
}
|
||||
|
||||
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
uint32_t low, high;
|
||||
const int block_size = 64;
|
||||
const int pad = block_size - 8;
|
||||
|
||||
ptr = (unsigned)sc->count_low & ( block_size - 1U);
|
||||
sc->buf[ ptr>>2 ] = _mm_set1_epi32( 0x80 );
|
||||
ptr += 4;
|
||||
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_128( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
|
||||
ripemd160_4way_round( sc );
|
||||
memset_zero_128( sc->buf, pad>>2 );
|
||||
}
|
||||
else
|
||||
memset_zero_128( sc->buf + (ptr>>2), (pad - ptr) >> 2 );
|
||||
|
||||
low = sc->count_low;
|
||||
high = (sc->count_high << 3) | (low >> 29);
|
||||
low = low << 3;
|
||||
sc->buf[ pad>>2 ] = _mm_set1_epi32( low );
|
||||
sc->buf[ (pad>>2) + 1 ] = _mm_set1_epi32( high );
|
||||
ripemd160_4way_round( sc );
|
||||
for (u = 0; u < 5; u ++)
|
||||
casti_m128i( dst, u ) = sc->val[u];
|
||||
}
|
||||
|
||||
#endif
|
23
algo/ripemd/ripemd-hash-4way.h
Normal file
23
algo/ripemd/ripemd-hash-4way.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef RIPEMD_HASH_4WAY_H__
|
||||
#define RIPEMD_HASH_4WAY_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m128i buf[64>>2];
|
||||
__m128i val[5];
|
||||
uint32_t count_high, count_low;
|
||||
} __attribute__ ((aligned (64))) ripemd160_4way_context;
|
||||
|
||||
void ripemd160_4way_init( ripemd160_4way_context *sc );
|
||||
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len );
|
||||
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
|
||||
|
||||
#endif
|
||||
#endif
|
@@ -1,247 +0,0 @@
|
||||
/* $Id: sha2big.c 216 2010-06-08 09:46:57Z tp $ */
|
||||
/*
|
||||
* SHA-384 / SHA-512 implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_sha2.h"
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
|
||||
#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z)))
|
||||
|
||||
#define ROTR64 SPH_ROTR64
|
||||
|
||||
#define BSG5_0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
|
||||
#define BSG5_1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
|
||||
#define SSG5_0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7))
|
||||
#define SSG5_1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6))
|
||||
|
||||
static const sph_u64 K512[80] = {
|
||||
SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
|
||||
SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
|
||||
SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
|
||||
SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
|
||||
SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
|
||||
SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
|
||||
SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
|
||||
SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
|
||||
SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
|
||||
SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
|
||||
SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
|
||||
SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
|
||||
SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
|
||||
SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
|
||||
SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
|
||||
SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
|
||||
SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
|
||||
SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
|
||||
SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
|
||||
SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
|
||||
SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
|
||||
SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
|
||||
SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
|
||||
SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
|
||||
SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
|
||||
SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
|
||||
SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
|
||||
SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
|
||||
SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
|
||||
SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
|
||||
SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
|
||||
SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
|
||||
SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
|
||||
SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
|
||||
SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
|
||||
SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
|
||||
SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
|
||||
SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
|
||||
SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
|
||||
SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
|
||||
};
|
||||
|
||||
static const sph_u64 H384[8] = {
|
||||
SPH_C64(0xCBBB9D5DC1059ED8), SPH_C64(0x629A292A367CD507),
|
||||
SPH_C64(0x9159015A3070DD17), SPH_C64(0x152FECD8F70E5939),
|
||||
SPH_C64(0x67332667FFC00B31), SPH_C64(0x8EB44A8768581511),
|
||||
SPH_C64(0xDB0C2E0D64F98FA7), SPH_C64(0x47B5481DBEFA4FA4)
|
||||
};
|
||||
|
||||
static const sph_u64 H512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
|
||||
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
|
||||
};
|
||||
|
||||
/*
|
||||
* This macro defines the body for a SHA-384 / SHA-512 compression function
|
||||
* implementation. The "in" parameter should evaluate, when applied to a
|
||||
* numerical input parameter from 0 to 15, to an expression which yields
|
||||
* the corresponding input block. The "r" parameter should evaluate to
|
||||
* an array or pointer expression designating the array of 8 words which
|
||||
* contains the input and output of the compression function.
|
||||
*
|
||||
* SHA-512 is hard for the compiler. If the loop is completely unrolled,
|
||||
* then the code will be quite huge (possibly more than 100 kB), and the
|
||||
* performance will be degraded due to cache misses on the code. We
|
||||
* unroll only eight steps, which avoids all needless copies when
|
||||
* 64-bit registers are swapped.
|
||||
*/
|
||||
|
||||
#define SHA3_STEP(A, B, C, D, E, F, G, H, i) do { \
|
||||
sph_u64 T1, T2; \
|
||||
T1 = SPH_T64(H + BSG5_1(E) + CH(E, F, G) + K512[i] + W[i]); \
|
||||
T2 = SPH_T64(BSG5_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T64(D + T1); \
|
||||
H = SPH_T64(T1 + T2); \
|
||||
} while (0)
|
||||
|
||||
#define SHA3_ROUND_BODY(in, r) do { \
|
||||
int i; \
|
||||
sph_u64 A, B, C, D, E, F, G, H; \
|
||||
sph_u64 W[80]; \
|
||||
\
|
||||
for (i = 0; i < 16; i ++) \
|
||||
W[i] = in(i); \
|
||||
for (i = 16; i < 80; i ++) \
|
||||
W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] \
|
||||
+ SSG5_0(W[i - 15]) + W[i - 16]); \
|
||||
A = (r)[0]; \
|
||||
B = (r)[1]; \
|
||||
C = (r)[2]; \
|
||||
D = (r)[3]; \
|
||||
E = (r)[4]; \
|
||||
F = (r)[5]; \
|
||||
G = (r)[6]; \
|
||||
H = (r)[7]; \
|
||||
for (i = 0; i < 80; i += 8) { \
|
||||
SHA3_STEP(A, B, C, D, E, F, G, H, i + 0); \
|
||||
SHA3_STEP(H, A, B, C, D, E, F, G, i + 1); \
|
||||
SHA3_STEP(G, H, A, B, C, D, E, F, i + 2); \
|
||||
SHA3_STEP(F, G, H, A, B, C, D, E, i + 3); \
|
||||
SHA3_STEP(E, F, G, H, A, B, C, D, i + 4); \
|
||||
SHA3_STEP(D, E, F, G, H, A, B, C, i + 5); \
|
||||
SHA3_STEP(C, D, E, F, G, H, A, B, i + 6); \
|
||||
SHA3_STEP(B, C, D, E, F, G, H, A, i + 7); \
|
||||
} \
|
||||
(r)[0] = SPH_T64((r)[0] + A); \
|
||||
(r)[1] = SPH_T64((r)[1] + B); \
|
||||
(r)[2] = SPH_T64((r)[2] + C); \
|
||||
(r)[3] = SPH_T64((r)[3] + D); \
|
||||
(r)[4] = SPH_T64((r)[4] + E); \
|
||||
(r)[5] = SPH_T64((r)[5] + F); \
|
||||
(r)[6] = SPH_T64((r)[6] + G); \
|
||||
(r)[7] = SPH_T64((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* One round of SHA-384 / SHA-512. The data must be aligned for 64-bit access.
|
||||
*/
|
||||
static void
|
||||
sha3_round(const unsigned char *data, sph_u64 r[8])
|
||||
{
|
||||
#define SHA3_IN(x) sph_dec64be_aligned(data + (8 * (x)))
|
||||
SHA3_ROUND_BODY(SHA3_IN, r);
|
||||
#undef SHA3_IN
|
||||
}
|
||||
|
||||
/* see sph_sha3.h */
|
||||
void
|
||||
sph_sha384_init(void *cc)
|
||||
{
|
||||
sph_sha384_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H384, sizeof H384);
|
||||
sc->count = 0;
|
||||
}
|
||||
|
||||
/* see sph_sha3.h */
|
||||
void
|
||||
sph_sha512_init(void *cc)
|
||||
{
|
||||
sph_sha512_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H512, sizeof H512);
|
||||
sc->count = 0;
|
||||
}
|
||||
|
||||
#define RFUN sha3_round
|
||||
#define HASH sha384
|
||||
#define BE64 1
|
||||
#include "md_helper.c"
|
||||
|
||||
/* see sph_sha3.h */
|
||||
void
|
||||
sph_sha384_close(void *cc, void *dst)
|
||||
{
|
||||
sha384_close(cc, dst, 6);
|
||||
// sph_sha384_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha3.h */
|
||||
void
|
||||
sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha384_addbits_and_close(cc, ub, n, dst, 6);
|
||||
// sph_sha384_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha3.h */
|
||||
void
|
||||
sph_sha512_close(void *cc, void *dst)
|
||||
{
|
||||
sha384_close(cc, dst, 8);
|
||||
// sph_sha512_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha3.h */
|
||||
void
|
||||
sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha384_addbits_and_close(cc, ub, n, dst, 8);
|
||||
// sph_sha512_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha3.h */
|
||||
void
|
||||
sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8])
|
||||
{
|
||||
#define SHA3_IN(x) msg[x]
|
||||
SHA3_ROUND_BODY(SHA3_IN, val);
|
||||
#undef SHA3_IN
|
||||
}
|
||||
|
||||
#endif
|
@@ -37,6 +37,8 @@
|
||||
|
||||
#include "sha2-hash-4way.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// SHA256 4 way 32 bit
|
||||
|
||||
static const sph_u32 H256[8] = {
|
||||
@@ -46,7 +48,7 @@ static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
};
|
||||
|
||||
static const sph_u32 K256[80] = {
|
||||
static const sph_u32 K256[64] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
|
||||
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
|
||||
@@ -78,18 +80,12 @@ static const sph_u32 K256[80] = {
|
||||
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
|
||||
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
|
||||
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2),
|
||||
SPH_C32(0xCA273ECE), SPH_C32(0xD186B8C7),
|
||||
SPH_C32(0xEADA7DD6), SPH_C32(0xF57D4F7F),
|
||||
SPH_C32(0x06F067AA), SPH_C32(0x0A637DC5),
|
||||
SPH_C32(0x113F9804), SPH_C32(0x1B710B35),
|
||||
SPH_C32(0x28DB77F5), SPH_C32(0x32CAAB7B),
|
||||
SPH_C32(0x3C9EBE0A), SPH_C32(0x431D67C4),
|
||||
SPH_C32(0x4CC5D4BE), SPH_C32(0x597F299C),
|
||||
SPH_C32(0x5FCB6FAB), SPH_C32(0x6C44198C)
|
||||
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
|
||||
};
|
||||
|
||||
#define SHA2s_MEXP( a, b, c, d ) \
|
||||
_mm_add_epi32( _mm_add_epi32( _mm_add_epi32( \
|
||||
SSG2_1( W[a] ), W[b] ), SSG2_0( W[c] ) ), W[d] );
|
||||
|
||||
#define CHs(X, Y, Z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( Y, Z ), X ), Z )
|
||||
@@ -114,29 +110,39 @@ static const sph_u32 K256[80] = {
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 17), mm_rotr_32(x, 19) ), _mm_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA256_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
__m128i T1, T2; \
|
||||
register __m128i T1, T2; \
|
||||
T1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( \
|
||||
_mm_add_epi32( H, BSG2_1(E) ), CHs(E, F, G) ), \
|
||||
_mm_set1_epi32( K256[i] ) ), W[i] ); \
|
||||
_mm_set1_epi32( K256[( (j)+(i) )] ) ), W[i] ); \
|
||||
T2 = _mm_add_epi32( BSG2_0(A), MAJs(A, B, C) ); \
|
||||
D = _mm_add_epi32( D, T1 ); \
|
||||
D = _mm_add_epi32( D, T1 ); \
|
||||
H = _mm_add_epi32( T1, T2 ); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
sha256_4way_round( __m128i *in, __m128i r[8] )
|
||||
{
|
||||
int i;
|
||||
__m128i A, B, C, D, E, F, G, H;
|
||||
__m128i W[80];
|
||||
register __m128i A, B, C, D, E, F, G, H;
|
||||
__m128i W[16];
|
||||
|
||||
for ( i = 0; i < 16; i++ )
|
||||
W[i] = mm_bswap_32( in[i] );
|
||||
for ( i = 16; i < 80; i++ )
|
||||
W[i] = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32(
|
||||
SSG2_1( W[ i-2 ] ), W[ i-7 ] ), SSG2_0( W[ i-15 ] ) ), W[ i-16 ] );
|
||||
W[ 0] = mm_bswap_32( in[ 0] );
|
||||
W[ 1] = mm_bswap_32( in[ 1] );
|
||||
W[ 2] = mm_bswap_32( in[ 2] );
|
||||
W[ 3] = mm_bswap_32( in[ 3] );
|
||||
W[ 4] = mm_bswap_32( in[ 4] );
|
||||
W[ 5] = mm_bswap_32( in[ 5] );
|
||||
W[ 6] = mm_bswap_32( in[ 6] );
|
||||
W[ 7] = mm_bswap_32( in[ 7] );
|
||||
W[ 8] = mm_bswap_32( in[ 8] );
|
||||
W[ 9] = mm_bswap_32( in[ 9] );
|
||||
W[10] = mm_bswap_32( in[10] );
|
||||
W[11] = mm_bswap_32( in[11] );
|
||||
W[12] = mm_bswap_32( in[12] );
|
||||
W[13] = mm_bswap_32( in[13] );
|
||||
W[14] = mm_bswap_32( in[14] );
|
||||
W[15] = mm_bswap_32( in[15] );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
@@ -147,16 +153,58 @@ sha256_4way_round( __m128i *in, __m128i r[8] )
|
||||
G = r[6];
|
||||
H = r[7];
|
||||
|
||||
for ( i = 0; i < 80; i += 8 )
|
||||
SHA2s_4WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
SHA2s_4WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
SHA2s_4WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||
SHA2s_4WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
|
||||
SHA2s_4WAY_STEP( E, F, G, H, A, B, C, D, 4, 0 );
|
||||
SHA2s_4WAY_STEP( D, E, F, G, H, A, B, C, 5, 0 );
|
||||
SHA2s_4WAY_STEP( C, D, E, F, G, H, A, B, 6, 0 );
|
||||
SHA2s_4WAY_STEP( B, C, D, E, F, G, H, A, 7, 0 );
|
||||
SHA2s_4WAY_STEP( A, B, C, D, E, F, G, H, 8, 0 );
|
||||
SHA2s_4WAY_STEP( H, A, B, C, D, E, F, G, 9, 0 );
|
||||
SHA2s_4WAY_STEP( G, H, A, B, C, D, E, F, 10, 0 );
|
||||
SHA2s_4WAY_STEP( F, G, H, A, B, C, D, E, 11, 0 );
|
||||
SHA2s_4WAY_STEP( E, F, G, H, A, B, C, D, 12, 0 );
|
||||
SHA2s_4WAY_STEP( D, E, F, G, H, A, B, C, 13, 0 );
|
||||
SHA2s_4WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
|
||||
SHA2s_4WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
|
||||
|
||||
for ( int j = 16; j < 64; j += 16 )
|
||||
{
|
||||
SHA256_4WAY_STEP( A, B, C, D, E, F, G, H, i + 0 );
|
||||
SHA256_4WAY_STEP( H, A, B, C, D, E, F, G, i + 1 );
|
||||
SHA256_4WAY_STEP( G, H, A, B, C, D, E, F, i + 2 );
|
||||
SHA256_4WAY_STEP( F, G, H, A, B, C, D, E, i + 3 );
|
||||
SHA256_4WAY_STEP( E, F, G, H, A, B, C, D, i + 4 );
|
||||
SHA256_4WAY_STEP( D, E, F, G, H, A, B, C, i + 5 );
|
||||
SHA256_4WAY_STEP( C, D, E, F, G, H, A, B, i + 6 );
|
||||
SHA256_4WAY_STEP( B, C, D, E, F, G, H, A, i + 7 );
|
||||
W[ 0] = SHA2s_MEXP( 14, 9, 1, 0 );
|
||||
W[ 1] = SHA2s_MEXP( 15, 10, 2, 1 );
|
||||
W[ 2] = SHA2s_MEXP( 0, 11, 3, 2 );
|
||||
W[ 3] = SHA2s_MEXP( 1, 12, 4, 3 );
|
||||
W[ 4] = SHA2s_MEXP( 2, 13, 5, 4 );
|
||||
W[ 5] = SHA2s_MEXP( 3, 14, 6, 5 );
|
||||
W[ 6] = SHA2s_MEXP( 4, 15, 7, 6 );
|
||||
W[ 7] = SHA2s_MEXP( 5, 0, 8, 7 );
|
||||
W[ 8] = SHA2s_MEXP( 6, 1, 9, 8 );
|
||||
W[ 9] = SHA2s_MEXP( 7, 2, 10, 9 );
|
||||
W[10] = SHA2s_MEXP( 8, 3, 11, 10 );
|
||||
W[11] = SHA2s_MEXP( 9, 4, 12, 11 );
|
||||
W[12] = SHA2s_MEXP( 10, 5, 13, 12 );
|
||||
W[13] = SHA2s_MEXP( 11, 6, 14, 13 );
|
||||
W[14] = SHA2s_MEXP( 12, 7, 15, 14 );
|
||||
W[15] = SHA2s_MEXP( 13, 8, 0, 15 );
|
||||
|
||||
SHA2s_4WAY_STEP( A, B, C, D, E, F, G, H, 0, j );
|
||||
SHA2s_4WAY_STEP( H, A, B, C, D, E, F, G, 1, j );
|
||||
SHA2s_4WAY_STEP( G, H, A, B, C, D, E, F, 2, j );
|
||||
SHA2s_4WAY_STEP( F, G, H, A, B, C, D, E, 3, j );
|
||||
SHA2s_4WAY_STEP( E, F, G, H, A, B, C, D, 4, j );
|
||||
SHA2s_4WAY_STEP( D, E, F, G, H, A, B, C, 5, j );
|
||||
SHA2s_4WAY_STEP( C, D, E, F, G, H, A, B, 6, j );
|
||||
SHA2s_4WAY_STEP( B, C, D, E, F, G, H, A, 7, j );
|
||||
SHA2s_4WAY_STEP( A, B, C, D, E, F, G, H, 8, j );
|
||||
SHA2s_4WAY_STEP( H, A, B, C, D, E, F, G, 9, j );
|
||||
SHA2s_4WAY_STEP( G, H, A, B, C, D, E, F, 10, j );
|
||||
SHA2s_4WAY_STEP( F, G, H, A, B, C, D, E, 11, j );
|
||||
SHA2s_4WAY_STEP( E, F, G, H, A, B, C, D, 12, j );
|
||||
SHA2s_4WAY_STEP( D, E, F, G, H, A, B, C, 13, j );
|
||||
SHA2s_4WAY_STEP( C, D, E, F, G, H, A, B, 14, j );
|
||||
SHA2s_4WAY_STEP( B, C, D, E, F, G, H, A, 15, j );
|
||||
}
|
||||
|
||||
r[0] = _mm_add_epi32( r[0], A );
|
||||
@@ -243,7 +291,6 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm_bswap_32( _mm_set1_epi32( low ) );
|
||||
sha256_4way_round( sc->buf, sc->val );
|
||||
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
((__m128i*)dst)[u] = mm_bswap_32( sc->val[u] );
|
||||
}
|
||||
@@ -327,7 +374,7 @@ static const sph_u64 K512[80] = {
|
||||
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
__m256i T1, T2; \
|
||||
register __m256i T1, T2; \
|
||||
T1 = _mm256_add_epi64( _mm256_add_epi64( _mm256_add_epi64( \
|
||||
_mm256_add_epi64( H, BSG5_1(E) ), CH(E, F, G) ), \
|
||||
_mm256_set1_epi64x( K512[i] ) ), W[i] ); \
|
||||
@@ -340,7 +387,7 @@ static void
|
||||
sha512_4way_round( __m256i *in, __m256i r[8] )
|
||||
{
|
||||
int i;
|
||||
__m256i A, B, C, D, E, F, G, H;
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
__m256i W[80];
|
||||
|
||||
for ( i = 0; i < 16; i++ )
|
||||
@@ -428,7 +475,6 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = _mm256_set1_epi64x( 0x80 );
|
||||
ptr += 8;
|
||||
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_256( sc->buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
|
@@ -1,47 +1,29 @@
|
||||
#include "skein-gate.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "skein-hash-4way.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
|
||||
#if defined (__AVX2__)
|
||||
#if defined (SKEIN_4WAY)
|
||||
|
||||
void skeinhash_4way( void *state, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash64[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
||||
skein512_4way_context ctx_skein;
|
||||
SHA256_CTX ctx_sha256;
|
||||
sha256_4way_context ctx_sha256;
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, input, 80 );
|
||||
skein512_4way_close( &ctx_skein, vhash );
|
||||
skein512_4way_close( &ctx_skein, vhash64 );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_reinterleave_4x32( vhash32, vhash64, 512 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash0, 64 );
|
||||
SHA256_Final( (unsigned char*)hash0, &ctx_sha256 );
|
||||
sha256_4way_init( &ctx_sha256 );
|
||||
sha256_4way( &ctx_sha256, vhash32, 64 );
|
||||
sha256_4way_close( &ctx_sha256, vhash32 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash1, 64 );
|
||||
SHA256_Final( (unsigned char*)hash1, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash2, 64 );
|
||||
SHA256_Final( (unsigned char*)hash2, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
|
||||
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state + 32, hash1, 32 );
|
||||
memcpy( state + 64, hash2, 32 );
|
||||
memcpy( state + 96, hash3, 32 );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash32, 256 );
|
||||
}
|
||||
|
||||
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
@@ -11,15 +11,15 @@ void whirlpoolx_hash(void *state, const void *input)
|
||||
sph_whirlpool_context ctx_whirlpool;
|
||||
|
||||
unsigned char hash[64];
|
||||
unsigned char hash_xored[32];
|
||||
// unsigned char hash_xored[32];
|
||||
|
||||
sph_whirlpool1_init(&ctx_whirlpool);
|
||||
sph_whirlpool1(&ctx_whirlpool, input, 80);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, hash);
|
||||
|
||||
// compress the 48 first bytes of the hash to 32
|
||||
for (int i = 0; i < 32; i++)
|
||||
hash_xored[i] = hash[i] ^ hash[i + 16];
|
||||
// for (int i = 0; i < 32; i++)
|
||||
// hash_xored[i] = hash[i] ^ hash[i + 16];
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
@@ -43,7 +43,6 @@ void init_x11evo_4way_ctx()
|
||||
jh512_4way_init( &x11evo_4way_ctx.jh );
|
||||
keccak512_4way_init( &x11evo_4way_ctx.keccak );
|
||||
luffa_2way_init( &x11evo_4way_ctx.luffa, 512 );
|
||||
init_luffa( &x11evo_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &x11evo_4way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &x11evo_4way_ctx.shavite );
|
||||
simd_2way_init( &x11evo_4way_ctx.simd, 512 );
|
||||
|
273
algo/x12/x12-4way.c
Normal file
273
algo/x12/x12-4way.c
Normal file
@@ -0,0 +1,273 @@
|
||||
#include "x12-gate.h"
|
||||
|
||||
#if defined(X12_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||
//#include "algo/fugue/sph_fugue.h"
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
simd_2way_context simd;
|
||||
hashState_echo echo;
|
||||
hamsi512_4way_context hamsi;
|
||||
// sph_fugue512_context fugue;
|
||||
} x12_4way_ctx_holder;
|
||||
|
||||
x12_4way_ctx_holder x12_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_x12_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &x12_4way_ctx.blake );
|
||||
bmw512_4way_init( &x12_4way_ctx.bmw );
|
||||
init_groestl( &x12_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &x12_4way_ctx.skein );
|
||||
jh512_4way_init( &x12_4way_ctx.jh );
|
||||
keccak512_4way_init( &x12_4way_ctx.keccak );
|
||||
luffa_2way_init( &x12_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &x12_4way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &x12_4way_ctx.shavite );
|
||||
simd_2way_init( &x12_4way_ctx.simd, 512 );
|
||||
init_echo( &x12_4way_ctx.echo, 512 );
|
||||
hamsi512_4way_init( &x12_4way_ctx.hamsi );
|
||||
// sph_fugue512_init( &x12_4way_ctx.fugue );
|
||||
};
|
||||
|
||||
void x12_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
x12_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &x12_4way_ctx, sizeof(x12_4way_ctx) );
|
||||
|
||||
// 1 Blake
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
// 2 Bmw
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 3 Groestl
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
memcpy( &ctx.groestl, &x12_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
memcpy( &ctx.groestl, &x12_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
memcpy( &ctx.groestl, &x12_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
// Parallel 4way 64 bit
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// 4 Skein
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
// 5 JH
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
// 6 Keccak
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// 7 Luffa
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
// 8 Cubehash
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1, 64 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2, 64 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
|
||||
|
||||
// 9 Shavite
|
||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &x12_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
memcpy( &ctx.shavite, &x12_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash2, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
memcpy( &ctx.shavite, &x12_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
|
||||
// 10 Simd
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
||||
|
||||
// 11 Echo
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
memcpy( &ctx.echo, &x12_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, 512 );
|
||||
memcpy( &ctx.echo, &x12_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, 512 );
|
||||
memcpy( &ctx.echo, &x12_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, 512 );
|
||||
|
||||
// 12 Hamsi parallel 4way 32 bit
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
|
||||
|
||||
/*
|
||||
// 13 Fugue serial
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
bool *found = work->nfound;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
|
||||
uint32_t *noncep1 = vdata + 75;
|
||||
uint32_t *noncep2 = vdata + 77;
|
||||
uint32_t *noncep3 = vdata + 79;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
// big endian encode 0..18 uint32_t, 64 bits at a time
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
found[0] = found[1] = found[2] = found[3] = false;
|
||||
be32enc( noncep0, n );
|
||||
be32enc( noncep1, n+1 );
|
||||
be32enc( noncep2, n+2 );
|
||||
be32enc( noncep3, n+3 );
|
||||
|
||||
x12_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
{
|
||||
found[0] = true;
|
||||
num_found++;
|
||||
nonces[0] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
}
|
||||
if ( ( (hash+8)[7] & mask ) == 0 && fulltest( hash+8, ptarget ) )
|
||||
{
|
||||
found[1] = true;
|
||||
num_found++;
|
||||
nonces[1] = n+1;
|
||||
work_set_target_ratio( work, hash+8 );
|
||||
}
|
||||
if ( ( (hash+16)[7] & mask ) == 0 && fulltest( hash+16, ptarget ) )
|
||||
{
|
||||
found[2] = true;
|
||||
num_found++;
|
||||
nonces[2] = n+2;
|
||||
work_set_target_ratio( work, hash+16 );
|
||||
}
|
||||
if ( ( (hash+24)[7] & mask ) == 0 && fulltest( hash+24, ptarget ) )
|
||||
{
|
||||
found[3] = true;
|
||||
num_found++;
|
||||
nonces[3] = n+3;
|
||||
work_set_target_ratio( work, hash+24 );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
18
algo/x12/x12-gate.c
Normal file
18
algo/x12/x12-gate.c
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "x12-gate.h"
|
||||
|
||||
bool register_x12_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X12_4WAY)
|
||||
init_x12_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x12_4way;
|
||||
gate->hash = (void*)&x12_4way_hash;
|
||||
#else
|
||||
init_x12_ctx();
|
||||
gate->scanhash = (void*)&scanhash_x12;
|
||||
gate->hash = (void*)&x12hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
32
algo/x12/x12-gate.h
Normal file
32
algo/x12/x12-gate.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef X12_GATE_H__
|
||||
#define X12_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define X12_4WAY
|
||||
#endif
|
||||
|
||||
bool register_x12_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(X12_4WAY)
|
||||
|
||||
void x12_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x12_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_x12_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void x12hash( void *state, const void *input );
|
||||
|
||||
int scanhash_x12( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_x12_ctx();
|
||||
|
||||
#endif
|
||||
|
252
algo/x12/x12.c
Normal file
252
algo/x12/x12.c
Normal file
@@ -0,0 +1,252 @@
|
||||
#include "x12-gate.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/sph_luffa.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
//#include "algo/fugue/sph_fugue.h"
|
||||
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/simd/nist.h"
|
||||
#include "algo/blake/sse2/blake.c"
|
||||
#include "algo/bmw/sse2/bmw.c"
|
||||
#include "algo/keccak/sse2/keccak.c"
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
hashState_luffa luffa;
|
||||
cubehashParam cubehash;
|
||||
sph_shavite512_context shavite;
|
||||
hashState_sd simd;
|
||||
sph_hamsi512_context hamsi;
|
||||
// sph_fugue512_context fugue;
|
||||
} x12_ctx_holder;
|
||||
|
||||
x12_ctx_holder x12_ctx;
|
||||
|
||||
void init_x12_ctx()
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init(&x12_ctx.groestl);
|
||||
sph_echo512_init(&x12_ctx.echo);
|
||||
#else
|
||||
init_echo( &x12_ctx.echo, 512 );
|
||||
init_groestl (&x12_ctx.groestl, 64 );
|
||||
#endif
|
||||
init_luffa( &x12_ctx.luffa, 512 );
|
||||
cubehashInit( &x12_ctx.cubehash, 512, 16, 32 );
|
||||
sph_shavite512_init( &x12_ctx.shavite );
|
||||
init_sd( &x12_ctx.simd, 512 );
|
||||
sph_hamsi512_init( &x12_ctx.hamsi );
|
||||
// sph_fugue512_init( &x13_ctx.fugue );
|
||||
};
|
||||
|
||||
void x12hash(void *output, const void *input)
|
||||
{
|
||||
unsigned char hash[128] __attribute__ ((aligned (32)));
|
||||
#define hashB hash+64
|
||||
|
||||
x12_ctx_holder ctx;
|
||||
memcpy( &ctx, &x12_ctx, sizeof(x12_ctx) );
|
||||
|
||||
// X11 algos
|
||||
|
||||
unsigned char hashbuf[128];
|
||||
size_t hashptr;
|
||||
sph_u64 hashctA;
|
||||
sph_u64 hashctB;
|
||||
|
||||
//---blake1---
|
||||
|
||||
DECL_BLK;
|
||||
BLK_I;
|
||||
BLK_W;
|
||||
BLK_C;
|
||||
|
||||
//---bmw2---
|
||||
|
||||
DECL_BMW;
|
||||
BMW_I;
|
||||
BMW_U;
|
||||
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
BMW_C;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
|
||||
//---groetl----
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, 512 );
|
||||
#endif
|
||||
|
||||
//---skein4---
|
||||
|
||||
DECL_SKN;
|
||||
SKN_I;
|
||||
SKN_U;
|
||||
SKN_C;
|
||||
|
||||
//---jh5------
|
||||
|
||||
DECL_JH;
|
||||
JH_H;
|
||||
|
||||
//---keccak6---
|
||||
|
||||
DECL_KEC;
|
||||
KEC_I;
|
||||
KEC_U;
|
||||
KEC_C;
|
||||
|
||||
//--- luffa7
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hashB,
|
||||
(const BitSequence*)hash, 64 );
|
||||
|
||||
// 8 Cube
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
|
||||
(const byte*)hashB, 64 );
|
||||
|
||||
// 9 Shavite
|
||||
sph_shavite512( &ctx.shavite, hash, 64);
|
||||
sph_shavite512_close( &ctx.shavite, hashB);
|
||||
|
||||
// 10 Simd
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hashB, 512 );
|
||||
|
||||
//11---echo---
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
sph_echo512(&ctx.echo, hash, 64);
|
||||
sph_echo512_close(&ctx.echo, hashB);
|
||||
#else
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hashB,
|
||||
(const BitSequence *)hash, 512 );
|
||||
#endif
|
||||
|
||||
// 12 Hamsi
|
||||
sph_hamsi512(&ctx.hamsi, hashB, 64);
|
||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||
|
||||
/*
|
||||
// 13 Fugue
|
||||
sph_fugue512(&ctx.fugue, hash, 64);
|
||||
sph_fugue512_close(&ctx.fugue, hashB);
|
||||
*/
|
||||
asm volatile ("emms");
|
||||
memcpy(output, hashB, 32);
|
||||
}
|
||||
|
||||
int scanhash_x12(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
x12hash(hash64, endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if (!(hash64[7] & mask))
|
||||
{
|
||||
if ( fulltest(hash64, ptarget) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// applog(LOG_INFO, "Result does not validate on CPU!");
|
||||
// }
|
||||
}
|
||||
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
@@ -8,7 +8,7 @@ bool register_polytimos_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_polytimos_4way;
|
||||
gate->hash = (void*)&polytimos_4way_hash;
|
||||
#else
|
||||
init_polytimos_context();
|
||||
init_polytimos_ctx();
|
||||
gate->scanhash = (void*)&scanhash_polytimos;
|
||||
gate->hash = (void*)&polytimos_hash;
|
||||
#endif
|
||||
|
@@ -30,7 +30,7 @@ typedef struct {
|
||||
|
||||
poly_ctx_holder poly_ctx;
|
||||
|
||||
void init_polytimos_context()
|
||||
void init_polytimos_ctx()
|
||||
{
|
||||
sph_skein512_init(&poly_ctx.skein);
|
||||
sph_shabal512_init(&poly_ctx.shabal);
|
||||
|
@@ -33,9 +33,11 @@
|
||||
* gcc bug 54349 (fixed for gcc 4.9+). On 32-bit, it's of direct help. AVX
|
||||
* and XOP are of further help either way.
|
||||
*/
|
||||
/*
|
||||
#ifndef __SSE4_1__
|
||||
#warning "Consider enabling SSE4.1, AVX, or XOP in the C compiler for significantly better performance"
|
||||
#endif
|
||||
*/
|
||||
|
||||
#include <emmintrin.h>
|
||||
#ifdef __XOP__
|
||||
|
@@ -1,401 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 John Doering <ghostlander@phoenixcoin.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* arch defines */
|
||||
#include "miner.h"
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && !defined(__arm__)
|
||||
#define ASM 1
|
||||
/* #define WIN64 0 */
|
||||
#endif
|
||||
|
||||
#if (ASM) && (__x86_64__)
|
||||
|
||||
/* neoscrypt_blkcpy(dst, src, len) = SSE2 based block memcpy();
|
||||
* len must be a multiple of 64 bytes aligned properly */
|
||||
.globl neoscrypt_blkcpy_sse2
|
||||
neoscrypt_blkcpy_sse2:
|
||||
#if (WIN64)
|
||||
movq %rdi, %r10
|
||||
movq %rsi, %r11
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
movq %r8, %rdx
|
||||
#endif
|
||||
xorq %rcx, %rcx
|
||||
movl %edx, %ecx
|
||||
shrl $6, %ecx
|
||||
movq $64, %rax
|
||||
.blkcpy:
|
||||
movdqa 0(%rsi), %xmm0
|
||||
movdqa 16(%rsi), %xmm1
|
||||
movdqa 32(%rsi), %xmm2
|
||||
movdqa 48(%rsi), %xmm3
|
||||
movdqa %xmm0, 0(%rdi)
|
||||
movdqa %xmm1, 16(%rdi)
|
||||
movdqa %xmm2, 32(%rdi)
|
||||
movdqa %xmm3, 48(%rdi)
|
||||
addq %rax, %rdi
|
||||
addq %rax, %rsi
|
||||
decl %ecx
|
||||
jnz .blkcpy
|
||||
#if (WIN64)
|
||||
movq %r10, %rdi
|
||||
movq %r11, %rsi
|
||||
#endif
|
||||
ret
|
||||
|
||||
|
||||
/* neoscrypt_blkswp(blkA, blkB, len) = SSE2 based block swapper;
|
||||
* len must be a multiple of 64 bytes aligned properly */
|
||||
.globl neoscrypt_blkswp_sse2
|
||||
neoscrypt_blkswp_sse2:
|
||||
#if (WIN64)
|
||||
movq %rdi, %r10
|
||||
movq %rsi, %r11
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
movq %r8, %rdx
|
||||
#endif
|
||||
xorq %rcx, %rcx
|
||||
movl %edx, %ecx
|
||||
shrl $6, %ecx
|
||||
movq $64, %rax
|
||||
.blkswp:
|
||||
movdqa 0(%rdi), %xmm0
|
||||
movdqa 16(%rdi), %xmm1
|
||||
movdqa 32(%rdi), %xmm2
|
||||
movdqa 48(%rdi), %xmm3
|
||||
movdqa 0(%rsi), %xmm4
|
||||
movdqa 16(%rsi), %xmm5
|
||||
movdqa 32(%rsi), %xmm8
|
||||
movdqa 48(%rsi), %xmm9
|
||||
movdqa %xmm0, 0(%rsi)
|
||||
movdqa %xmm1, 16(%rsi)
|
||||
movdqa %xmm2, 32(%rsi)
|
||||
movdqa %xmm3, 48(%rsi)
|
||||
movdqa %xmm4, 0(%rdi)
|
||||
movdqa %xmm5, 16(%rdi)
|
||||
movdqa %xmm8, 32(%rdi)
|
||||
movdqa %xmm9, 48(%rdi)
|
||||
addq %rax, %rdi
|
||||
addq %rax, %rsi
|
||||
decl %ecx
|
||||
jnz .blkswp
|
||||
#if (WIN64)
|
||||
movq %r10, %rdi
|
||||
movq %r11, %rsi
|
||||
#endif
|
||||
ret
|
||||
|
||||
|
||||
/* neoscrypt_blkxor(dst, src, len) = SSE2 based block XOR engine;
|
||||
* len must be a multiple of 64 bytes aligned properly */
|
||||
.globl neoscrypt_blkxor_sse2
|
||||
neoscrypt_blkxor_sse2:
|
||||
#if (WIN64)
|
||||
movq %rdi, %r10
|
||||
movq %rsi, %r11
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
movq %r8, %rdx
|
||||
#endif
|
||||
xorq %rcx, %rcx
|
||||
movl %edx, %ecx
|
||||
shrl $6, %ecx
|
||||
movq $64, %rax
|
||||
.blkxor:
|
||||
movdqa 0(%rdi), %xmm0
|
||||
movdqa 16(%rdi), %xmm1
|
||||
movdqa 32(%rdi), %xmm2
|
||||
movdqa 48(%rdi), %xmm3
|
||||
movdqa 0(%rsi), %xmm4
|
||||
movdqa 16(%rsi), %xmm5
|
||||
movdqa 32(%rsi), %xmm8
|
||||
movdqa 48(%rsi), %xmm9
|
||||
pxor %xmm4, %xmm0
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm8, %xmm2
|
||||
pxor %xmm9, %xmm3
|
||||
movdqa %xmm0, 0(%rdi)
|
||||
movdqa %xmm1, 16(%rdi)
|
||||
movdqa %xmm2, 32(%rdi)
|
||||
movdqa %xmm3, 48(%rdi)
|
||||
addq %rax, %rdi
|
||||
addq %rax, %rsi
|
||||
decl %ecx
|
||||
jnz .blkxor
|
||||
#if (WIN64)
|
||||
movq %r10, %rdi
|
||||
movq %r11, %rsi
|
||||
#endif
|
||||
ret
|
||||
|
||||
|
||||
/* neoscrypt_salsa(mem, rounds) = SSE2 based Salsa20;
|
||||
* mem must be aligned properly, rounds must be a multiple of 2 */
|
||||
.globl neoscrypt_salsa
|
||||
neoscrypt_salsa:
|
||||
#if (WIN64)
|
||||
movq %rdi, %r10
|
||||
movq %rsi, %r11
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
#endif
|
||||
xorq %rcx, %rcx
|
||||
movl %esi, %ecx
|
||||
shrl $1, %ecx
|
||||
movdqa 0(%rdi), %xmm0
|
||||
movdqa %xmm0, %xmm12
|
||||
movdqa 16(%rdi), %xmm1
|
||||
movdqa %xmm1, %xmm13
|
||||
movdqa 32(%rdi), %xmm2
|
||||
movdqa %xmm2, %xmm14
|
||||
movdqa 48(%rdi), %xmm3
|
||||
movdqa %xmm3, %xmm15
|
||||
.salsa:
|
||||
movdqa %xmm1, %xmm4
|
||||
paddd %xmm0, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $7, %xmm4
|
||||
psrld $25, %xmm5
|
||||
pxor %xmm4, %xmm3
|
||||
movdqa %xmm0, %xmm4
|
||||
pxor %xmm5, %xmm3
|
||||
paddd %xmm3, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $9, %xmm4
|
||||
psrld $23, %xmm5
|
||||
pxor %xmm4, %xmm2
|
||||
movdqa %xmm3, %xmm4
|
||||
pxor %xmm5, %xmm2
|
||||
pshufd $0x93, %xmm3, %xmm3
|
||||
paddd %xmm2, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $13, %xmm4
|
||||
psrld $19, %xmm5
|
||||
pxor %xmm4, %xmm1
|
||||
movdqa %xmm2, %xmm4
|
||||
pxor %xmm5, %xmm1
|
||||
pshufd $0x4E, %xmm2, %xmm2
|
||||
paddd %xmm1, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $18, %xmm4
|
||||
psrld $14, %xmm5
|
||||
pxor %xmm4, %xmm0
|
||||
movdqa %xmm3, %xmm4
|
||||
pxor %xmm5, %xmm0
|
||||
pshufd $0x39, %xmm1, %xmm1
|
||||
paddd %xmm0, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $7, %xmm4
|
||||
psrld $25, %xmm5
|
||||
pxor %xmm4, %xmm1
|
||||
movdqa %xmm0, %xmm4
|
||||
pxor %xmm5, %xmm1
|
||||
paddd %xmm1, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $9, %xmm4
|
||||
psrld $23, %xmm5
|
||||
pxor %xmm4, %xmm2
|
||||
movdqa %xmm1, %xmm4
|
||||
pxor %xmm5, %xmm2
|
||||
pshufd $0x93, %xmm1, %xmm1
|
||||
paddd %xmm2, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $13, %xmm4
|
||||
psrld $19, %xmm5
|
||||
pxor %xmm4, %xmm3
|
||||
movdqa %xmm2, %xmm4
|
||||
pxor %xmm5, %xmm3
|
||||
pshufd $0x4E, %xmm2, %xmm2
|
||||
paddd %xmm3, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $18, %xmm4
|
||||
psrld $14, %xmm5
|
||||
pxor %xmm4, %xmm0
|
||||
pshufd $0x39, %xmm3, %xmm3
|
||||
pxor %xmm5, %xmm0
|
||||
decl %ecx
|
||||
jnz .salsa
|
||||
|
||||
paddd %xmm12, %xmm0
|
||||
movdqa %xmm0, 0(%rdi)
|
||||
paddd %xmm13, %xmm1
|
||||
movdqa %xmm1, 16(%rdi)
|
||||
paddd %xmm14, %xmm2
|
||||
movdqa %xmm2, 32(%rdi)
|
||||
paddd %xmm15, %xmm3
|
||||
movdqa %xmm3, 48(%rdi)
|
||||
#if (WIN64)
|
||||
movq %r10, %rdi
|
||||
movq %r11, %rsi
|
||||
#endif
|
||||
ret
|
||||
|
||||
|
||||
/* neoscrypt_salsa_tangle(mem, count) = Salsa20 SSE2 map switcher;
|
||||
* correct map: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
||||
* SSE2 map: 0 5 10 15 12 1 6 11 8 13 2 7 4 9 14 3 */
|
||||
.globl neoscrypt_salsa_tangle
|
||||
neoscrypt_salsa_tangle:
|
||||
#if (WIN64)
|
||||
movq %rdi, %r10
|
||||
movq %rsi, %r11
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
#endif
|
||||
xorq %rcx, %rcx
|
||||
movl %esi, %ecx
|
||||
movq $64, %r8
|
||||
.salsa_tangle:
|
||||
movl 4(%rdi), %eax
|
||||
movl 20(%rdi), %edx
|
||||
movl %eax, 20(%rdi)
|
||||
movl %edx, 4(%rdi)
|
||||
movl 8(%rdi), %eax
|
||||
movl 40(%rdi), %edx
|
||||
movl %eax, 40(%rdi)
|
||||
movl %edx, 8(%rdi)
|
||||
movl 12(%rdi), %eax
|
||||
movl 60(%rdi), %edx
|
||||
movl %eax, 60(%rdi)
|
||||
movl %edx, 12(%rdi)
|
||||
movl 16(%rdi), %eax
|
||||
movl 48(%rdi), %edx
|
||||
movl %eax, 48(%rdi)
|
||||
movl %edx, 16(%rdi)
|
||||
movl 28(%rdi), %eax
|
||||
movl 44(%rdi), %edx
|
||||
movl %eax, 44(%rdi)
|
||||
movl %edx, 28(%rdi)
|
||||
movl 36(%rdi), %eax
|
||||
movl 52(%rdi), %edx
|
||||
movl %eax, 52(%rdi)
|
||||
movl %edx, 36(%rdi)
|
||||
addq %r8, %rdi
|
||||
decl %ecx
|
||||
jnz .salsa_tangle
|
||||
#if (WIN64)
|
||||
movq %r10, %rdi
|
||||
movq %r11, %rsi
|
||||
#endif
|
||||
ret
|
||||
|
||||
|
||||
/* neoscrypt_chacha(mem, rounds) = SSE2 based ChaCha20;
|
||||
* mem must be aligned properly, rounds must be a multiple of 2 */
|
||||
.globl neoscrypt_chacha
|
||||
neoscrypt_chacha:
|
||||
#if (WIN64)
|
||||
movq %rdi, %r10
|
||||
movq %rsi, %r11
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
#endif
|
||||
xorq %rcx, %rcx
|
||||
movl %esi, %ecx
|
||||
shrl $1, %ecx
|
||||
movdqa 0(%rdi), %xmm0
|
||||
movdqa %xmm0, %xmm12
|
||||
movdqa 16(%rdi), %xmm1
|
||||
movdqa %xmm1, %xmm13
|
||||
movdqa 32(%rdi), %xmm2
|
||||
movdqa %xmm2, %xmm14
|
||||
movdqa 48(%rdi), %xmm3
|
||||
movdqa %xmm3, %xmm15
|
||||
.chacha:
|
||||
paddd %xmm1, %xmm0
|
||||
pxor %xmm0, %xmm3
|
||||
pshuflw $0xB1, %xmm3, %xmm3
|
||||
pshufhw $0xB1, %xmm3, %xmm3
|
||||
paddd %xmm3, %xmm2
|
||||
pxor %xmm2, %xmm1
|
||||
movdqa %xmm1, %xmm4
|
||||
pslld $12, %xmm1
|
||||
psrld $20, %xmm4
|
||||
pxor %xmm4, %xmm1
|
||||
paddd %xmm1, %xmm0
|
||||
pxor %xmm0, %xmm3
|
||||
movdqa %xmm3, %xmm4
|
||||
pslld $8, %xmm3
|
||||
psrld $24, %xmm4
|
||||
pxor %xmm4, %xmm3
|
||||
pshufd $0x93, %xmm0, %xmm0
|
||||
paddd %xmm3, %xmm2
|
||||
pshufd $0x4E, %xmm3, %xmm3
|
||||
pxor %xmm2, %xmm1
|
||||
pshufd $0x39, %xmm2, %xmm2
|
||||
movdqa %xmm1, %xmm4
|
||||
pslld $7, %xmm1
|
||||
psrld $25, %xmm4
|
||||
pxor %xmm4, %xmm1
|
||||
paddd %xmm1, %xmm0
|
||||
pxor %xmm0, %xmm3
|
||||
pshuflw $0xB1, %xmm3, %xmm3
|
||||
pshufhw $0xB1, %xmm3, %xmm3
|
||||
paddd %xmm3, %xmm2
|
||||
pxor %xmm2, %xmm1
|
||||
movdqa %xmm1, %xmm4
|
||||
pslld $12, %xmm1
|
||||
psrld $20, %xmm4
|
||||
pxor %xmm4, %xmm1
|
||||
paddd %xmm1, %xmm0
|
||||
pxor %xmm0, %xmm3
|
||||
movdqa %xmm3, %xmm4
|
||||
pslld $8, %xmm3
|
||||
psrld $24, %xmm4
|
||||
pxor %xmm4, %xmm3
|
||||
pshufd $0x39, %xmm0, %xmm0
|
||||
paddd %xmm3, %xmm2
|
||||
pshufd $0x4E, %xmm3, %xmm3
|
||||
pxor %xmm2, %xmm1
|
||||
pshufd $0x93, %xmm2, %xmm2
|
||||
movdqa %xmm1, %xmm4
|
||||
pslld $7, %xmm1
|
||||
psrld $25, %xmm4
|
||||
pxor %xmm4, %xmm1
|
||||
decl %ecx
|
||||
jnz .chacha
|
||||
|
||||
paddd %xmm12, %xmm0
|
||||
movdqa %xmm0, 0(%rdi)
|
||||
paddd %xmm13, %xmm1
|
||||
movdqa %xmm1, 16(%rdi)
|
||||
paddd %xmm14, %xmm2
|
||||
movdqa %xmm2, 32(%rdi)
|
||||
paddd %xmm15, %xmm3
|
||||
movdqa %xmm3, 48(%rdi)
|
||||
#if (WIN64)
|
||||
movq %r10, %rdi
|
||||
movq %r11, %rsi
|
||||
#endif
|
||||
ret
|
||||
|
||||
#endif /* (ASM) && (__x86_64__) */
|
||||
|
61
avxdefs.h
61
avxdefs.h
@@ -11,6 +11,10 @@
|
||||
// 32 bytes for 256 bit vectors. 64 byte alignment is recommended for
|
||||
// best cache alignment.
|
||||
//
|
||||
// Windows has problems with 256 bit vectors as function arguments passed by
|
||||
// value. Stack alignment is only guaranteed to 16 bytes and 32 is required.
|
||||
// Always use pointers for 256 bit arguments.
|
||||
//
|
||||
// There exist duplicates of some functions. In general the first defined
|
||||
// is preferred as it is more efficient but also more restrictive and may
|
||||
// not be applicable. The less efficient versions are more flexible.
|
||||
@@ -93,7 +97,7 @@ union m128_v8 {
|
||||
};
|
||||
typedef union m128_v8 m128_v8;
|
||||
|
||||
// Compile time definition macros, for initializing only.
|
||||
// Compile time definition macros, for compile time initializing only.
|
||||
// x must be a scalar constant.
|
||||
#define mm_setc_64( x1, x0 ) {{ x1, x0 }}
|
||||
#define mm_setc1_64( x ) {{ x, x }}
|
||||
@@ -111,7 +115,7 @@ typedef union m128_v8 m128_v8;
|
||||
x07, x06, x05, x04, x03, x02, x01, x00 }}
|
||||
#define mm_setc1_8( x ) {{ x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x }}
|
||||
|
||||
// Compile time constants, use only for initializing.
|
||||
// Compile time constants, use only for compile time initializing.
|
||||
#define c128_zero mm_setc1_64( 0ULL )
|
||||
#define c128_neg1 mm_setc1_64( 0xFFFFFFFFFFFFFFFFULL )
|
||||
#define c128_one_128 mm_setc_64( 0ULL, 1ULL )
|
||||
@@ -214,6 +218,7 @@ static inline void memset_64( uint64_t *dst, uint64_t a, int n )
|
||||
//
|
||||
// Bit operations
|
||||
|
||||
// Bitfield extraction/insertion.
|
||||
// Return a vector with n bits extracted and right justified from each
|
||||
// element of v starting at bit i.
|
||||
static inline __m128i mm_bfextract_64( __m128i v, int i, int n )
|
||||
@@ -262,6 +267,10 @@ static inline __m128i mm_bitextract_16( __m128i v, int i )
|
||||
// obsolete, use bfextract with n = 1
|
||||
// Return vector with bit i of each element of v as a bool
|
||||
// (shifted to position 0)
|
||||
#define mm_bittest_64( v, i ) mm_bfextract_64( v, i, 1 )
|
||||
#define mm_bittest_32( v, i ) mm_bfextract_32( v, i, 1 )
|
||||
#define mm_bittest_16( v, i ) mm_bfextract_16( v, i, 1 )
|
||||
/*
|
||||
static inline __m128i mm_bittest_64( __m128i v, int i )
|
||||
{ return _mm_and_si128( _mm_srli_epi64( v, i ), m128_one_64 ); }
|
||||
|
||||
@@ -270,6 +279,7 @@ static inline __m128i mm_bittest_32( __m128i v, int i )
|
||||
|
||||
static inline __m128i mm_bittest_16( __m128i v, int i )
|
||||
{ return _mm_and_si128( _mm_srli_epi16( v, i ), m128_one_64 ); }
|
||||
*/
|
||||
|
||||
// Return vector with bit i of each element in v set/cleared
|
||||
static inline __m128i mm_bitset_64( __m128i v, int i )
|
||||
@@ -770,6 +780,7 @@ static inline uint8_t mm256_vindex_to_imm8_16( __m256i v, uint8_t n )
|
||||
//
|
||||
// Bit operations
|
||||
|
||||
// Bit field extraction/insertion.
|
||||
// Return a vector with bits [i..i+n] extracted and right justified from each
|
||||
// element of v.
|
||||
static inline __m256i mm256_bfextract_64( __m256i v, int i, int n )
|
||||
@@ -810,7 +821,6 @@ static inline __m256i mm256_bfinsert_16( __m256i v, __m256i a, int i, int n )
|
||||
_mm256_slli_epi16( a, i) );
|
||||
}
|
||||
|
||||
|
||||
// return bit n in position, all other bits cleared
|
||||
#define mm256_bitextract_64 ( x, n ) \
|
||||
_mm256_and_si256( _mm256_slli_epi64( m256_one_64, n ), x )
|
||||
@@ -820,12 +830,18 @@ static inline __m256i mm256_bfinsert_16( __m256i v, __m256i a, int i, int n )
|
||||
_mm256_and_si256( _mm256_slli_epi16( m256_one_16, n ), x )
|
||||
|
||||
// Return bit n as bool (bit 0)
|
||||
#define mm_bittest_64( v, i ) mm_bfextract_64( v, i, 1 )
|
||||
#define mm_bittest_32( v, i ) mm_bfextract_32( v, i, 1 )
|
||||
#define mm_bittest_16( v, i ) mm_bfextract_16( v, i, 1 )
|
||||
|
||||
/*
|
||||
#define mm256_bittest_64( x, n ) \
|
||||
_mm256_and_si256( m256_one_64, _mm256_srli_epi64( x, n ) )
|
||||
#define mm256_bittest_32( x, n ) \
|
||||
_mm256_and_si256( m256_one_32, _mm256_srli_epi32( x, n ) )
|
||||
#define mm256_bittest_16( x, n ) \
|
||||
_mm256_and_si256( m256_one_16, _mm256_srli_epi16( x, n ) )
|
||||
*/
|
||||
|
||||
// Return x with bit n set/cleared in all elements
|
||||
#define mm256_bitset_64( x, n ) \
|
||||
@@ -1084,7 +1100,41 @@ static inline __m256i mm256_bswap_16( __m256i v )
|
||||
|
||||
// Pseudo parallel AES
|
||||
// Probably noticeably slower than using pure 128 bit vectors
|
||||
inline __m256i mm256_aesenc_2x128( __m256i x, __m256i k )
|
||||
// Windows has problems with __m256i args paddes by value.
|
||||
// Use pointers to facilitate __m256i to __m128i conversion.
|
||||
// When key is used switching keys may reduce performance.
|
||||
inline __m256i mm256_aesenc_2x128( void *msg, void *key )
|
||||
{
|
||||
((__m128i*)msg)[0] = _mm_aesenc_si128( ((__m128i*)msg)[0],
|
||||
((__m128i*)key)[0] );
|
||||
((__m128i*)msg)[1] = _mm_aesenc_si128( ((__m128i*)msg)[1],
|
||||
((__m128i*)key)[1] );
|
||||
}
|
||||
|
||||
inline __m256i mm256_aesenc_nokey_2x128( void *msg )
|
||||
{
|
||||
((__m128i*)msg)[0] = _mm_aesenc_si128( ((__m128i*)msg)[0], m128_zero );
|
||||
((__m128i*)msg)[1] = _mm_aesenc_si128( ((__m128i*)msg)[1], m128_zero );
|
||||
}
|
||||
|
||||
// source msg preserved
|
||||
/*
|
||||
inline __m256i mm256_aesenc_2x128( void *out, void *msg, void *key )
|
||||
{
|
||||
((__m128i*)out)[0] = _mm_aesenc_si128( ((__m128i*)msg)[0],
|
||||
((__m128i*)key)[0] );
|
||||
((__m128i*)out)[1] = _mm_aesenc_si128( ((__m128i*)msg)[1],
|
||||
((__m128i*)key)[1] );
|
||||
}
|
||||
|
||||
inline __m256i mm256_aesenc_nokey_2x128( void *out, void *msg )
|
||||
{
|
||||
((__m128i*)out)[0] = _mm_aesenc_si128( ((__m128i*)msg)[0], m128_zero );
|
||||
((__m128i*)out)[1] = _mm_aesenc_si128( ((__m128i*)msg)[1], m128_zero );
|
||||
}
|
||||
*/
|
||||
|
||||
inline __m256i mm256_aesenc_2x128_obs( __m256i x, __m256i k )
|
||||
{
|
||||
__m128i hi, lo, khi, klo;
|
||||
|
||||
@@ -1095,7 +1145,7 @@ inline __m256i mm256_aesenc_2x128( __m256i x, __m256i k )
|
||||
return mm256_pack_2x128( hi, lo );
|
||||
}
|
||||
|
||||
inline __m256i mm256_aesenc_nokey_2x128( __m256i x )
|
||||
inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
{
|
||||
__m128i hi, lo;
|
||||
|
||||
@@ -1105,6 +1155,7 @@ inline __m256i mm256_aesenc_nokey_2x128( __m256i x )
|
||||
return mm256_pack_2x128( hi, lo );
|
||||
}
|
||||
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
// Paired functions for interleaving and deinterleaving data for vector
|
||||
|
@@ -8,7 +8,7 @@ make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-avx2.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-aes-avx2
|
||||
mv cpuminer cpuminer-avx2
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.1.1.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.2.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.8.1.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.1.1'
|
||||
PACKAGE_VERSION='3.8.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.2'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.8.1.1 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.8.2 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1392,7 +1392,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.1.1:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.2:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1497,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.8.1.1
|
||||
cpuminer-opt configure 3.8.2
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.8.1.1, which was
|
||||
It was created by cpuminer-opt $as_me 3.8.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2981,7 +2981,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.8.1.1'
|
||||
VERSION='3.8.2'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.8.1.1, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.8.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6743,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.8.1.1
|
||||
cpuminer-opt config.status 3.8.2
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.8.1.1])
|
||||
AC_INIT([cpuminer-opt], [3.8.2])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
@@ -858,12 +858,14 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
if (reason)
|
||||
{
|
||||
applog(LOG_WARNING, "reject reason: %s", reason);
|
||||
/*
|
||||
if (strncmp(reason, "low difficulty share", 20) == 0)
|
||||
{
|
||||
opt_diff_factor = (opt_diff_factor * 2.0) / 3.0;
|
||||
applog(LOG_WARNING, "factor reduced to : %0.2f", opt_diff_factor);
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
6
miner.h
6
miner.h
@@ -483,6 +483,7 @@ uint32_t* get_stratum_job_ntime();
|
||||
|
||||
enum algos {
|
||||
ALGO_NULL,
|
||||
ALGO_ALLIUM,
|
||||
ALGO_ANIME,
|
||||
ALGO_ARGON2,
|
||||
ALGO_AXIOM,
|
||||
@@ -542,6 +543,7 @@ enum algos {
|
||||
ALGO_X11,
|
||||
ALGO_X11EVO,
|
||||
ALGO_X11GOST,
|
||||
ALGO_X12,
|
||||
ALGO_X13,
|
||||
ALGO_X13SM3,
|
||||
ALGO_X14,
|
||||
@@ -557,6 +559,7 @@ enum algos {
|
||||
};
|
||||
static const char* const algo_names[] = {
|
||||
NULL,
|
||||
"allium",
|
||||
"anime",
|
||||
"argon2",
|
||||
"axiom",
|
||||
@@ -616,6 +619,7 @@ static const char* const algo_names[] = {
|
||||
"x11",
|
||||
"x11evo",
|
||||
"x11gost",
|
||||
"x12",
|
||||
"x13",
|
||||
"x13sm3",
|
||||
"x14",
|
||||
@@ -686,6 +690,7 @@ static char const usage[] = "\
|
||||
Usage: " PACKAGE_NAME " [OPTIONS]\n\
|
||||
Options:\n\
|
||||
-a, --algo=ALGO specify the algorithm to use\n\
|
||||
allium Garlicoin (GRLC)\n\
|
||||
anime Animecoin (ANI)\n\
|
||||
argon2\n\
|
||||
axiom Shabal-256 MemoHash\n\
|
||||
@@ -745,6 +750,7 @@ Options:\n\
|
||||
x11 Dash\n\
|
||||
x11evo Revolvercoin (XRE)\n\
|
||||
x11gost sib (SibCoin)\n\
|
||||
x12 Galaxie Cash (GCH)\n\
|
||||
x13 X13\n\
|
||||
x13sm3 hsr (Hshare)\n\
|
||||
x14 X14\n\
|
||||
|
Reference in New Issue
Block a user