mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3da2b958cf | ||
![]() |
dc2f8d81d3 | ||
![]() |
fc97ef174a | ||
![]() |
13523a12f9 |
16
Makefile.am
16
Makefile.am
@@ -21,15 +21,6 @@ cpuminer_SOURCES = \
|
||||
api.c \
|
||||
sysinfos.c \
|
||||
algo-gate-api.c\
|
||||
crypto/oaes_lib.c \
|
||||
crypto/c_keccak.c \
|
||||
crypto/c_groestl.c \
|
||||
crypto/c_blake256.c \
|
||||
crypto/c_jh.c \
|
||||
crypto/c_skein.c \
|
||||
crypto/hash.c \
|
||||
crypto/aesb.c \
|
||||
crypto/magimath.cpp \
|
||||
algo/argon2/argon2a/argon2a.c \
|
||||
algo/argon2/argon2a/ar2/argon2.c \
|
||||
algo/argon2/argon2a/ar2/opt.c \
|
||||
@@ -76,10 +67,6 @@ cpuminer_SOURCES = \
|
||||
algo/bmw/bmw512-gate.c \
|
||||
algo/bmw/bmw512.c \
|
||||
algo/bmw/bmw512-4way.c \
|
||||
algo/cryptonight/cryptolight.c \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
algo/cryptonight/cryptonight-aesni.c\
|
||||
algo/cryptonight/cryptonight.c\
|
||||
algo/cubehash/cubehash_sse2.c\
|
||||
algo/cubehash/cube-hash-2way.c \
|
||||
algo/echo/sph_echo.c \
|
||||
@@ -141,7 +128,8 @@ cpuminer_SOURCES = \
|
||||
algo/lyra2/allium.c \
|
||||
algo/lyra2/phi2-4way.c \
|
||||
algo/lyra2/phi2.c \
|
||||
algo/m7m.c \
|
||||
algo//m7m/m7m.c \
|
||||
algo/m7m/magimath.cpp \
|
||||
algo/nist5/nist5-gate.c \
|
||||
algo/nist5/nist5-4way.c \
|
||||
algo/nist5/nist5.c \
|
||||
|
@@ -65,10 +65,60 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.12.2
|
||||
|
||||
Fixed xevan, skein, skein2 AVX2, #238.
|
||||
|
||||
Reversed polarity of AVX2 vector bit test utilities, and all users, to be
|
||||
logically and semantically correct. Follow up to issue #236.
|
||||
|
||||
v3.12.1
|
||||
|
||||
Fixed anime AVX2 low difficulty shares, git issue #236.
|
||||
|
||||
Periodic summary now reports lost hash rate due to rejected and stale shares,
|
||||
displayed only when non-zero.
|
||||
|
||||
v3.12.0.1
|
||||
|
||||
Fixed hodl rejects, git issue #237.
|
||||
|
||||
Fixed debug code added in v3.12.0 to work with AVX2 to be enabled only
|
||||
after low difficulty share have been seen to avoid unnecessarily excessive
|
||||
log outout.
|
||||
|
||||
Added more digits of precision to diff in log output to help diagnose
|
||||
low difficulty shares.
|
||||
|
||||
v3.12.0
|
||||
|
||||
Faster phi2 AVX2 +62%, AVX512 +150% on Intel CPUs. AMD Ryzen AVX2 is
|
||||
YMMV due to its inferiour AVX2 implementation.
|
||||
|
||||
Fixed Hodl stats, rejects are still an issue since v3.9.5, git issue #237.
|
||||
|
||||
API can now be enabled with "-b port" or "--api-bind port".
|
||||
It will use the default address 127.0.0.1.
|
||||
|
||||
Editorial: Short form options should only be used on the command line to save
|
||||
typing. Configuration files and scripts should always use the long form
|
||||
"--api-bind addr:port" without relying on any defaults. This is a general
|
||||
recommendation that applies to all options for any application.
|
||||
|
||||
Removed obsolete cryptonight, all variants, and supporting code for more
|
||||
size reduction and faster compiling.
|
||||
|
||||
Tweaked the timing of the CPU temperature and frequency log (Linux only).
|
||||
|
||||
Added some debug code to collect more info aboout low difficulty rejects,
|
||||
git issue #236.
|
||||
|
||||
v3.11.9
|
||||
|
||||
Fixed x16r invalid shares when Luffa was first in hash order.
|
||||
|
||||
API is disabled by default.
|
||||
|
||||
New startup message for status of stratum connection, API & extranonce.
|
||||
|
||||
New log report for CPU temperature, frequency of fastest and slowest cores.
|
||||
|
@@ -113,7 +113,6 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->hash = (void*)&null_hash;
|
||||
gate->hash_suw = (void*)&null_hash_suw;
|
||||
gate->get_new_work = (void*)&std_get_new_work;
|
||||
gate->get_nonceptr = (void*)&std_get_nonceptr;
|
||||
gate->work_decode = (void*)&std_le_work_decode;
|
||||
gate->decode_extra_data = (void*)&do_nothing;
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
@@ -129,7 +128,6 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->resync_threads = (void*)&do_nothing;
|
||||
gate->do_this_thread = (void*)&return_true;
|
||||
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
||||
gate->stratum_handle_response = (void*)&std_stratum_handle_response;
|
||||
gate->get_work_data_size = (void*)&std_get_work_data_size;
|
||||
gate->optimizations = EMPTY_SET;
|
||||
gate->ntime_index = STD_NTIME_INDEX;
|
||||
@@ -168,9 +166,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
case ALGO_BMW512: register_bmw512_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHTV7: register_cryptonightv7_algo ( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
@@ -266,25 +261,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
// restore warnings
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
// override std defaults with jr2 defaults
|
||||
bool register_json_rpc2( algo_gate_t *gate )
|
||||
{
|
||||
// gate->wait_for_diff = (void*)&do_nothing;
|
||||
gate->get_new_work = (void*)&jr2_get_new_work;
|
||||
gate->get_nonceptr = (void*)&jr2_get_nonceptr;
|
||||
gate->stratum_gen_work = (void*)&jr2_stratum_gen_work;
|
||||
gate->build_stratum_request = (void*)&jr2_build_stratum_request;
|
||||
gate->submit_getwork_result = (void*)&jr2_submit_getwork_result;
|
||||
gate->longpoll_rpc_call = (void*)&jr2_longpoll_rpc_call;
|
||||
gate->work_decode = (void*)&jr2_work_decode;
|
||||
gate->stratum_handle_response = (void*)&jr2_stratum_handle_response;
|
||||
gate->nonce_index = JR2_NONCE_INDEX;
|
||||
jsonrpc_2 = true; // still needed
|
||||
opt_extranonce = false;
|
||||
// have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// run the alternate hash function for a specific algo
|
||||
void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
{
|
||||
@@ -350,7 +326,7 @@ void get_algo_alias( char** algo_or_alias )
|
||||
if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) )
|
||||
{
|
||||
// found valid alias, return proper name
|
||||
*algo_or_alias = (const char*)( algo_alias_map[i][ PROPER ] );
|
||||
*algo_or_alias = (char*)( algo_alias_map[i][ PROPER ] );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@@ -129,9 +129,6 @@ void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
|
||||
// Get thread local copy of blockheader with unique nonce.
|
||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
|
||||
|
||||
// Return pointer to nonce in blockheader.
|
||||
uint32_t *( *get_nonceptr ) ( uint32_t* );
|
||||
|
||||
// Decode getwork blockheader
|
||||
bool ( *work_decode ) ( const json_t*, struct work* );
|
||||
|
||||
@@ -169,7 +166,6 @@ bool ( *do_this_thread ) ( int );
|
||||
void ( *resync_threads ) ( struct work* );
|
||||
|
||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||
bool ( *stratum_handle_response ) ( json_t* );
|
||||
set_t optimizations;
|
||||
int ( *get_work_data_size ) ();
|
||||
int ntime_index;
|
||||
@@ -222,31 +218,22 @@ void null_hash_suw();
|
||||
|
||||
// optional safe targets, default listed first unless noted.
|
||||
|
||||
uint32_t *std_get_nonceptr( uint32_t *work_data );
|
||||
uint32_t *jr2_get_nonceptr( uint32_t *work_data );
|
||||
|
||||
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr );
|
||||
void jr2_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr );
|
||||
|
||||
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
|
||||
void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
|
||||
|
||||
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
|
||||
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
|
||||
|
||||
bool std_le_work_decode( const json_t *val, struct work *work );
|
||||
bool std_be_work_decode( const json_t *val, struct work *work );
|
||||
bool jr2_work_decode( const json_t *val, struct work *work );
|
||||
|
||||
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
|
||||
|
||||
void std_le_build_stratum_request( char *req, struct work *work );
|
||||
void std_be_build_stratum_request( char *req, struct work *work );
|
||||
void jr2_build_stratum_request ( char *req, struct work *work );
|
||||
|
||||
char* std_malloc_txs_request( struct work *work );
|
||||
|
||||
@@ -263,10 +250,10 @@ void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||
|
||||
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
|
||||
json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
|
||||
//json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
|
||||
|
||||
bool std_stratum_handle_response( json_t *val );
|
||||
bool jr2_stratum_handle_response( json_t *val );
|
||||
//bool std_stratum_handle_response( json_t *val );
|
||||
//bool jr2_stratum_handle_response( json_t *val );
|
||||
|
||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id );
|
||||
@@ -288,7 +275,7 @@ bool register_algo( algo_gate_t *gate );
|
||||
// Overrides a common set of functions used by RPC2 and other RPC2-specific
|
||||
// init. Called by algo's register function before initializing algo-specific
|
||||
// functions and data.
|
||||
bool register_json_rpc2( algo_gate_t *gate );
|
||||
//bool register_json_rpc2( algo_gate_t *gate );
|
||||
|
||||
// use this to call the hash function of an algo directly, ie util.c test.
|
||||
void exec_hash_function( int algo, void *output, const void *pdata );
|
||||
|
@@ -153,7 +153,7 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
// gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
|
@@ -138,7 +138,7 @@ void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-512 4 way 64
|
||||
// BMW-512 64 bit 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16];
|
||||
@@ -149,7 +149,6 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_big_context bmw512_4way_context;
|
||||
|
||||
|
||||
void bmw512_4way_init(void *cc);
|
||||
|
||||
void bmw512_4way_update(void *cc, const void *data, size_t len);
|
||||
@@ -164,6 +163,7 @@ void bmw512_4way_addbits_and_close(
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// BMW-512 64 bit 8 way
|
||||
typedef struct {
|
||||
__m512i buf[16];
|
||||
__m512i H[16];
|
||||
@@ -171,6 +171,8 @@ typedef struct {
|
||||
uint64_t bit_count;
|
||||
} bmw512_8way_context __attribute__((aligned(128)));
|
||||
|
||||
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
|
||||
size_t len );
|
||||
void bmw512_8way_init( bmw512_8way_context *ctx );
|
||||
void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
|
||||
size_t len );
|
||||
|
@@ -1507,6 +1507,93 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst )
|
||||
casti_m512i( dst, u ) = h1[ v ];
|
||||
}
|
||||
|
||||
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf = ctx->buf;
|
||||
__m512i htmp[16];
|
||||
__m512i *H = ctx->H;
|
||||
__m512i *h2 = htmp;
|
||||
uint64_t bit_count = len * 8;
|
||||
size_t ptr = 0;
|
||||
const int buf_size = 128; // bytes of one lane, compatible with len
|
||||
|
||||
// Init
|
||||
|
||||
H[ 0] = m512_const1_64( 0x8081828384858687 );
|
||||
H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F );
|
||||
H[ 2] = m512_const1_64( 0x9091929394959697 );
|
||||
H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F );
|
||||
H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 );
|
||||
H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF );
|
||||
H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 );
|
||||
H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF );
|
||||
H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 );
|
||||
H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF );
|
||||
H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 );
|
||||
H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF );
|
||||
H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 );
|
||||
H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF );
|
||||
H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 );
|
||||
H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF );
|
||||
|
||||
// Update
|
||||
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( buf + (ptr>>3), vdata, clen >> 3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
__m512i *ht;
|
||||
compress_big_8way( buf, H, h2 );
|
||||
ht = H;
|
||||
H = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
if ( H != ctx->H )
|
||||
memcpy_512( ctx->H, H, 16 );
|
||||
|
||||
// Close
|
||||
{
|
||||
__m512i h1[16], h2[16];
|
||||
size_t u, v;
|
||||
|
||||
buf[ ptr>>3 ] = m512_const1_64( 0x80 );
|
||||
ptr += 8;
|
||||
|
||||
if ( ptr > (buf_size - 8) )
|
||||
{
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
compress_big_8way( buf, H, h1 );
|
||||
ptr = 0;
|
||||
H = h1;
|
||||
}
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
|
||||
buf[ (buf_size - 8) >> 3 ] = _mm512_set1_epi64( bit_count );
|
||||
compress_big_8way( buf, H, h2 );
|
||||
for ( u = 0; u < 16; u ++ )
|
||||
buf[ u ] = h2[ u ];
|
||||
compress_big_8way( buf, final_b8, h1 );
|
||||
for (u = 0, v = 8; u < 8; u ++, v ++)
|
||||
casti_m512i( out, u ) = h1[ v ];
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -1,371 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
#include "crypto/hash-ops.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 1
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#endif
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
struct cryptonight_ctx {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
};
|
||||
|
||||
static void cryptolight_hash_ctx(void* output, const void* input, int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
len = 76;
|
||||
hash_process(&ctx->state.hs, (const uint8_t*) input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
void cryptolight_hash(void* output, const void* input, int len) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
cryptolight_hash_ctx(output, input, len, ctx);
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
#if defined(__AES__)
|
||||
|
||||
static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
hash_process(&ctx->state.hs, (const uint8_t*)input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int scanhash_cryptolight( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
|
||||
int thr_id = mythr->id;
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
#if defined(__AES__)
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#else
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#endif
|
||||
free(ctx);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptolight_algo( algo_gate_t* gate )
|
||||
{
|
||||
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptolight;
|
||||
gate->hash = (void*)&cryptolight_hash;
|
||||
gate->hash_suw = (void*)&cryptolight_hash;
|
||||
return true;
|
||||
};
|
||||
|
@@ -1,357 +0,0 @@
|
||||
#if defined(__AES__)
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <memory.h>
|
||||
#include "cryptonight.h"
|
||||
#include "miner.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
{
|
||||
__m128i tmp4;
|
||||
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
|
||||
tmp4 = _mm_slli_si128(*tmp1, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
|
||||
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
|
||||
// with ExpandAESKey256() and its subroutines
|
||||
static inline void ExpandAESKey256(char *keybuf)
|
||||
{
|
||||
__m128i tmp1, tmp2, tmp3, *keys;
|
||||
|
||||
keys = (__m128i *)keybuf;
|
||||
|
||||
tmp1 = _mm_load_si128((__m128i *)keybuf);
|
||||
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[2] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[3] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[4] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[5] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[6] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[7] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[8] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[9] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[10] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[11] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[12] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[13] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[14] = tmp1;
|
||||
}
|
||||
|
||||
// align to 64 byte cache line
|
||||
typedef struct
|
||||
{
|
||||
uint8_t long_state[MEMORY] __attribute((aligned(64)));
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(64)));
|
||||
uint64_t a[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
|
||||
uint64_t b[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
|
||||
uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(64)));
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
{
|
||||
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
size_t i, j;
|
||||
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
longoutput = (__m128i*)ctx.long_state;
|
||||
xmminput = (__m128i*)ctx.text;
|
||||
expkey = (__m128i*)ExpandedKey;
|
||||
|
||||
// prefetch expkey, xmminput and enough longoutput for 4 iterations
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
__builtin_prefetch( longoutput + i, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 4, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 8, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 12, 1, 0 );
|
||||
}
|
||||
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// prefetch 4 iterations ahead.
|
||||
__builtin_prefetch( longoutput + i + 64, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 68, 1, 0 );
|
||||
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
|
||||
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
||||
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
||||
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
||||
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
||||
|
||||
uint64_t a[2] __attribute((aligned(16))),
|
||||
b[2] __attribute((aligned(16))),
|
||||
c[2] __attribute((aligned(16)));
|
||||
a[0] = ctx.a[0];
|
||||
a[1] = ctx.a[1];
|
||||
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
|
||||
__m128i a_x = _mm_load_si128( (__m128i*)a );
|
||||
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
__m128i c_x = _mm_load_si128( lsa );
|
||||
uint64_t *nextblock;
|
||||
uint64_t hi, lo;
|
||||
|
||||
// n-1 iterations
|
||||
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
|
||||
{
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
b_x = c_x;
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
a_x = _mm_load_si128( (__m128i*)a );
|
||||
c_x = _mm_load_si128( lsa );
|
||||
}
|
||||
// abreviated nth iteration
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
|
||||
}
|
||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// stay 4 iterations ahead.
|
||||
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
|
||||
|
||||
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||
|
||||
for( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||
|
||||
for( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
|
||||
}
|
||||
#endif
|
@@ -1,133 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#else
|
||||
#include "crypto/c_groestl.h"
|
||||
#endif
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
/*
|
||||
#if defined __unix__ && (!defined __APPLE__)
|
||||
#include <sys/mman.h>
|
||||
#elif defined _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
#if defined(__AES__)
|
||||
hashState_groestl256 ctx;
|
||||
init_groestl256( &ctx, 32 );
|
||||
update_and_final_groestl256( &ctx, output, input, len * 8 );
|
||||
#else
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#endif
|
||||
}
|
||||
|
||||
void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void (* const extra_hashes[4])( const void *, size_t, char *) =
|
||||
{ do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash };
|
||||
|
||||
void cryptonight_hash( void *restrict output, const void *input, int len )
|
||||
{
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, len );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
void cryptonight_hash_suw( void *restrict output, const void *input )
|
||||
{
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, 76 );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool cryptonightV7 = false;
|
||||
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[32 / 4] __attribute__((aligned(32)));
|
||||
|
||||
// if ( ( cryptonightV7 && ( *(uint8_t*)pdata < 7 ) )
|
||||
// || ( !cryptonightV7 && ( *(uint8_t*)pdata == 7 ) ) )
|
||||
// applog(LOG_WARNING,"Cryptonight variant mismatch, shares may be rejected.");
|
||||
|
||||
do
|
||||
{
|
||||
*nonceptr = ++n;
|
||||
cryptonight_hash( hash, pdata, 76 );
|
||||
if (unlikely( hash[7] < Htarg ))
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
// work_set_target_ratio( work, hash );
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
{
|
||||
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
cryptonightV7 = false;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_cryptonightv7_algo( algo_gate_t* gate )
|
||||
{
|
||||
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
cryptonightV7 = true;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
return true;
|
||||
};
|
||||
|
@@ -1,310 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "miner.h"
|
||||
#include <memory.h>
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
//#include "crypto/hash-ops.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 0
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#endif
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
/*
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
*/
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
/*
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
*/
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst( const uint8_t* a, uint8_t* c, uint8_t* dst,
|
||||
const uint64_t tweak )
|
||||
{
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = cryptonightV7 ? lo ^ tweak : lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
{
|
||||
// hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
__builtin_prefetch( ctx.long_state, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
|
||||
|
||||
size_t i, j;
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx.aes_ctx, ctx.state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
|
||||
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
|
||||
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 0], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 1], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 2], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 3], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 4], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 5], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 6], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 7], ctx.aes_ctx->key->exp_data);
|
||||
memcpy(&ctx.long_state[i], ctx.text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a);
|
||||
xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i)
|
||||
{
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)], tweak );
|
||||
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)], tweak );
|
||||
|
||||
}
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
__builtin_prefetch( ctx.long_state, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
|
||||
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx.aes_ctx, &ctx.state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
|
||||
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
|
||||
|
||||
xor_blocks(&ctx.text[0 * AES_BLOCK_SIZE], &ctx.long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[0 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[1 * AES_BLOCK_SIZE], &ctx.long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[1 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[2 * AES_BLOCK_SIZE], &ctx.long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[2 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[3 * AES_BLOCK_SIZE], &ctx.long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[3 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[4 * AES_BLOCK_SIZE], &ctx.long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[4 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[5 * AES_BLOCK_SIZE], &ctx.long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[5 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[6 * AES_BLOCK_SIZE], &ctx.long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[6 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[7 * AES_BLOCK_SIZE], &ctx.long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
// hash_permutation(&ctx.state.hs);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx.aes_ctx);
|
||||
}
|
||||
|
@@ -1,51 +0,0 @@
|
||||
#ifndef __CRYPTONIGHT_H_INCLUDED
|
||||
#define __CRYPTONIGHT_H_INCLUDED
|
||||
|
||||
#include <stddef.h>
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "miner.h"
|
||||
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define MEMORY_M128I (MEMORY >> 4) // 2 MiB / 16 = 128 ki * __m128i
|
||||
#define ITER (1 << 20)
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
|
||||
#define INIT_SIZE_M128I (INIT_SIZE_BYTE >> 4) // 8
|
||||
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union hash_state {
|
||||
uint8_t b[200];
|
||||
uint64_t w[25];
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output);
|
||||
void do_groestl_hash(const void* input, size_t len, char* output);
|
||||
void do_jh_hash(const void* input, size_t len, char* output);
|
||||
void do_skein_hash(const void* input, size_t len, char* output);
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len);
|
||||
void keccakf(uint64_t st[25], int rounds);
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
|
||||
|
||||
extern bool cryptonightV7;
|
||||
|
||||
#endif
|
||||
|
@@ -144,7 +144,7 @@ int hodl_scanhash( struct work* work, uint32_t max_nonce,
|
||||
#if defined(__AES__)
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id );
|
||||
pthread_barrier_wait( &hodl_barrier );
|
||||
return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info );
|
||||
return scanhash_hodl_wolf( work, max_nonce, hashes_done, mythr );
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@@ -129,9 +129,10 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
if( FinalPoW[7] <= ptarget[7] )
|
||||
{
|
||||
pdata[20] = swab32( BlockHdr[20] );
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
return(1);
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
submit_solution( work, FinalPoW, mythr );
|
||||
return(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -198,7 +199,8 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
pdata[20] = swab32( BlockHdr[20] );
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
return(1);
|
||||
submit_solution( work, FinalPoW, mythr );
|
||||
return(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -76,37 +76,34 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x3ffff;
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
edata[i] = bswap_32( pdata[i] );
|
||||
|
||||
sph_blake256_init( &allium_ctx.blake );
|
||||
sph_blake256( &allium_ctx.blake, endiandata, 64 );
|
||||
sph_blake256( &allium_ctx.blake, edata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], nonce );
|
||||
allium_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = nonce;
|
||||
allium_hash( hash, edata );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -119,7 +119,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -228,13 +228,14 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
|
||||
bool register_phi2_algo( algo_gate_t* gate )
|
||||
{
|
||||
// init_phi2_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
|
||||
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
|
||||
gate->build_extraheader = (void*)&phi2_build_extraheader;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined(PHI2_4WAY)
|
||||
#if defined(PHI2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_8way;
|
||||
#elif defined(PHI2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_4way;
|
||||
#else
|
||||
init_phi2_ctx();
|
||||
|
@@ -184,19 +184,26 @@ bool init_allium_ctx();
|
||||
|
||||
/////////////////////////////////////////
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
// #define PHI2_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define PHI2_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define PHI2_4WAY 1
|
||||
#endif
|
||||
|
||||
extern bool phi2_has_roots;
|
||||
|
||||
bool register_phi2_algo( algo_gate_t* gate );
|
||||
#if defined(PHI2_4WAY)
|
||||
#if defined(PHI2_8WAY)
|
||||
|
||||
void phi2_8way_hash( void *state, const void *input );
|
||||
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(PHI2_4WAY)
|
||||
|
||||
void phi2_hash_4way( void *state, const void *input );
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
//void init_phi2_ctx();
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -7,27 +7,8 @@
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cube_4way_context cube;
|
||||
skein256_8way_context skein;
|
||||
bmw256_8way_context bmw;
|
||||
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
|
||||
|
||||
bool init_lyra2rev2_8way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &l2v2_8way_ctx.keccak );
|
||||
cube_4way_init( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &l2v2_8way_ctx.skein );
|
||||
bmw256_8way_init( &l2v2_8way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
@@ -52,14 +33,24 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
// cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
//
|
||||
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
@@ -80,15 +71,123 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
// cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
// cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
//
|
||||
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
|
||||
bmw256_8way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_8way_close( &ctx.bmw, state );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_8way_context skein;
|
||||
bmw256_8way_context bmw;
|
||||
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
|
||||
|
||||
bool init_lyra2rev2_8way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &l2v2_8way_ctx.keccak );
|
||||
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &l2v2_8way_ctx.skein );
|
||||
bmw256_8way_init( &l2v2_8way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
|
||||
|
||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
||||
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
@@ -97,49 +196,49 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
bmw256_8way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &hash[7*8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
blake256_8way_init( &l2v2_8way_ctx.blake );
|
||||
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev2_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -226,15 +325,16 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hashd7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -249,20 +349,20 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev2_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -99,7 +99,7 @@ int scanhash_lyra2rev2( struct work *work,
|
||||
lyra2rev2_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
|
@@ -130,15 +130,15 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<4];
|
||||
uint32_t *hashd7 = &hash[7*16];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -159,10 +159,10 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
@@ -170,6 +170,7 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
}
|
||||
n += 16;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -194,7 +195,7 @@ bool init_lyra2rev3_8way_ctx()
|
||||
|
||||
void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
@@ -250,17 +251,17 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<3];
|
||||
uint32_t *hashd7 = &hash[7*8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
@@ -277,7 +278,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
@@ -357,42 +358,41 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hashd7 = &(hash[7*4]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
|
||||
|
||||
blake256_4way_init( &l2v3_4way_ctx.blake );
|
||||
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev3_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -88,7 +88,7 @@ int scanhash_lyra2rev3( struct work *work,
|
||||
lyra2rev3_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
|
@@ -56,7 +56,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -65,14 +65,13 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
|
||||
lyra2z_midstate( endiandata );
|
||||
lyra2z_midstate( endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2z_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
|
@@ -9,7 +9,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
{
|
||||
uint32_t _ALIGN(256) hash[16];
|
||||
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
|
||||
2, 330, 256 );
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
@@ -18,38 +18,40 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8] __attribute__ ((aligned (128)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
lyra2z330_hash( hash, endiandata, work->height );
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = nonce;
|
||||
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, edata, 80, edata, 80,
|
||||
2, 330, 256 );
|
||||
|
||||
// lyra2z330_hash( hash, edata, work->height );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
be32enc( pdata + 19, nonce );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = nonce - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1,233 +1,501 @@
|
||||
/**
|
||||
* Phi-2 algo Implementation
|
||||
*/
|
||||
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if defined(PHI2_4WAY)
|
||||
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "lyra2.h"
|
||||
#if defined(__VAES__)
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#elif defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
#if defined(PHI2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
jh512_8way_context jh;
|
||||
#if defined(__VAES__)
|
||||
echo_4way_context echo;
|
||||
#else
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
sph_gost512_context gost;
|
||||
skein512_8way_context skein;
|
||||
} phi2_8way_ctx_holder;
|
||||
|
||||
void phi2_8way_hash( void *state, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(128) hash[64*8];
|
||||
unsigned char _ALIGN(128) hashA[64*2];
|
||||
unsigned char _ALIGN(64) hash0[64];
|
||||
unsigned char _ALIGN(64) hash1[64];
|
||||
unsigned char _ALIGN(64) hash2[64];
|
||||
unsigned char _ALIGN(64) hash3[64];
|
||||
unsigned char _ALIGN(64) hash4[64];
|
||||
unsigned char _ALIGN(64) hash5[64];
|
||||
unsigned char _ALIGN(64) hash6[64];
|
||||
unsigned char _ALIGN(64) hash7[64];
|
||||
const int size = phi2_has_roots ? 144 : 80 ;
|
||||
phi2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*)hash0, 512,
|
||||
(const byte*)input, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1, 512,
|
||||
(const byte*)input + 144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2, 512,
|
||||
(const byte*)input + 2*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3, 512,
|
||||
(const byte*)input + 3*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash4, 512,
|
||||
(const byte*)input + 4*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash5, 512,
|
||||
(const byte*)input + 5*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash6, 512,
|
||||
(const byte*)input + 6*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash7, 512,
|
||||
(const byte*)input + 7*144, size );
|
||||
|
||||
intrlv_2x256( hashA, hash0, hash1, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, hash, 512 );
|
||||
intrlv_2x256( hashA, hash2, hash3, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, hash, 512 );
|
||||
intrlv_2x256( hashA, hash4, hash5, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, hash, 512 );
|
||||
intrlv_2x256( hashA, hash6, hash7, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, hash, 512 );
|
||||
|
||||
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, (const void*)hash, 64 );
|
||||
jh512_8way_close( &ctx.jh, (void*)hash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, hash );
|
||||
|
||||
#if defined (__VAES__)
|
||||
|
||||
unsigned char _ALIGN(64) hashA0[64];
|
||||
unsigned char _ALIGN(64) hashA1[64];
|
||||
unsigned char _ALIGN(64) hashA2[64];
|
||||
unsigned char _ALIGN(64) hashA3[64];
|
||||
unsigned char _ALIGN(64) hashA4[64];
|
||||
unsigned char _ALIGN(64) hashA5[64];
|
||||
unsigned char _ALIGN(64) hashA6[64];
|
||||
unsigned char _ALIGN(64) hashA7[64];
|
||||
|
||||
intrlv_4x128_512( hash, hash0, hash1, hash2, hash3 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
dintrlv_4x128_512( hashA0, hashA1, hashA2, hashA3, hash );
|
||||
|
||||
intrlv_4x128_512( hash, hash4, hash5, hash6, hash7 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
dintrlv_4x128_512( hashA4, hashA5, hashA6, hashA7, hash );
|
||||
|
||||
#endif
|
||||
|
||||
if ( hash0[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash0 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash0, hashA0, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash1[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash1 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash1, hashA1, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash2[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash2 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash2, hashA2, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash3[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash3 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash3, hashA3, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash4[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash4, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash4 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash4, hashA4, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash5[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash5, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash5 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash5, hashA5, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash6[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash6, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash6 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash6, hashA6, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash7[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash7, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash7 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash7, hashA7, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
}
|
||||
#endif
|
||||
|
||||
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, (const void*)hash, 64 );
|
||||
skein512_8way_close( &ctx.skein, (void*)hash );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
casti_m512i( state, i ) = _mm512_xor_si512( casti_m512i( hash, i ),
|
||||
casti_m512i( hash, i+4 ) );
|
||||
}
|
||||
|
||||
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16*8];
|
||||
uint32_t _ALIGN(128) edata[36*8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *hash7 = &(hash[49]);
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x00ff;
|
||||
|
||||
phi2_has_roots = false;
|
||||
|
||||
for ( int i = 0; i < 36; i++ )
|
||||
{
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
edata[ i + 36 ] = edata[ i + 2*36 ] = edata[ i + 3*36 ] =
|
||||
edata[ i + 4*36 ] = edata[ i + 5*36 ] = edata[ i + 6*36 ] =
|
||||
edata[ i + 7*36 ] = edata[ i ];
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
edata[ 19 ] = n;
|
||||
edata[ 36 + 19 ] = n+1;
|
||||
edata[ 2*36 + 19 ] = n+2;
|
||||
edata[ 3*36 + 19 ] = n+3;
|
||||
edata[ 4*36 + 19 ] = n+4;
|
||||
edata[ 5*36 + 19 ] = n+5;
|
||||
edata[ 6*36 + 19 ] = n+6;
|
||||
edata[ 7*36 + 19 ] = n+7;
|
||||
|
||||
do {
|
||||
phi2_8way_hash( hash, edata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
|
||||
{
|
||||
uint64_t _ALIGN(64) lane_hash[8];
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
be32enc( pdata + 19, n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
edata[ 19 ] += 8;
|
||||
edata[ 36 + 19 ] += 8;
|
||||
edata[ 2*36 + 19 ] += 8;
|
||||
edata[ 3*36 + 19 ] += 8;
|
||||
edata[ 4*36 + 19 ] += 8;
|
||||
edata[ 5*36 + 19 ] += 8;
|
||||
edata[ 6*36 + 19 ] += 8;
|
||||
edata[ 7*36 + 19 ] += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
#elif defined(PHI2_4WAY)
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
jh512_4way_context jh;
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
// hashState_echo echo2;
|
||||
#else
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_gost512_context gost;
|
||||
skein512_4way_context skein;
|
||||
} phi2_ctx_holder;
|
||||
/*
|
||||
phi2_ctx_holder phi2_ctx;
|
||||
} phi2_4way_ctx_holder;
|
||||
|
||||
void init_phi2_ctx()
|
||||
phi2_4way_ctx_holder phi2_4way_ctx;
|
||||
|
||||
void phi2_4way_hash(void *state, const void *input)
|
||||
{
|
||||
cubehashInit( &phi2_ctx.cube, 512, 16, 32 );
|
||||
sph_jh512_init(&phi2_ctx.jh);
|
||||
init_echo( &phi2_ctx.echo1, 512 );
|
||||
init_echo( &phi2_ctx.echo2, 512 );
|
||||
sph_gost512_init(&phi2_ctx.gost);
|
||||
sph_skein512_init(&phi2_ctx.skein);
|
||||
};
|
||||
*/
|
||||
void phi2_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t hashA[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t hashB[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[4*16] __attribute__ ((aligned (64)));
|
||||
unsigned char _ALIGN(128) hash[64*4];
|
||||
unsigned char _ALIGN(64) hash0[64];
|
||||
unsigned char _ALIGN(64) hash1[64];
|
||||
unsigned char _ALIGN(64) hash2[64];
|
||||
unsigned char _ALIGN(64) hash3[64];
|
||||
unsigned char _ALIGN(64) hash0A[64];
|
||||
unsigned char _ALIGN(64) hash1A[64];
|
||||
unsigned char _ALIGN(64) hash2A[64];
|
||||
unsigned char _ALIGN(64) hash3A[64];
|
||||
const int size = phi2_has_roots ? 144 : 80 ;
|
||||
phi2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
// unsigned char _ALIGN(128) hash[64];
|
||||
// unsigned char _ALIGN(128) hashA[64];
|
||||
// unsigned char _ALIGN(128) hashB[64];
|
||||
cubehash_full( &ctx.cube, (byte*)hash0A, 512,
|
||||
(const byte*)input, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1A, 512,
|
||||
(const byte*)input + 144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2A, 512,
|
||||
(const byte*)input + 2*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3A, 512,
|
||||
(const byte*)input + 3*144, size );
|
||||
|
||||
LYRA2RE( &hash0[ 0], 32, hash0A, 32, hash0A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash0[32], 32, hash0A+32, 32, hash0A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash1[ 0], 32, hash1A, 32, hash1A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash1[32], 32, hash1A+32, 32, hash1A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash2[ 0], 32, hash2A, 32, hash2A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash2[32], 32, hash2A+32, 32, hash2A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash3[ 0], 32, hash3A, 32, hash3A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash3[32], 32, hash3A+32, 32, hash3A+32, 32, 1, 8, 8 );
|
||||
|
||||
phi2_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
// memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[0], (const byte*)input,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[1], (const byte*)input+144,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[2], (const byte*)input+288,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[3], (const byte*)input+432,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
|
||||
LYRA2RE( &hashA[0][0], 32, &hashB[0][0], 32, &hashB[0][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[0][8], 32, &hashB[0][8], 32, &hashB[0][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[1][0], 32, &hashB[1][0], 32, &hashB[1][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[1][8], 32, &hashB[1][8], 32, &hashB[1][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[2][0], 32, &hashB[2][0], 32, &hashB[2][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[2][8], 32, &hashB[2][8], 32, &hashB[2][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
|
||||
|
||||
intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
|
||||
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4way_update( &ctx.jh, (const void*)hash, 64 );
|
||||
jh512_4way_close( &ctx.jh, (void*)hash );
|
||||
|
||||
dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, hash );
|
||||
|
||||
if ( hash[0][0] & 1 )
|
||||
if ( hash0[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[0], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[0] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||
(const BitSequence *)hash[0], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||
(const BitSequence *)hash[0], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
}
|
||||
|
||||
if ( hash[1][0] & 1 )
|
||||
if ( hash1[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[1], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[1] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||
(const BitSequence *)hash[1], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||
(const BitSequence *)hash[1], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
}
|
||||
|
||||
if ( hash[2][0] & 1 )
|
||||
if ( hash2[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[2], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[2] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||
(const BitSequence *)hash[2], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||
(const BitSequence *)hash[2], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
}
|
||||
|
||||
if ( hash[3][0] & 1 )
|
||||
if ( hash3[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[3], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[3] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||
(const BitSequence *)hash[3], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||
(const BitSequence *)hash[3], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
}
|
||||
|
||||
intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
|
||||
|
||||
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_update( &ctx.skein, (const void*)hash, 64 );
|
||||
skein512_4way_close( &ctx.skein, (void*)hash );
|
||||
|
||||
for (int i=0; i<4; i++)
|
||||
{
|
||||
( (uint64_t*)vhash )[i] ^= ( (uint64_t*)vhash )[i+4];
|
||||
( (uint64_t*)vhash+ 8 )[i] ^= ( (uint64_t*)vhash+ 8 )[i+4];
|
||||
( (uint64_t*)vhash+16 )[i] ^= ( (uint64_t*)vhash+16 )[i+4];
|
||||
( (uint64_t*)vhash+24 )[i] ^= ( (uint64_t*)vhash+24 )[i+4];
|
||||
}
|
||||
// for ( int i = 0; i < 4; i++ )
|
||||
// casti_m256i( vhash, i ) = _mm256_xor_si256( casti_m256i( vhash, i ),
|
||||
// casti_m256i( vhash, i+4 ) );
|
||||
|
||||
memcpy( state, vhash, 128 );
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
casti_m256i( state, i ) = _mm256_xor_si256( casti_m256i( hash, i ),
|
||||
casti_m256i( hash, i+4 ) );
|
||||
}
|
||||
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) edata[36];
|
||||
uint32_t vdata[4][36] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t _ALIGN(128) hash[16*4];
|
||||
uint32_t _ALIGN(128) edata[36*4];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
}
|
||||
|
||||
// Data is not interleaved, but hash is.
|
||||
// any non-zero data at index 20 or above sets roots true.
|
||||
// Split up the operations, bswap first, then set roots.
|
||||
|
||||
phi2_has_roots = false;
|
||||
for ( int i=0; i < 36; i++ )
|
||||
{
|
||||
be32enc(&edata[i], pdata[i]);
|
||||
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||
}
|
||||
/*
|
||||
casti_m256i( vdata[0], 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( vdata[0], 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m256i( vdata[0], 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
|
||||
casti_m256i( vdata[0], 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
|
||||
casti_m128i( vdata[0], 8 ) = mm128_bswap_32( casti_m128i( pdata, 8 ) );
|
||||
phi2_has_roots = mm128_anybits1( casti_m128i( vdata[0], 5 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 6 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 7 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 8 ) );
|
||||
*/
|
||||
|
||||
memcpy( vdata[0], edata, 144 );
|
||||
memcpy( vdata[1], edata, 144 );
|
||||
memcpy( vdata[2], edata, 144 );
|
||||
memcpy( vdata[3], edata, 144 );
|
||||
|
||||
do {
|
||||
be32enc( &vdata[0][19], n );
|
||||
be32enc( &vdata[1][19], n+1 );
|
||||
be32enc( &vdata[2][19], n+2 );
|
||||
be32enc( &vdata[3][19], n+3 );
|
||||
|
||||
phi2_hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x00ff;
|
||||
|
||||
#endif // PHI2_4WAY
|
||||
phi2_has_roots = false;
|
||||
|
||||
for ( int i = 0; i < 36; i++ )
|
||||
{
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
edata[ i+36 ] = edata[ i+72 ] = edata[ i+108 ] = edata[i];
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
edata[ 19 ] = n;
|
||||
edata[ 36 + 19 ] = n+1;
|
||||
edata[ 2*36 + 19 ] = n+2;
|
||||
edata[ 3*36 + 19 ] = n+3;
|
||||
|
||||
do {
|
||||
phi2_4way_hash( hash, edata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
|
||||
{
|
||||
uint64_t _ALIGN(64) lane_hash[8];
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
be32enc( pdata + 19, n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
edata[ 19 ] += 4;
|
||||
edata[ 36 + 19 ] += 4;
|
||||
edata[ 2*36 + 19 ] += 4;
|
||||
edata[ 3*36 + 19 ] += 4;
|
||||
n +=4;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -99,7 +99,6 @@ int scanhash_phi2( struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(128) edata[36];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
@@ -158,7 +158,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
{
|
||||
// ignore POK in first word
|
||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
|
||||
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|
||||
|| ( *nonceptr >= *end_nonce_ptr ) )
|
||||
{
|
||||
|
@@ -162,59 +162,35 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
do
|
||||
{
|
||||
anime_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( valid_hash( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
anime_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -126,49 +126,28 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
const int thr_id = mythr->id;
|
||||
const int bench = opt_benchmark;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], n );
|
||||
anime_hash( hash, endiandata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
submit_solution( work, hash, mythr );
|
||||
n++;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
do
|
||||
{
|
||||
edata[19] = n;
|
||||
anime_hash( hash, edata );
|
||||
if ( valid_hash( hash, ptarget ) && !bench )
|
||||
{
|
||||
be32enc( &pdata[19], n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1028,7 +1028,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
// B
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
@@ -1050,14 +1050,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||
m256_zero );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
@@ -1101,14 +1101,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||
m256_zero );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -1180,7 +1180,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// B
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhash, 64 );
|
||||
@@ -1407,7 +1407,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way_update( &ctx.sha512, vhash, 64 );
|
||||
@@ -1443,7 +1443,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
// 4x32 for haval
|
||||
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way_update( &ctx.haval, vhash, 64 );
|
||||
|
@@ -402,14 +402,14 @@ void quark_4way_hash( void *state, const void *input )
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
@@ -427,14 +427,14 @@ void quark_4way_hash( void *state, const void *input )
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
|
@@ -13,17 +13,21 @@
|
||||
|
||||
#if defined (SKEIN_8WAY)
|
||||
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_8way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx_skein;
|
||||
|
||||
memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
|
||||
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
||||
sha256_8way_context ctx_sha256;
|
||||
|
||||
skein512_8way_init( &ctx_skein );
|
||||
skein512_8way_update( &ctx_skein, input, 80 );
|
||||
skein512_8way_close( &ctx_skein, vhash64 );
|
||||
skein512_8way_full( &ctx_skein, vhash64, input, 80 );
|
||||
|
||||
// skein512_8way_update( &ctx_skein, input + (64*8), 16 );
|
||||
// skein512_8way_close( &ctx_skein, vhash64 );
|
||||
|
||||
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
||||
|
||||
@@ -36,63 +40,74 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hash_d7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
// skein512_8way_init( &skein512_8way_ctx );
|
||||
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
skeinhash_8way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
if ( unlikely( hash_d7[ lane ] <= targ_d7 ) && !bench )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (SKEIN_4WAY)
|
||||
|
||||
//static __thread skein512_4way_context skein512_4way_ctx
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void skeinhash_4way( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
|
||||
skein512_4way_context ctx_skein;
|
||||
// memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
|
||||
#if defined(__SHA__)
|
||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||
SHA256_CTX ctx_sha256;
|
||||
SHA256_CTX ctx_sha256;
|
||||
#else
|
||||
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx_sha256;
|
||||
#endif
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way_update( &ctx_skein, input, 80 );
|
||||
skein512_4way_close( &ctx_skein, vhash64 );
|
||||
skein512_4way_full( &ctx_skein, vhash64, input, 80 );
|
||||
|
||||
// skein512_4way_update( &ctx_skein, input + (64*4), 16 );
|
||||
// skein512_4way_close( &ctx_skein, vhash64 );
|
||||
|
||||
#if defined(__SHA__)
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
|
||||
@@ -127,38 +142,44 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hash_d7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
// skein512_4way_init( &skein512_4way_ctx );
|
||||
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
|
||||
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
skeinhash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
if ( unlikely( ( hash_d7[ lane ] <= targ_d7 ) && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart );
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -4,14 +4,16 @@
|
||||
|
||||
bool register_skein_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
#if defined (SKEIN_8WAY)
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_8way;
|
||||
gate->hash = (void*)&skeinhash_8way;
|
||||
#elif defined (SKEIN_4WAY)
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_4way;
|
||||
gate->hash = (void*)&skeinhash_4way;
|
||||
#else
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein;
|
||||
gate->hash = (void*)&skeinhash;
|
||||
#endif
|
||||
|
@@ -654,6 +654,80 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
|
||||
memcpy_512( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf = sc->buf;
|
||||
size_t ptr = 0;
|
||||
unsigned first;
|
||||
uint64_t bcount = 0;
|
||||
const int buf_size = 64; // 64 * _m256i
|
||||
|
||||
// Init
|
||||
|
||||
h0 = m512_const1_64( 0x4903ADFF749C51CE );
|
||||
h1 = m512_const1_64( 0x0D95DE399746DF03 );
|
||||
h2 = m512_const1_64( 0x8FD1934127C79BCE );
|
||||
h3 = m512_const1_64( 0x9A255629FF352CB1 );
|
||||
h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
h6 = m512_const1_64( 0x991112C71A75B523 );
|
||||
h7 = m512_const1_64( 0xAE18A40B660FCC33 );
|
||||
|
||||
// Update
|
||||
|
||||
if ( len <= buf_size - ptr )
|
||||
{
|
||||
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
}
|
||||
else
|
||||
{
|
||||
first = ( bcount == 0 ) << 7;
|
||||
do {
|
||||
size_t clen;
|
||||
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
bcount ++;
|
||||
UBI_BIG_8WAY( 96 + first, 0 );
|
||||
first = 0;
|
||||
ptr = 0;
|
||||
}
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
} while ( len > 0 );
|
||||
}
|
||||
|
||||
// Close
|
||||
|
||||
unsigned et;
|
||||
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
et = 352 + ((bcount == 0) << 7);
|
||||
UBI_BIG_8WAY( et, ptr );
|
||||
|
||||
memset_zero_512( buf, buf_size >> 3 );
|
||||
bcount = 0;
|
||||
UBI_BIG_8WAY( 510, 8 );
|
||||
|
||||
casti_m512i( out, 0 ) = h0;
|
||||
casti_m512i( out, 1 ) = h1;
|
||||
casti_m512i( out, 2 ) = h2;
|
||||
casti_m512i( out, 3 ) = h3;
|
||||
casti_m512i( out, 4 ) = h4;
|
||||
casti_m512i( out, 5 ) = h5;
|
||||
casti_m512i( out, 6 ) = h6;
|
||||
casti_m512i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein256_8way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
@@ -709,6 +783,7 @@ void skein512_4way_init( skein512_4way_context *sc )
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
// Do not use for 128 bt data length
|
||||
static void
|
||||
skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||
size_t len )
|
||||
@@ -794,6 +869,79 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
||||
memcpy_256( dst, buf, out_len >> 3 );
|
||||
}
|
||||
|
||||
void
|
||||
skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf = sc->buf;
|
||||
size_t ptr = 0;
|
||||
unsigned first;
|
||||
const int buf_size = 64; // 64 * __m256i
|
||||
uint64_t bcount = 0;
|
||||
|
||||
h0 = m256_const1_64( 0x4903ADFF749C51CE );
|
||||
h1 = m256_const1_64( 0x0D95DE399746DF03 );
|
||||
h2 = m256_const1_64( 0x8FD1934127C79BCE );
|
||||
h3 = m256_const1_64( 0x9A255629FF352CB1 );
|
||||
h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
|
||||
h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
|
||||
h6 = m256_const1_64( 0x991112C71A75B523 );
|
||||
h7 = m256_const1_64( 0xAE18A40B660FCC33 );
|
||||
|
||||
// Update
|
||||
|
||||
if ( len <= buf_size - ptr )
|
||||
{
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
}
|
||||
else
|
||||
{
|
||||
first = ( bcount == 0 ) << 7;
|
||||
do {
|
||||
size_t clen;
|
||||
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
bcount ++;
|
||||
UBI_BIG_4WAY( 96 + first, 0 );
|
||||
first = 0;
|
||||
ptr = 0;
|
||||
}
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
} while ( len > 0 );
|
||||
}
|
||||
|
||||
// Close
|
||||
|
||||
unsigned et;
|
||||
|
||||
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
et = 352 + ((bcount == 0) << 7);
|
||||
UBI_BIG_4WAY( et, ptr );
|
||||
|
||||
memset_zero_256( buf, buf_size >> 3 );
|
||||
bcount = 0;
|
||||
UBI_BIG_4WAY( 510, 8 );
|
||||
|
||||
casti_m256i( out, 0 ) = h0;
|
||||
casti_m256i( out, 1 ) = h1;
|
||||
casti_m256i( out, 2 ) = h2;
|
||||
casti_m256i( out, 3 ) = h3;
|
||||
casti_m256i( out, 4 ) = h4;
|
||||
casti_m256i( out, 5 ) = h5;
|
||||
casti_m256i( out, 6 ) = h6;
|
||||
casti_m256i( out, 7 ) = h7;
|
||||
}
|
||||
|
||||
void
|
||||
skein256_4way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
@@ -806,6 +954,9 @@ skein256_4way_close(void *cc, void *dst)
|
||||
skein_big_close_4way(cc, 0, 0, dst, 32);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Do not use with 128 bit data
|
||||
void
|
||||
skein512_4way_update(void *cc, const void *data, size_t len)
|
||||
{
|
||||
|
@@ -63,6 +63,8 @@ typedef struct
|
||||
typedef skein_8way_big_context skein512_8way_context;
|
||||
typedef skein_8way_big_context skein256_8way_context;
|
||||
|
||||
void skein512_8way_full( skein512_8way_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_8way_init( skein512_8way_context *sc );
|
||||
void skein512_8way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_8way_close( void *cc, void *dst );
|
||||
@@ -85,6 +87,8 @@ typedef skein_4way_big_context skein512_4way_context;
|
||||
typedef skein_4way_big_context skein256_4way_context;
|
||||
|
||||
void skein512_4way_init( skein512_4way_context *sc );
|
||||
void skein512_4way_full( skein512_4way_context *sc, void *out,
|
||||
const void *data, size_t len );
|
||||
void skein512_4way_update( void *cc, const void *data, size_t len );
|
||||
void skein512_4way_close( void *cc, void *dst );
|
||||
|
||||
|
@@ -5,114 +5,131 @@
|
||||
|
||||
#if defined(SKEIN_8WAY)
|
||||
|
||||
// static __thread skein512_8way_context skein512_8way_ctx
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void skein2hash_8way( void *output, const void *input )
|
||||
{
|
||||
skein512_8way_context ctx;
|
||||
uint64_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
skein512_8way_context ctx;
|
||||
// memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
|
||||
|
||||
skein512_8way_init( &ctx );
|
||||
skein512_8way_update( &ctx, input, 80 );
|
||||
skein512_8way_close( &ctx, hash );
|
||||
skein512_8way_full( &ctx, hash, input, 80 );
|
||||
|
||||
skein512_8way_init( &ctx );
|
||||
skein512_8way_update( &ctx, hash, 64 );
|
||||
skein512_8way_close( &ctx, output );
|
||||
// skein512_8way_update( &ctx, input + (64*8), 16 );
|
||||
// skein512_8way_close( &ctx, hash );
|
||||
|
||||
skein512_8way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
|
||||
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
uint64_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[49]);
|
||||
uint64_t *hashq3 = &(hash[3*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint64_t targq3 = ((uint64_t*)ptarget)[3];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
// skein512_8way_init( &skein512_8way_ctx );
|
||||
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
skein2hash_8way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(SKEIN_4WAY)
|
||||
|
||||
//static __thread skein512_4way_context skein512_4way_ctx
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void skein2hash_4way( void *output, const void *input )
|
||||
{
|
||||
skein512_4way_context ctx;
|
||||
// memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
|
||||
uint64_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
|
||||
skein512_4way_init( &ctx );
|
||||
skein512_4way_update( &ctx, input, 80 );
|
||||
skein512_4way_close( &ctx, hash );
|
||||
// skein512_4way_update( &ctx, input + (64*4), 16 );
|
||||
// skein512_4way_close( &ctx, hash );
|
||||
|
||||
skein512_4way_init( &ctx );
|
||||
skein512_4way_update( &ctx, hash, 64 );
|
||||
skein512_4way_close( &ctx, output );
|
||||
skein512_4way_full( &ctx, hash, input, 80 );
|
||||
skein512_4way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
|
||||
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint64_t *hash_q3 = &(hash[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
// skein512_4way_init( &skein512_4way_ctx );
|
||||
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
skein2hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
if ( hash_q3[ lane ] <= targ_q3 )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -30,9 +30,6 @@
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
||||
{
|
||||
char *sptr = output;
|
||||
@@ -86,7 +83,7 @@ void hex_hash( void* output, const void* input )
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
|
||||
char elem = hashOrder[0];
|
||||
char elem = x16r_hash_order[0];
|
||||
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
@@ -235,7 +232,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
@@ -244,17 +241,18 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
hex_getAlgoString( (const uint32_t*) (&edata[1]), hashOrder );
|
||||
hex_getAlgoString( (const uint32_t*) (&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = hashOrder[0];
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
switch ( algo )
|
||||
{
|
||||
|
@@ -692,14 +692,15 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
||||
break;
|
||||
case SKEIN:
|
||||
if ( i == 0 )
|
||||
{
|
||||
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
}
|
||||
else
|
||||
{
|
||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, size );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, size );
|
||||
}
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
break;
|
||||
case LUFFA:
|
||||
|
@@ -134,7 +134,8 @@ void x16r_hash_generic( void* output, const void* input )
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
echo_full( &ctx.echo, hash, 512, in, size );
|
||||
echo_full( &ctx.echo, (BitSequence*)hash, 512,
|
||||
(const BitSequence*)in, size );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, in, size );
|
||||
@@ -237,7 +238,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -46,7 +46,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -35,9 +35,6 @@
|
||||
|
||||
#if defined (X16RV2_8WAY)
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
@@ -96,7 +93,7 @@ void x16rv2_8way_hash( void* output, const void* input )
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const char elem = x16r_hash_order[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
@@ -651,17 +648,19 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = hashOrder[0];
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
switch ( algo )
|
||||
{
|
||||
@@ -737,9 +736,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (X16RV2_4WAY)
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
@@ -789,7 +785,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const char elem = x16r_hash_order[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
@@ -1130,17 +1126,19 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = hashOrder[0];
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
switch ( algo )
|
||||
{
|
||||
|
@@ -34,7 +34,6 @@
|
||||
#endif
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
union _x16rv2_context_overlay
|
||||
{
|
||||
@@ -74,16 +73,10 @@ void x16rv2_hash( void* output, const void* input )
|
||||
x16rv2_context_overlay ctx;
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
/*
|
||||
if ( s_ntime == UINT32_MAX )
|
||||
{
|
||||
const uint8_t* in8 = (uint8_t*) input;
|
||||
x16_r_s_getAlgoString( &in8[4], hashOrder );
|
||||
}
|
||||
*/
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const char elem = x16r_hash_order[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
@@ -203,48 +196,48 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)",
|
||||
x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
x16rv2_hash( hash32, endiandata );
|
||||
edata[19] = nonce;
|
||||
x16rv2_hash( hash32, edata );
|
||||
|
||||
if ( hash32[7] <= Htarg )
|
||||
if (fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -97,7 +97,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -563,59 +563,31 @@ void sonoa_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
uint64_t htmax[] =
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
do
|
||||
{
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] =
|
||||
{
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
edata[19] = n;
|
||||
sonoa_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sonoa_hash(hash32, endiandata);
|
||||
if ( !( hash32[7] & mask ) )
|
||||
if ( fulltest( hash32, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash32, mythr );
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -74,9 +74,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -106,9 +104,7 @@ void x17_8way_hash( void *state, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -287,18 +283,18 @@ void x17_8way_hash( void *state, const void *input )
|
||||
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hashd7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
@@ -310,7 +306,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
|
||||
x17_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( ( hash7[ lane ] <= Htarg ) && !bench ) )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
@@ -378,9 +374,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -474,18 +468,18 @@ void x17_4way_hash( void *state, const void *input )
|
||||
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hashd7 = &(hash[ 7*4 ]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce -4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
@@ -496,7 +490,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
x17_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash7[ lane ] <= Htarg && !bench ) )
|
||||
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
|
@@ -169,8 +169,8 @@ int scanhash_x17( struct work *work, uint32_t max_nonce,
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -76,9 +76,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
memset( &vhash[8<<3], 0, 64<<3 );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -108,9 +106,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -291,9 +287,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
@@ -323,9 +317,7 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -504,40 +496,43 @@ void xevan_8way_hash( void *output, const void *input )
|
||||
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t *hashd7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
xevan_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -578,8 +573,6 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
const int dataLen = 128;
|
||||
xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
// parallel 4 way
|
||||
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
memset( &vhash[8<<2], 0, 64<<2 );
|
||||
|
||||
@@ -598,9 +591,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
// Parallel 4way
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -618,15 +609,11 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
@@ -718,9 +705,7 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, dataLen );
|
||||
@@ -738,15 +723,11 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
|
||||
|
||||
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
@@ -818,41 +799,43 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
xevan_4way_hash( hash, vdata );
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
128
algo/x17/xevan.c
128
algo/x17/xevan.c
@@ -56,8 +56,6 @@ typedef struct {
|
||||
} xevan_ctx_holder;
|
||||
|
||||
xevan_ctx_holder xevan_ctx __attribute__ ((aligned (64)));
|
||||
static __thread sph_blake512_context xevan_blake_mid
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void init_xevan_ctx()
|
||||
{
|
||||
@@ -85,34 +83,23 @@ void init_xevan_ctx()
|
||||
#endif
|
||||
};
|
||||
|
||||
void xevan_blake512_midstate( const void* input )
|
||||
{
|
||||
memcpy( &xevan_blake_mid, &xevan_ctx.blake, sizeof xevan_blake_mid );
|
||||
sph_blake512( &xevan_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void xevan_hash(void *output, const void *input)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
|
||||
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
|
||||
const int dataLen = 128;
|
||||
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
memcpy( &ctx.blake, &xevan_blake_mid, sizeof xevan_blake_mid );
|
||||
sph_blake512( &ctx.blake, input + midlen, tail );
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
sph_blake512( &ctx.blake, input, 80 );
|
||||
sph_blake512_close( &ctx.blake, hash );
|
||||
memset(&hash[16], 0, 64);
|
||||
|
||||
sph_bmw512(&ctx.bmw, hash, dataLen);
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, dataLen*8 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const char*)hash, dataLen*8 );
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
@@ -127,20 +114,20 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, dataLen*8 );
|
||||
|
||||
#if defined(__AES__)
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
(const BitSequence *) hash, dataLen*8 );
|
||||
#else
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
@@ -159,15 +146,15 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
|
||||
memset(&hash[8], 0, dataLen - 32);
|
||||
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
|
||||
|
||||
sph_blake512(&ctx.blake, hash, dataLen);
|
||||
sph_blake512_close(&ctx.blake, hash);
|
||||
@@ -176,11 +163,11 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_bmw512_close(&ctx.bmw, hash);
|
||||
|
||||
#if defined(__AES__)
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const BitSequence*)hash, dataLen*8 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash,
|
||||
(const BitSequence*)hash, dataLen*8 );
|
||||
#else
|
||||
sph_groestl512(&ctx.groestl, hash, dataLen);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#endif
|
||||
|
||||
sph_skein512(&ctx.skein, hash, dataLen);
|
||||
@@ -191,24 +178,25 @@ void xevan_hash(void *output, const void *input)
|
||||
|
||||
sph_keccak512(&ctx.keccak, hash, dataLen);
|
||||
sph_keccak512_close(&ctx.keccak, hash);
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
|
||||
(const byte*) hash, dataLen );
|
||||
|
||||
sph_shavite512(&ctx.shavite, hash, dataLen);
|
||||
sph_shavite512_close(&ctx.shavite, hash);
|
||||
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
update_final_sd( &ctx.simd, (BitSequence *)hash,
|
||||
(const BitSequence *)hash, dataLen*8 );
|
||||
|
||||
#if defined(__AES__)
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
update_final_echo( &ctx.echo, (BitSequence *) hash,
|
||||
(const BitSequence *) hash, dataLen*8 );
|
||||
#else
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
sph_echo512(&ctx.echo, hash, dataLen);
|
||||
sph_echo512_close(&ctx.echo, hash);
|
||||
#endif
|
||||
|
||||
sph_hamsi512(&ctx.hamsi, hash, dataLen);
|
||||
@@ -223,8 +211,8 @@ void xevan_hash(void *output, const void *input)
|
||||
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
|
||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
SHA512_Update( &ctx.sha512, hash, dataLen );
|
||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||
|
||||
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
|
||||
sph_haval256_5_close(&ctx.haval, hash);
|
||||
@@ -233,41 +221,33 @@ void xevan_hash(void *output, const void *input)
|
||||
}
|
||||
|
||||
int scanhash_xevan( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0cff;
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
for (int k=0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
xevan_blake512_midstate( endiandata );
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
xevan_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
do
|
||||
{
|
||||
edata[19] = n;
|
||||
xevan_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -87,64 +87,40 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
unsigned char hashA7[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_8way_ctx_overlay ctx;
|
||||
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, input, 80 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(const char*)hash4, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(const char*)hash5, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(const char*)hash6, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(const char*)hash7, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
@@ -155,22 +131,16 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_4way_init( &ctx.luffa, 512 );
|
||||
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_4way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
|
||||
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_init( &ctx.shavite );
|
||||
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
#else
|
||||
|
||||
@@ -207,17 +177,13 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
|
||||
#endif
|
||||
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_4way_init( &ctx.simd, 512 );
|
||||
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
simd512_4way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_4way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
|
||||
echo_4way_init( &ctx.echo, 512 );
|
||||
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
|
||||
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
|
||||
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
@@ -226,30 +192,22 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
|
||||
(const BitSequence*)hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash4,
|
||||
(const BitSequence*)hash4, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash5,
|
||||
(const BitSequence*)hash5, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash6,
|
||||
(const BitSequence*)hash6, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash7,
|
||||
(const BitSequence*)hash7, 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
@@ -443,6 +401,55 @@ void x22i_8way_hash( void *output, const void *input )
|
||||
sha256_8way_close( &ctx.sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
x22i_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
@@ -488,6 +495,7 @@ int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#elif defined(X22I_4WAY)
|
||||
|
||||
@@ -531,33 +539,21 @@ void x22i_4way_hash( void *output, const void *input )
|
||||
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
|
||||
x22i_ctx_overlay ctx;
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(const char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(const char*)hash1, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(const char*)hash2, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (const char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (const char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (const char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (const char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -569,41 +565,29 @@ void x22i_4way_hash( void *output, const void *input )
|
||||
|
||||
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
|
||||
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
|
||||
|
||||
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
|
||||
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
|
||||
|
||||
dintrlv_2x128_512( hash0, hash1, vhashA );
|
||||
dintrlv_2x128_512( hash2, hash3, vhashB );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
|
||||
(const BitSequence*)hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
|
||||
(const BitSequence*)hash1, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
|
||||
(const BitSequence*)hash2, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
|
||||
(const BitSequence*)hash3, 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
@@ -722,44 +706,47 @@ void x22i_4way_hash( void *output, const void *input )
|
||||
int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[ 7*4 ]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x22i_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if unlikely( ( hash7[ lane ] <= Htarg ) )
|
||||
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -167,40 +167,38 @@ void x22i_hash( void *output, const void *input )
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_x22i( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_x22i( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = n;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
|
||||
for (int k=0; k < 20; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &endiandata[19], n );
|
||||
|
||||
x22i_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
edata[19] = n;
|
||||
x22i_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -530,6 +530,55 @@ void x25x_8way_hash( void *output, const void *input )
|
||||
blake2s_8way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
|
||||
}
|
||||
|
||||
int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
x25x_8way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
@@ -574,6 +623,7 @@ int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#elif defined(X25X_4WAY)
|
||||
|
||||
@@ -614,9 +664,7 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
|
||||
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
@@ -624,24 +672,13 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0[2],
|
||||
(const char*)hash0[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1[2],
|
||||
(const char*)hash1[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2[2],
|
||||
(const char*)hash2[1], 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
|
||||
(const char*)hash3[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0[2], (const char*)hash0[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1[2], (const char*)hash1[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2[2], (const char*)hash2[1], 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3[2], (const char*)hash3[1], 512 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
@@ -654,32 +691,20 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
|
||||
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6],
|
||||
(const BitSequence*)hash0[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1[6],
|
||||
(const BitSequence*)hash1[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2[6],
|
||||
(const BitSequence*)hash2[5], 64 );
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3[6],
|
||||
(const BitSequence*)hash3[5], 64 );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0[7],
|
||||
(const byte*)hash0[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1[7],
|
||||
(const byte*)hash1[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2[7],
|
||||
(const byte*)hash2[6], 64 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7],
|
||||
(const byte*)hash3[6], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash0[6], 512,
|
||||
(const BitSequence*)hash0[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash1[6], 512,
|
||||
(const BitSequence*)hash1[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash2[6], 512,
|
||||
(const BitSequence*)hash2[5], 64 );
|
||||
luffa_full( &ctx.luffa, (BitSequence*)hash3[6], 512,
|
||||
(const BitSequence*)hash3[5], 64 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*)hash0[7], 512, (const byte*)hash0[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1[7], 512, (const byte*)hash1[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2[7], 512, (const byte*)hash2[6], 64 );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3[7], 512, (const byte*)hash3[6], 64 );
|
||||
|
||||
sph_shavite512_init(&ctx.shavite);
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash0[8]);
|
||||
@@ -693,31 +718,23 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
|
||||
sph_shavite512_close(&ctx.shavite, hash3[8]);
|
||||
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash0[9],
|
||||
(const BitSequence*)hash0[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash1[9],
|
||||
(const BitSequence*)hash1[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash2[9],
|
||||
(const BitSequence*)hash2[8], 512 );
|
||||
init_sd( &ctx.simd, 512 );
|
||||
update_final_sd( &ctx.simd, (BitSequence*)hash3[9],
|
||||
(const BitSequence*)hash3[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash0[9],
|
||||
(const BitSequence*)hash0[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash1[9],
|
||||
(const BitSequence*)hash1[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash2[9],
|
||||
(const BitSequence*)hash2[8], 512 );
|
||||
simd_full( &ctx.simd, (BitSequence*)hash3[9],
|
||||
(const BitSequence*)hash3[8], 512 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash0[10],
|
||||
(const BitSequence*)hash0[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash1[10],
|
||||
(const BitSequence*)hash1[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash2[10],
|
||||
(const BitSequence*)hash2[9], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence*)hash3[10],
|
||||
(const BitSequence*)hash3[9], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0[10], 512,
|
||||
(const BitSequence *)hash0[ 9], 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1[10], 512,
|
||||
(const BitSequence *)hash1[ 9], 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2[10], 512,
|
||||
(const BitSequence *)hash2[ 9], 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3[10], 512,
|
||||
(const BitSequence *)hash3[ 9], 64 );
|
||||
|
||||
intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );
|
||||
|
||||
@@ -870,43 +887,46 @@ void x25x_4way_hash( void *output, const void *input )
|
||||
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &(hash[ 7*4 ]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
x25x_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -201,42 +201,38 @@ void x25x_hash( void *output, const void *input )
|
||||
memcpy(output, &hash[24], 32);
|
||||
}
|
||||
|
||||
int scanhash_x25x( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_x25x( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = n;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x08ff;
|
||||
if ( bench ) ptarget[7] = 0x08ff;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
for (int k=0; k < 20; k++)
|
||||
be32enc(&edata[k], pdata[k]);
|
||||
|
||||
InitializeSWIFFTX();
|
||||
|
||||
do
|
||||
{
|
||||
pdata[19] = ++n;
|
||||
be32enc( &edata[19], n );
|
||||
|
||||
x25x_hash( hash, edata );
|
||||
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
edata[19] = n;
|
||||
x25x_hash( hash64, edata );
|
||||
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.9.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.2.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.11.9'
|
||||
PACKAGE_STRING='cpuminer-opt 3.11.9'
|
||||
PACKAGE_VERSION='3.12.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.12.2'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.11.9 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.12.2 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.11.9:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.12.2:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.11.9
|
||||
cpuminer-opt configure 3.12.2
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.11.9, which was
|
||||
It was created by cpuminer-opt $as_me 3.12.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.11.9'
|
||||
VERSION='3.12.2'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.11.9, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.12.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.11.9
|
||||
cpuminer-opt config.status 3.12.2
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.11.9])
|
||||
AC_INIT([cpuminer-opt], [3.12.2])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
484
cpu-miner.c
484
cpu-miner.c
@@ -143,12 +143,6 @@ int api_thr_id = -1;
|
||||
bool stratum_need_reset = false;
|
||||
struct work_restart *work_restart = NULL;
|
||||
struct stratum_ctx stratum;
|
||||
bool jsonrpc_2 = false;
|
||||
char rpc2_id[64] = "";
|
||||
char *rpc2_blob = NULL;
|
||||
size_t rpc2_bloblen = 0;
|
||||
uint32_t rpc2_target = 0;
|
||||
char *rpc2_job_id = NULL;
|
||||
double opt_diff_factor = 1.0;
|
||||
double opt_target_factor = 1.0;
|
||||
uint32_t zr5_pok = 0;
|
||||
@@ -165,20 +159,22 @@ double stratum_diff = 0.;
|
||||
double net_diff = 0.;
|
||||
double net_hashrate = 0.;
|
||||
uint64_t net_blocks = 0;
|
||||
uint32_t opt_work_size = 0;
|
||||
|
||||
// conditional mining
|
||||
bool conditional_state[MAX_CPUS] = { 0 };
|
||||
double opt_max_temp = 0.0;
|
||||
double opt_max_diff = 0.0;
|
||||
double opt_max_rate = 0.0;
|
||||
bool conditional_state[MAX_CPUS] = { 0 };
|
||||
double opt_max_temp = 0.0;
|
||||
double opt_max_diff = 0.0;
|
||||
double opt_max_rate = 0.0;
|
||||
|
||||
uint32_t opt_work_size = 0;
|
||||
char *opt_api_allow = NULL;
|
||||
int opt_api_remote = 0;
|
||||
int opt_api_listen = 0;
|
||||
// int opt_api_listen = 4048;
|
||||
// API
|
||||
static bool opt_api_enabled = false;
|
||||
char *opt_api_allow = NULL;
|
||||
int opt_api_listen = 0;
|
||||
int opt_api_remote = 0;
|
||||
char *default_api_allow = "127.0.0.1";
|
||||
int default_api_listen = 4048;
|
||||
|
||||
pthread_mutex_t rpc2_job_lock;
|
||||
pthread_mutex_t rpc2_login_lock;
|
||||
pthread_mutex_t applog_lock;
|
||||
pthread_mutex_t stats_lock;
|
||||
|
||||
@@ -360,9 +356,6 @@ void work_copy(struct work *dest, const struct work *src)
|
||||
|
||||
int std_get_work_data_size() { return STD_WORK_DATA_SIZE; }
|
||||
|
||||
bool jr2_work_decode( const json_t *val, struct work *work )
|
||||
{ return rpc2_job_decode( val, work ); }
|
||||
|
||||
// Default
|
||||
bool std_le_work_decode( const json_t *val, struct work *work )
|
||||
{
|
||||
@@ -890,8 +883,6 @@ static double norm_diff_sum = 0.;
|
||||
static uint32_t last_block_height = 0;
|
||||
//static bool new_job = false;
|
||||
static double last_targetdiff = 0.;
|
||||
static double ref_rate_hi = 0.;
|
||||
static double ref_rate_lo = 1e100;
|
||||
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
|
||||
static uint32_t hi_temp = 0;
|
||||
#endif
|
||||
@@ -939,7 +930,6 @@ void report_summary_log( bool force )
|
||||
uint64_t accepts = accept_sum; accept_sum = 0;
|
||||
uint64_t rejects = reject_sum; reject_sum = 0;
|
||||
uint64_t stales = stale_sum; stale_sum = 0;
|
||||
// int latency = latency_sum; latency_sum = 0;
|
||||
memcpy( &start_time, &five_min_start, sizeof start_time );
|
||||
memcpy( &five_min_start, &now, sizeof now );
|
||||
|
||||
@@ -950,34 +940,21 @@ void report_summary_log( bool force )
|
||||
|
||||
double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
|
||||
double ghrate = global_hashrate;
|
||||
double scaled_ghrate = ghrate;
|
||||
|
||||
double shrate = share_time == 0. ? 0. : diff_to_hash * last_targetdiff
|
||||
* (double)(accepts) / share_time;
|
||||
double sess_hrate = uptime.tv_sec == 0. ? 0. : diff_to_hash * norm_diff_sum
|
||||
/ (double)uptime.tv_sec;
|
||||
double scaled_shrate = shrate;
|
||||
// int avg_latency = 0;
|
||||
// double latency_pc = 0.;
|
||||
double submit_rate = 0.;
|
||||
|
||||
double submit_rate = share_time == 0. ? 0. : (double)submits*60. / share_time;
|
||||
char shr_units[4] = {0};
|
||||
char ghr_units[4] = {0};
|
||||
char sess_hr_units[4] = {0};
|
||||
char et_str[24];
|
||||
char upt_str[24];
|
||||
|
||||
// if ( submits ) avg_latency = latency / submits;
|
||||
|
||||
if ( share_time != 0. )
|
||||
{
|
||||
submit_rate = (double)submits*60. / share_time;
|
||||
// latency_pc = (double)latency / (share_time * 10.);
|
||||
}
|
||||
|
||||
if ( ghrate > ref_rate_hi ) ref_rate_hi = ghrate;
|
||||
if ( ghrate < ref_rate_lo ) ref_rate_lo = ghrate;
|
||||
|
||||
scale_hash_for_display( &scaled_shrate, shr_units );
|
||||
scale_hash_for_display( &scaled_ghrate, ghr_units );
|
||||
scale_hash_for_display( &shrate, shr_units );
|
||||
scale_hash_for_display( &ghrate, ghr_units );
|
||||
scale_hash_for_display( &sess_hrate, sess_hr_units );
|
||||
|
||||
sprintf_et( et_str, et.tv_sec );
|
||||
@@ -988,8 +965,26 @@ void report_summary_log( bool force )
|
||||
submit_rate, (double)submitted_share_count*60. /
|
||||
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
|
||||
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
|
||||
scaled_shrate, shr_units, sess_hrate, sess_hr_units,
|
||||
scaled_ghrate, ghr_units );
|
||||
shrate, shr_units, sess_hrate, sess_hr_units,
|
||||
ghrate, ghr_units );
|
||||
|
||||
if ( accepted_share_count < submitted_share_count )
|
||||
{
|
||||
double lost_ghrate = uptime.tv_sec == 0. ? 0.
|
||||
: diff_to_hash * last_targetdiff
|
||||
* (double)(submitted_share_count - accepted_share_count )
|
||||
/ (double)uptime.tv_sec;
|
||||
double lost_shrate = share_time == 0. ? 0.
|
||||
: diff_to_hash * last_targetdiff * (double)(submits - accepts )
|
||||
/ share_time;
|
||||
char lshr_units[4] = {0};
|
||||
char lghr_units[4] = {0};
|
||||
scale_hash_for_display( &lost_shrate, lshr_units );
|
||||
scale_hash_for_display( &lost_ghrate, lghr_units );
|
||||
applog2( LOG_INFO, "Lost hash rate %7.2f%sh/s %7.2f%sh/s",
|
||||
lost_shrate, lshr_units, lost_ghrate, lghr_units );
|
||||
}
|
||||
|
||||
applog2( LOG_INFO,"Submitted %6d %6d",
|
||||
submits, submitted_share_count );
|
||||
applog2( LOG_INFO,"Accepted %6d %6d",
|
||||
@@ -1003,29 +998,10 @@ void report_summary_log( bool force )
|
||||
if ( solved_block_count )
|
||||
applog2( LOG_INFO,"Blocks solved %6d",
|
||||
solved_block_count );
|
||||
/*
|
||||
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
|
||||
|
||||
int temp = cpu_temp(0);
|
||||
char tempstr[32];
|
||||
if ( temp > hi_temp ) hi_temp = temp;
|
||||
|
||||
if ( use_colors && ( temp >= 70 ) )
|
||||
{
|
||||
if ( temp >= 80 )
|
||||
sprintf( tempstr, "%s%dC%s", CL_WHT CL_RED, temp, CL_N );
|
||||
else
|
||||
sprintf( tempstr, "%s%dC%s", CL_WHT CL_YLW, temp, CL_N );
|
||||
}
|
||||
else
|
||||
sprintf( tempstr, "%dC", temp );
|
||||
|
||||
applog2(LOG_INFO,"CPU temp %s max %dC", tempstr, hi_temp );
|
||||
|
||||
#endif
|
||||
*/
|
||||
}
|
||||
|
||||
bool lowdiff_debug = false;
|
||||
|
||||
static int share_result( int result, struct work *null_work,
|
||||
const char *reason )
|
||||
{
|
||||
@@ -1038,7 +1014,6 @@ static int share_result( int result, struct work *null_work,
|
||||
char sres[48];
|
||||
char rres[48];
|
||||
char bres[48];
|
||||
// char job_id[48];
|
||||
bool solved = false;
|
||||
bool stale = false;
|
||||
char *acol = NULL, *bcol = NULL, *scol = NULL, *rcol = NULL;
|
||||
@@ -1093,7 +1068,11 @@ static int share_result( int result, struct work *null_work,
|
||||
stale_share_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
rejected_share_count++;
|
||||
lowdiff_debug = true;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// update global counters for summary report
|
||||
@@ -1168,7 +1147,7 @@ static int share_result( int result, struct work *null_work,
|
||||
bres, share_time, latency );
|
||||
|
||||
if ( have_stratum && !opt_quiet )
|
||||
applog2( LOG_NOTICE, "Diff %.3g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
|
||||
applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
|
||||
my_stats.share_diff, share_ratio, bcol, stratum.block_height,
|
||||
scol, my_stats.job_id );
|
||||
|
||||
@@ -1186,13 +1165,15 @@ static int share_result( int result, struct work *null_work,
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
be32enc( str2 + i, str1[7 - i] );
|
||||
bin2hex( str3, (unsigned char*)str2, 12 );
|
||||
applog2( LOG_INFO, "Share diff: %g, Hash: %s...", my_stats.share_diff, str3 );
|
||||
applog2( LOG_INFO, "Share diff: %.5g, Hash: %s...",
|
||||
my_stats.share_diff, str3 );
|
||||
|
||||
diff_to_target( str1, my_stats.target_diff );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
be32enc( str2 + i, str1[7 - i] );
|
||||
bin2hex( str3, (unsigned char*)str2, 12 );
|
||||
applog2( LOG_INFO, "Target diff: %g, Targ: %s...", str3 );
|
||||
applog2( LOG_INFO, "Target diff: %.5g, Targ: %s...",
|
||||
my_stats.target_diff, str3 );
|
||||
}
|
||||
|
||||
if ( unlikely( opt_reset_on_stale && stale ) )
|
||||
@@ -1234,20 +1215,6 @@ void std_be_build_stratum_request( char *req, struct work *work )
|
||||
free( xnonce2str );
|
||||
}
|
||||
|
||||
void jr2_build_stratum_request( char *req, struct work *work )
|
||||
{
|
||||
uchar hash[32];
|
||||
char noncestr[9];
|
||||
bin2hex( noncestr, (char*) algo_gate.get_nonceptr( work->data ),
|
||||
sizeof(uint32_t) );
|
||||
algo_gate.hash_suw( hash, work->data );
|
||||
char *hashhex = abin2hex(hash, 32);
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"submit\", \"params\": {\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"}, \"id\":4}",
|
||||
rpc2_id, work->job_id, noncestr, hashhex );
|
||||
free( hashhex );
|
||||
}
|
||||
|
||||
bool std_le_submit_getwork_result( CURL *curl, struct work *work )
|
||||
{
|
||||
char req[JSON_BUF_LEN];
|
||||
@@ -1316,53 +1283,6 @@ bool std_be_submit_getwork_result( CURL *curl, struct work *work )
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool jr2_submit_getwork_result( CURL *curl, struct work *work )
|
||||
{
|
||||
json_t *val, *res;
|
||||
char req[JSON_BUF_LEN];
|
||||
char noncestr[9];
|
||||
uchar hash[32];
|
||||
char *hashhex;
|
||||
bin2hex( noncestr, (char*) algo_gate.get_nonceptr( work->data ),
|
||||
sizeof(uint32_t) );
|
||||
algo_gate.hash_suw( hash, work->data );
|
||||
hashhex = abin2hex( &hash[0], 32 );
|
||||
snprintf( req, JSON_BUF_LEN, "{\"method\": \"submit\", \"params\": "
|
||||
"{\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"},"
|
||||
"\"id\":4}\r\n",
|
||||
rpc2_id, work->job_id, noncestr, hashhex );
|
||||
free( hashhex );
|
||||
// issue JSON-RPC request
|
||||
val = json_rpc2_call( curl, rpc_url, rpc_userpass, req, NULL, 0 );
|
||||
if (unlikely( !val ))
|
||||
{
|
||||
applog(LOG_ERR, "submit_upstream_work json_rpc_call failed");
|
||||
return false;
|
||||
}
|
||||
res = json_object_get( val, "result" );
|
||||
json_t *status = json_object_get( res, "status" );
|
||||
bool valid = !strcmp( status ? json_string_value( status ) : "", "OK" );
|
||||
if (valid)
|
||||
share_result( valid, work, NULL );
|
||||
else
|
||||
{
|
||||
json_t *err = json_object_get( res, "error" );
|
||||
const char *sreason = json_string_value( json_object_get(
|
||||
err, "message" ) );
|
||||
share_result( valid, work, sreason );
|
||||
if ( !strcasecmp( "Invalid job id", sreason ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, &g_work );
|
||||
g_work_time = 0;
|
||||
restart_threads();
|
||||
}
|
||||
}
|
||||
json_decref(val);
|
||||
return true;
|
||||
}
|
||||
|
||||
char* std_malloc_txs_request( struct work *work )
|
||||
{
|
||||
char *req;
|
||||
@@ -1497,18 +1417,10 @@ static bool get_upstream_work( CURL *curl, struct work *work )
|
||||
start:
|
||||
gettimeofday( &tv_start, NULL );
|
||||
|
||||
if ( jsonrpc_2 )
|
||||
{
|
||||
char s[128];
|
||||
snprintf( s, 128, "{\"method\": \"getjob\", \"params\": {\"id\": \"%s\"}, \"id\":1}\r\n", rpc2_id );
|
||||
val = json_rpc2_call( curl, rpc_url, rpc_userpass, s, NULL, 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
val = json_rpc_call( curl, rpc_url, rpc_userpass,
|
||||
val = json_rpc_call( curl, rpc_url, rpc_userpass,
|
||||
have_gbt ? gbt_req : getwork_req, &err,
|
||||
have_gbt ? JSON_RPC_QUIET_404 : 0);
|
||||
}
|
||||
|
||||
gettimeofday( &tv_end, NULL );
|
||||
|
||||
if ( have_stratum )
|
||||
@@ -1632,68 +1544,6 @@ static bool workio_submit_work(struct workio_cmd *wc, CURL *curl)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool rpc2_login(CURL *curl)
|
||||
{
|
||||
json_t *val;
|
||||
bool rc = false;
|
||||
struct timeval tv_start, tv_end, diff;
|
||||
char s[JSON_BUF_LEN];
|
||||
|
||||
if (!jsonrpc_2)
|
||||
return false;
|
||||
snprintf(s, JSON_BUF_LEN, "{\"method\": \"login\", \"params\": {"
|
||||
"\"login\": \"%s\", \"pass\": \"%s\", \"agent\": \"%s\"}, \"id\": 1}",
|
||||
rpc_user, rpc_pass, USER_AGENT);
|
||||
gettimeofday(&tv_start, NULL);
|
||||
val = json_rpc_call(curl, rpc_url, rpc_userpass, s, NULL, 0);
|
||||
gettimeofday(&tv_end, NULL);
|
||||
if (!val)
|
||||
goto end;
|
||||
rc = rpc2_login_decode(val);
|
||||
json_t *result = json_object_get(val, "result");
|
||||
if (!result)
|
||||
goto end;
|
||||
json_t *job = json_object_get(result, "job");
|
||||
if (!rpc2_job_decode(job, &g_work))
|
||||
goto end;
|
||||
if (opt_debug && rc)
|
||||
{
|
||||
timeval_subtract(&diff, &tv_end, &tv_start);
|
||||
applog(LOG_DEBUG, "DEBUG: authenticated in %d ms",
|
||||
diff.tv_sec * 1000 + diff.tv_usec / 1000);
|
||||
}
|
||||
json_decref(val);
|
||||
end:
|
||||
return rc;
|
||||
}
|
||||
|
||||
bool rpc2_workio_login(CURL *curl)
|
||||
{
|
||||
int failures = 0;
|
||||
if (opt_benchmark)
|
||||
return true;
|
||||
/* submit solution to bitcoin via JSON-RPC */
|
||||
pthread_mutex_lock(&rpc2_login_lock);
|
||||
while (!rpc2_login(curl))
|
||||
{
|
||||
if (unlikely((opt_retries >= 0) && (++failures > opt_retries)))
|
||||
{
|
||||
applog(LOG_ERR, "...terminating workio thread");
|
||||
pthread_mutex_unlock(&rpc2_login_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* pause, then restart work-request loop */
|
||||
if (!opt_benchmark)
|
||||
applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
|
||||
sleep(opt_fail_pause);
|
||||
pthread_mutex_unlock(&rpc2_login_lock);
|
||||
pthread_mutex_lock(&rpc2_login_lock);
|
||||
}
|
||||
pthread_mutex_unlock(&rpc2_login_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *workio_thread(void *userdata)
|
||||
{
|
||||
struct thr_info *mythr = (struct thr_info *) userdata;
|
||||
@@ -1706,8 +1556,6 @@ static void *workio_thread(void *userdata)
|
||||
applog(LOG_ERR, "CURL initialization failed");
|
||||
return NULL;
|
||||
}
|
||||
if ( jsonrpc_2 && !have_stratum )
|
||||
ok = rpc2_workio_login( curl );
|
||||
|
||||
while ( likely(ok) )
|
||||
{
|
||||
@@ -1759,8 +1607,8 @@ static bool get_work(struct thr_info *thr, struct work *work)
|
||||
|
||||
// this overwrites much of the for loop init
|
||||
memset( work->data + algo_gate.nonce_index, 0x00, 52); // nonce..nonce+52
|
||||
work->data[20] = 0x80000000; // extraheader not used for jr2
|
||||
work->data[31] = 0x00000280; // extraheader not used for jr2
|
||||
work->data[20] = 0x80000000;
|
||||
work->data[31] = 0x00000280;
|
||||
return true;
|
||||
}
|
||||
/* fill out work request message */
|
||||
@@ -1822,12 +1670,13 @@ static inline double u256_to_double( const uint64_t *u )
|
||||
|
||||
void work_set_target_ratio( struct work* work, const void *hash )
|
||||
{
|
||||
double dhash;
|
||||
|
||||
dhash = u256_to_double( (const uint64_t*)hash );
|
||||
if ( likely( dhash > 0. ) )
|
||||
work->sharediff = work->targetdiff *
|
||||
if ( likely( hash ) )
|
||||
{
|
||||
double dhash = u256_to_double( (const uint64_t*)hash );
|
||||
if ( likely( dhash > 0. ) )
|
||||
work->sharediff = work->targetdiff *
|
||||
u256_to_double( (const uint64_t*)( work->target ) ) / dhash;
|
||||
}
|
||||
else
|
||||
work->sharediff = 0.;
|
||||
|
||||
@@ -1843,8 +1692,8 @@ void work_set_target_ratio( struct work* work, const void *hash )
|
||||
share_stats[ s_put_ptr ].net_diff = net_diff;
|
||||
share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
|
||||
share_stats[ s_put_ptr ].target_diff = work->targetdiff;
|
||||
strcpy( share_stats[ s_put_ptr ].job_id, work->job_id );
|
||||
|
||||
( (uint64_t*)share_stats[ s_put_ptr ].job_id )[3] = 0;
|
||||
strncpy( share_stats[ s_put_ptr ].job_id, work->job_id, 30 );
|
||||
s_put_ptr = stats_ptr_incr( s_put_ptr );
|
||||
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
@@ -1860,7 +1709,17 @@ bool submit_solution( struct work *work, const void *hash,
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_NOTICE, "%d submitted by thread %d, job %s",
|
||||
submitted_share_count, thr->id, work->job_id );
|
||||
return true;
|
||||
|
||||
if ( lowdiff_debug )
|
||||
{
|
||||
uint32_t* h = (uint32_t*)hash;
|
||||
uint32_t* t = (uint32_t*)work->target;
|
||||
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
|
||||
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "%d failed to submit share.",
|
||||
@@ -1878,6 +1737,18 @@ bool submit_lane_solution( struct work *work, const void *hash,
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
|
||||
submitted_share_count, thr->id, lane, work->job_id );
|
||||
|
||||
if ( lowdiff_debug )
|
||||
{
|
||||
uint32_t* h = (uint32_t*)hash;
|
||||
uint32_t* t = (uint32_t*)work->target;
|
||||
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
|
||||
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
else
|
||||
@@ -1886,22 +1757,6 @@ bool submit_lane_solution( struct work *work, const void *hash,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool rpc2_stratum_job( struct stratum_ctx *sctx, json_t *params )
|
||||
{
|
||||
bool ret = false;
|
||||
pthread_mutex_lock(&sctx->work_lock);
|
||||
ret = rpc2_job_decode(params, &sctx->work);
|
||||
if (ret)
|
||||
{
|
||||
if (sctx->job.job_id)
|
||||
free(sctx->job.job_id);
|
||||
sctx->job.job_id = strdup(sctx->work.job_id);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool wanna_mine(int thr_id)
|
||||
{
|
||||
bool state = true;
|
||||
@@ -1988,21 +1843,10 @@ double std_calc_network_diff( struct work* work )
|
||||
return d;
|
||||
}
|
||||
|
||||
uint32_t *std_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return work_data + algo_gate.nonce_index;
|
||||
}
|
||||
|
||||
uint32_t *jr2_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
// nonce is misaligned, use byte offset
|
||||
return (uint32_t*) ( ((uint8_t*) work_data) + algo_gate.nonce_index );
|
||||
}
|
||||
|
||||
void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr )
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
|
||||
|
||||
bool force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) !=
|
||||
strtoul( g_work->job_id, NULL, 16 )
|
||||
@@ -2021,28 +1865,6 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
void jr2_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr )
|
||||
{
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
|
||||
// byte data[ 0..38, 43..75 ], skip over misaligned nonce [39..42]
|
||||
if ( memcmp( work->data, g_work->data, algo_gate.nonce_index )
|
||||
|| memcmp( ((uint8_t*) work->data) + JR2_WORK_CMP_INDEX_2,
|
||||
((uint8_t*) g_work->data) + JR2_WORK_CMP_INDEX_2,
|
||||
JR2_WORK_CMP_SIZE_2 ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
*nonceptr = ( 0xffffffU / opt_n_threads ) * thr_id
|
||||
+ ( *nonceptr & 0xff000000U );
|
||||
*end_nonce_ptr = ( 0xffffffU / opt_n_threads ) * (thr_id+1)
|
||||
+ ( *nonceptr & 0xff000000U ) - 0x20;
|
||||
}
|
||||
else
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
@@ -2060,7 +1882,7 @@ static void *miner_thread( void *userdata )
|
||||
struct thr_info *mythr = (struct thr_info *) userdata;
|
||||
int thr_id = mythr->id;
|
||||
uint32_t max_nonce;
|
||||
struct timeval cpu_temp_time = {0};
|
||||
uint32_t *nonceptr = work.data + algo_gate.nonce_index;
|
||||
|
||||
// end_nonce gets read before being set so it needs to be initialized
|
||||
// what is an appropriate value that is completely neutral?
|
||||
@@ -2170,7 +1992,8 @@ static void *miner_thread( void *userdata )
|
||||
if ( have_stratum )
|
||||
{
|
||||
pthread_mutex_lock( &g_work_lock );
|
||||
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
|
||||
|
||||
if ( *nonceptr >= end_nonce )
|
||||
algo_gate.stratum_gen_work( &stratum, &g_work );
|
||||
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
|
||||
pthread_mutex_unlock( &g_work_lock );
|
||||
@@ -2181,7 +2004,7 @@ static void *miner_thread( void *userdata )
|
||||
pthread_mutex_lock( &g_work_lock );
|
||||
|
||||
if ( time(NULL) - g_work_time >= min_scantime
|
||||
|| *algo_gate.get_nonceptr( work.data ) >= end_nonce )
|
||||
|| *nonceptr >= end_nonce )
|
||||
{
|
||||
if ( unlikely( !get_work( mythr, &g_work ) ) )
|
||||
{
|
||||
@@ -2242,7 +2065,7 @@ static void *miner_thread( void *userdata )
|
||||
// Select nonce range for approx 1 min duration based
|
||||
// on hashrate, initial value arbitrarilly set to 1000 just to get
|
||||
// a sample hashrate for the next time.
|
||||
uint32_t work_nonce = *( algo_gate.get_nonceptr( work.data ) );
|
||||
uint32_t work_nonce = *nonceptr;
|
||||
max64 = 60 * thr_hashrates[thr_id];
|
||||
if ( max64 <= 0)
|
||||
max64 = 1000;
|
||||
@@ -2301,15 +2124,15 @@ static void *miner_thread( void *userdata )
|
||||
if (!opt_quiet && mythr->id == 0 )
|
||||
{
|
||||
int temp = cpu_temp(0);
|
||||
static struct timeval cpu_temp_time = {0};
|
||||
timeval_subtract( &diff, &tv_end, &cpu_temp_time );
|
||||
int wait = temp >= 80 ? 30 : temp >= 70 ? 90 : 180;
|
||||
int wait = temp >= 80 ? 30 : temp >= 70 ? 60 : 120;
|
||||
if ( ( diff.tv_sec > wait ) || ( temp > hi_temp ) )
|
||||
{
|
||||
char tempstr[32];
|
||||
int lo_freq, hi_freq;
|
||||
linux_cpu_hilo_freq( &lo_freq, &hi_freq );
|
||||
memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) );
|
||||
if ( temp > hi_temp ) hi_temp = temp;
|
||||
if ( use_colors && ( temp >= 70 ) )
|
||||
{
|
||||
if ( temp >= 80 )
|
||||
@@ -2321,6 +2144,7 @@ static void *miner_thread( void *userdata )
|
||||
sprintf( tempstr, "%d C", temp );
|
||||
applog( LOG_INFO,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz",
|
||||
tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 );
|
||||
if ( temp > hi_temp ) hi_temp = temp;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -2398,25 +2222,6 @@ json_t *std_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
|
||||
return val;
|
||||
}
|
||||
|
||||
json_t *jr2_longpoll_rpc_call( CURL *curl, int *err )
|
||||
{
|
||||
json_t *val;
|
||||
char req[128];
|
||||
|
||||
pthread_mutex_lock( &rpc2_login_lock );
|
||||
if ( !strlen(rpc2_id) )
|
||||
{
|
||||
pthread_mutex_unlock( &rpc2_login_lock );
|
||||
sleep(1);
|
||||
return NULL;
|
||||
}
|
||||
snprintf( req, 128, "{\"method\": \"getjob\", \"params\": {\"id\": \"%s\"}, \"id\":1}\r\n", rpc2_id );
|
||||
pthread_mutex_unlock( &rpc2_login_lock );
|
||||
val = json_rpc2_call( curl, rpc_url, rpc_userpass, req, err,
|
||||
JSON_RPC_LONGPOLL );
|
||||
return val;
|
||||
}
|
||||
|
||||
static void *longpoll_thread(void *userdata)
|
||||
{
|
||||
struct thr_info *mythr = (struct thr_info*) userdata;
|
||||
@@ -2478,11 +2283,8 @@ start:
|
||||
double start_diff = 0.0;
|
||||
json_t *res, *soval;
|
||||
res = json_object_get(val, "result");
|
||||
if (!jsonrpc_2)
|
||||
{
|
||||
soval = json_object_get(res, "submitold");
|
||||
submit_old = soval ? json_is_true(soval) : false;
|
||||
}
|
||||
soval = json_object_get(res, "submitold");
|
||||
submit_old = soval ? json_is_true(soval) : false;
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
start_job_id = g_work.job_id ? strdup(g_work.job_id) : NULL;
|
||||
if (have_gbt)
|
||||
@@ -2547,48 +2349,12 @@ out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool std_stratum_handle_response( json_t *val )
|
||||
{
|
||||
bool valid = false;
|
||||
json_t *err_val, *res_val, *id_val;
|
||||
res_val = json_object_get( val, "result" );
|
||||
err_val = json_object_get( val, "error" );
|
||||
id_val = json_object_get( val, "id" );
|
||||
|
||||
if ( !res_val || json_integer_value(id_val) < 4 )
|
||||
return false;
|
||||
valid = json_is_true( res_val );
|
||||
share_result( valid, NULL, err_val ?
|
||||
json_string_value( json_array_get(err_val, 1) ) : NULL );
|
||||
return true;
|
||||
}
|
||||
|
||||
bool jr2_stratum_handle_response( json_t *val )
|
||||
{
|
||||
bool valid = false;
|
||||
json_t *err_val, *res_val;
|
||||
res_val = json_object_get( val, "result" );
|
||||
err_val = json_object_get( val, "error" );
|
||||
|
||||
if ( !res_val && !err_val )
|
||||
return false;
|
||||
json_t *status = json_object_get( res_val, "status" );
|
||||
if ( status )
|
||||
{
|
||||
const char *s = json_string_value( status );
|
||||
valid = !strcmp( s, "OK" ) && json_is_null( err_val );
|
||||
}
|
||||
else
|
||||
valid = json_is_null( err_val );
|
||||
share_result( valid, NULL, err_val ? json_string_value(err_val) : NULL );
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool stratum_handle_response( char *buf )
|
||||
{
|
||||
json_t *val, *id_val, *res_val;
|
||||
json_t *val, *id_val, *res_val, *err_val;
|
||||
json_error_t err;
|
||||
bool ret = false;
|
||||
bool share_accepted = false;
|
||||
|
||||
val = JSON_LOADS( buf, &err );
|
||||
if (!val)
|
||||
@@ -2602,8 +2368,15 @@ static bool stratum_handle_response( char *buf )
|
||||
id_val = json_object_get( val, "id" );
|
||||
if ( !id_val || json_is_null(id_val) )
|
||||
goto out;
|
||||
if ( !algo_gate.stratum_handle_response( val ) )
|
||||
|
||||
err_val = json_object_get( val, "error" );
|
||||
|
||||
if ( !res_val || json_integer_value( id_val ) < 4 )
|
||||
goto out;
|
||||
share_accepted = json_is_true( res_val );
|
||||
share_result( share_accepted, NULL, err_val ?
|
||||
json_string_value( json_array_get(err_val, 1) ) : NULL );
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
if (val)
|
||||
@@ -2726,7 +2499,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "%s: %s", algo_names[opt_algo], short_url );
|
||||
applog2( LOG_INFO, "Diff: Net %.3g, Stratum %.3g, Target %.3g",
|
||||
applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
|
||||
net_diff, stratum_diff, last_targetdiff );
|
||||
|
||||
if ( likely( hr > 0. ) )
|
||||
@@ -2766,31 +2539,6 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
} // new diff/block
|
||||
}
|
||||
|
||||
void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
{
|
||||
pthread_mutex_lock( &sctx->work_lock );
|
||||
work_free( g_work );
|
||||
work_copy( g_work, &sctx->work );
|
||||
pthread_mutex_unlock( &sctx->work_lock );
|
||||
/*
|
||||
if ( stratum_diff != sctx->job.diff )
|
||||
applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
|
||||
sctx->job.diff, sctx->block_height, g_work->job_id );
|
||||
else if ( last_block_height != sctx->block_height )
|
||||
applog( LOG_BLUE, "New block %d, job %s",
|
||||
sctx->block_height, g_work->job_id );
|
||||
else if ( g_work->job_id )
|
||||
applog( LOG_BLUE,"New job %s", g_work->job_id );
|
||||
*/
|
||||
if ( last_block_height != stratum.block_height )
|
||||
{
|
||||
applog(LOG_BLUE, "Stratum detected new block");
|
||||
last_block_height = stratum.block_height;
|
||||
}
|
||||
if ( stratum_diff != g_work->stratum_diff )
|
||||
stratum_diff = g_work->stratum_diff;
|
||||
}
|
||||
|
||||
static void *stratum_thread(void *userdata )
|
||||
{
|
||||
struct thr_info *mythr = (struct thr_info *) userdata;
|
||||
@@ -2844,11 +2592,6 @@ static void *stratum_thread(void *userdata )
|
||||
}
|
||||
else
|
||||
applog(LOG_BLUE,"Stratum connection established" );
|
||||
if ( unlikely( jsonrpc_2 ) )
|
||||
{
|
||||
work_free(&g_work);
|
||||
work_copy(&g_work, &stratum.work);
|
||||
}
|
||||
}
|
||||
|
||||
report_summary_log( ( stratum_diff != stratum.job.diff )
|
||||
@@ -3010,11 +2753,11 @@ void parse_arg(int key, char *arg )
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
p = strstr(arg, ":");
|
||||
opt_api_enabled = true;
|
||||
p = strstr(arg, ":");
|
||||
if (p) {
|
||||
/* ip:port */
|
||||
if (p - arg > 0) {
|
||||
free(opt_api_allow);
|
||||
opt_api_allow = strdup(arg);
|
||||
opt_api_allow[p - arg] = '\0';
|
||||
}
|
||||
@@ -3024,10 +2767,12 @@ void parse_arg(int key, char *arg )
|
||||
/* ip only */
|
||||
free(opt_api_allow);
|
||||
opt_api_allow = strdup(arg);
|
||||
}
|
||||
opt_api_listen = default_api_listen;
|
||||
}
|
||||
else if (arg) {
|
||||
/* port or 0 to disable */
|
||||
opt_api_listen = atoi(arg);
|
||||
opt_api_allow = default_api_allow;
|
||||
opt_api_listen = atoi(arg);
|
||||
}
|
||||
break;
|
||||
case 1030: /* --api-remote */
|
||||
@@ -3635,7 +3380,6 @@ int main(int argc, char *argv[])
|
||||
|
||||
rpc_user = strdup("");
|
||||
rpc_pass = strdup("");
|
||||
// opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
|
||||
|
||||
parse_cmdline(argc, argv);
|
||||
|
||||
@@ -3735,8 +3479,6 @@ int main(int argc, char *argv[])
|
||||
|
||||
pthread_mutex_init( &stats_lock, NULL );
|
||||
pthread_mutex_init( &g_work_lock, NULL );
|
||||
pthread_mutex_init( &rpc2_job_lock, NULL );
|
||||
pthread_mutex_init( &rpc2_login_lock, NULL );
|
||||
pthread_mutex_init( &stratum.sock_lock, NULL );
|
||||
pthread_mutex_init( &stratum.work_lock, NULL );
|
||||
|
||||
@@ -3911,7 +3653,7 @@ int main(int argc, char *argv[])
|
||||
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
|
||||
}
|
||||
|
||||
if (opt_api_listen)
|
||||
if ( opt_api_enabled )
|
||||
{
|
||||
/* api thread */
|
||||
api_thr_id = opt_n_threads + 3;
|
||||
|
170
crypto/aesb.c
170
crypto/aesb.c
@@ -1,170 +0,0 @@
|
||||
/*
|
||||
---------------------------------------------------------------------------
|
||||
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
|
||||
|
||||
The redistribution and use of this software (with or without changes)
|
||||
is allowed without the payment of fees or royalties provided that:
|
||||
|
||||
source code distributions include the above copyright notice, this
|
||||
list of conditions and the following disclaimer;
|
||||
|
||||
binary distributions include the above copyright notice, this list
|
||||
of conditions and the following disclaimer in their documentation.
|
||||
|
||||
This software is provided 'as is' with no explicit or implied warranties
|
||||
in respect of its operation, including, but not limited to, correctness
|
||||
and fitness for purpose.
|
||||
---------------------------------------------------------------------------
|
||||
Issue Date: 20/12/2007
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define TABLE_ALIGN 32
|
||||
#define WPOLY 0x011b
|
||||
#define N_COLS 4
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define ALIGN __declspec(align(TABLE_ALIGN))
|
||||
#elif defined(__GNUC__)
|
||||
#define ALIGN __attribute__ ((aligned(16)))
|
||||
#else
|
||||
#define ALIGN
|
||||
#endif
|
||||
|
||||
#define rf1(r,c) (r)
|
||||
#define word_in(x,c) (*((uint32_t*)(x)+(c)))
|
||||
#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
|
||||
|
||||
#define s(x,c) x[c]
|
||||
#define si(y,x,c) (s(y,c) = word_in(x, c))
|
||||
#define so(y,x,c) word_out(y, c, s(x,c))
|
||||
#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
|
||||
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
|
||||
#define round(y,x,k) \
|
||||
y[0] = (k)[0] ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \
|
||||
y[1] = (k)[1] ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \
|
||||
y[2] = (k)[2] ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \
|
||||
y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]);
|
||||
#define to_byte(x) ((x) & 0xff)
|
||||
#define bval(x,n) to_byte((x) >> (8 * (n)))
|
||||
|
||||
#define fwd_var(x,r,c)\
|
||||
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
|
||||
: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
|
||||
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
|
||||
: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
|
||||
|
||||
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
|
||||
|
||||
#define sb_data(w) {\
|
||||
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
|
||||
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
|
||||
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
|
||||
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
|
||||
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
|
||||
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
|
||||
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
|
||||
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
|
||||
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
|
||||
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
|
||||
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
|
||||
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
|
||||
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
|
||||
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
|
||||
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
|
||||
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
|
||||
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
|
||||
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
|
||||
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
|
||||
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
|
||||
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
|
||||
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
|
||||
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
|
||||
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
|
||||
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
|
||||
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
|
||||
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
|
||||
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
|
||||
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
|
||||
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
|
||||
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
|
||||
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
|
||||
|
||||
#define rc_data(w) {\
|
||||
w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
|
||||
w(0x1b), w(0x36) }
|
||||
|
||||
#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
|
||||
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
|
||||
|
||||
#define h0(x) (x)
|
||||
#define w0(p) bytes2word(p, 0, 0, 0)
|
||||
#define w1(p) bytes2word(0, p, 0, 0)
|
||||
#define w2(p) bytes2word(0, 0, p, 0)
|
||||
#define w3(p) bytes2word(0, 0, 0, p)
|
||||
|
||||
#define u0(p) bytes2word(f2(p), p, p, f3(p))
|
||||
#define u1(p) bytes2word(f3(p), f2(p), p, p)
|
||||
#define u2(p) bytes2word(p, f3(p), f2(p), p)
|
||||
#define u3(p) bytes2word(p, p, f3(p), f2(p))
|
||||
|
||||
#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
|
||||
#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
|
||||
#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
|
||||
#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
|
||||
|
||||
#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
|
||||
#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
|
||||
#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
|
||||
#define f3(x) (f2(x) ^ x)
|
||||
#define f9(x) (f8(x) ^ x)
|
||||
#define fb(x) (f8(x) ^ f2(x) ^ x)
|
||||
#define fd(x) (f8(x) ^ f4(x) ^ x)
|
||||
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
|
||||
|
||||
#define t_dec(m,n) t_##m##n
|
||||
#define t_set(m,n) t_##m##n
|
||||
#define t_use(m,n) t_##m##n
|
||||
|
||||
#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
|
||||
|
||||
#define four_tables(x,tab,vf,rf,c) \
|
||||
(tab[0][bval(vf(x,0,c),rf(0,c))] \
|
||||
^ tab[1][bval(vf(x,1,c),rf(1,c))] \
|
||||
^ tab[2][bval(vf(x,2,c),rf(2,c))] \
|
||||
^ tab[3][bval(vf(x,3,c),rf(3,c))])
|
||||
|
||||
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
|
||||
|
||||
void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
|
||||
{
|
||||
round(((uint32_t*) out), ((uint32_t*) in), ((uint32_t*) expandedKey));
|
||||
}
|
||||
|
||||
void aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey)
|
||||
{
|
||||
uint32_t b1[4];
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey));
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 1 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 2 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 3 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 4 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 5 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 6 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 7 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 8 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 9 * N_COLS);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
@@ -1,326 +0,0 @@
|
||||
/*
|
||||
* The blake256_* and blake224_* functions are largely copied from
|
||||
* blake256_light.c and blake224_light.c from the BLAKE website:
|
||||
*
|
||||
* http://131002.net/blake/
|
||||
*
|
||||
* The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224.
|
||||
* HMAC is specified by RFC 2104.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "c_blake256.h"
|
||||
|
||||
#define U8TO32(p) \
|
||||
(((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
|
||||
((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) ))
|
||||
#define U32TO8(p, v) \
|
||||
(p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
|
||||
(p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) );
|
||||
|
||||
const uint8_t sigma[][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
|
||||
{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
|
||||
{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
|
||||
{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
|
||||
{ 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
|
||||
{ 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
|
||||
{12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
|
||||
{13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
|
||||
{ 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
|
||||
{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
|
||||
{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
|
||||
{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
|
||||
};
|
||||
|
||||
const uint32_t cst[16] = {
|
||||
0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
|
||||
0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
|
||||
0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
|
||||
0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
|
||||
};
|
||||
|
||||
static const uint8_t padding[] = {
|
||||
0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
};
|
||||
|
||||
|
||||
void blake256_compress(state *S, const uint8_t *block) {
|
||||
uint32_t v[16], m[16], i;
|
||||
|
||||
#define ROT(x,n) (((x)<<(32-n))|((x)>>(n)))
|
||||
#define G(a,b,c,d,e) \
|
||||
v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
|
||||
v[d] = ROT(v[d] ^ v[a],16); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROT(v[b] ^ v[c],12); \
|
||||
v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b]; \
|
||||
v[d] = ROT(v[d] ^ v[a], 8); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROT(v[b] ^ v[c], 7);
|
||||
|
||||
for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
|
||||
for (i = 0; i < 8; ++i) v[i] = S->h[i];
|
||||
v[ 8] = S->s[0] ^ 0x243F6A88;
|
||||
v[ 9] = S->s[1] ^ 0x85A308D3;
|
||||
v[10] = S->s[2] ^ 0x13198A2E;
|
||||
v[11] = S->s[3] ^ 0x03707344;
|
||||
v[12] = 0xA4093822;
|
||||
v[13] = 0x299F31D0;
|
||||
v[14] = 0x082EFA98;
|
||||
v[15] = 0xEC4E6C89;
|
||||
|
||||
if (S->nullt == 0) {
|
||||
v[12] ^= S->t[0];
|
||||
v[13] ^= S->t[0];
|
||||
v[14] ^= S->t[1];
|
||||
v[15] ^= S->t[1];
|
||||
}
|
||||
|
||||
for (i = 0; i < 14; ++i) {
|
||||
G(0, 4, 8, 12, 0);
|
||||
G(1, 5, 9, 13, 2);
|
||||
G(2, 6, 10, 14, 4);
|
||||
G(3, 7, 11, 15, 6);
|
||||
G(3, 4, 9, 14, 14);
|
||||
G(2, 7, 8, 13, 12);
|
||||
G(0, 5, 10, 15, 8);
|
||||
G(1, 6, 11, 12, 10);
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
|
||||
for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4];
|
||||
}
|
||||
|
||||
void blake256_init(state *S) {
|
||||
S->h[0] = 0x6A09E667;
|
||||
S->h[1] = 0xBB67AE85;
|
||||
S->h[2] = 0x3C6EF372;
|
||||
S->h[3] = 0xA54FF53A;
|
||||
S->h[4] = 0x510E527F;
|
||||
S->h[5] = 0x9B05688C;
|
||||
S->h[6] = 0x1F83D9AB;
|
||||
S->h[7] = 0x5BE0CD19;
|
||||
S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
|
||||
S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
|
||||
}
|
||||
|
||||
void blake224_init(state *S) {
|
||||
S->h[0] = 0xC1059ED8;
|
||||
S->h[1] = 0x367CD507;
|
||||
S->h[2] = 0x3070DD17;
|
||||
S->h[3] = 0xF70E5939;
|
||||
S->h[4] = 0xFFC00B31;
|
||||
S->h[5] = 0x68581511;
|
||||
S->h[6] = 0x64F98FA7;
|
||||
S->h[7] = 0xBEFA4FA4;
|
||||
S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
|
||||
S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
|
||||
}
|
||||
|
||||
// datalen = number of bits
|
||||
void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
|
||||
int left = S->buflen >> 3;
|
||||
int fill = 64 - left;
|
||||
|
||||
if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
|
||||
memcpy((void *) (S->buf + left), (void *) data, fill);
|
||||
S->t[0] += 512;
|
||||
if (S->t[0] == 0) S->t[1]++;
|
||||
blake256_compress(S, S->buf);
|
||||
data += fill;
|
||||
datalen -= (fill << 3);
|
||||
left = 0;
|
||||
}
|
||||
|
||||
while (datalen >= 512) {
|
||||
S->t[0] += 512;
|
||||
if (S->t[0] == 0) S->t[1]++;
|
||||
blake256_compress(S, data);
|
||||
data += 64;
|
||||
datalen -= 512;
|
||||
}
|
||||
|
||||
if (datalen > 0) {
|
||||
memcpy((void *) (S->buf + left), (void *) data, (size_t) (datalen >> 3));
|
||||
S->buflen = (left << 3) + (int) datalen;
|
||||
} else {
|
||||
S->buflen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// datalen = number of bits
|
||||
void blake224_update(state *S, const uint8_t *data, uint64_t datalen) {
|
||||
blake256_update(S, data, datalen);
|
||||
}
|
||||
|
||||
void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) {
|
||||
uint8_t msglen[8];
|
||||
uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
|
||||
if (lo < (unsigned) S->buflen) hi++;
|
||||
U32TO8(msglen + 0, hi);
|
||||
U32TO8(msglen + 4, lo);
|
||||
|
||||
if (S->buflen == 440) { /* one padding byte */
|
||||
S->t[0] -= 8;
|
||||
blake256_update(S, &pa, 8);
|
||||
} else {
|
||||
if (S->buflen < 440) { /* enough space to fill the block */
|
||||
if (S->buflen == 0) S->nullt = 1;
|
||||
S->t[0] -= 440 - S->buflen;
|
||||
blake256_update(S, padding, 440 - S->buflen);
|
||||
} else { /* need 2 compressions */
|
||||
S->t[0] -= 512 - S->buflen;
|
||||
blake256_update(S, padding, 512 - S->buflen);
|
||||
S->t[0] -= 440;
|
||||
blake256_update(S, padding + 1, 440);
|
||||
S->nullt = 1;
|
||||
}
|
||||
blake256_update(S, &pb, 8);
|
||||
S->t[0] -= 8;
|
||||
}
|
||||
S->t[0] -= 64;
|
||||
blake256_update(S, msglen, 64);
|
||||
|
||||
U32TO8(digest + 0, S->h[0]);
|
||||
U32TO8(digest + 4, S->h[1]);
|
||||
U32TO8(digest + 8, S->h[2]);
|
||||
U32TO8(digest + 12, S->h[3]);
|
||||
U32TO8(digest + 16, S->h[4]);
|
||||
U32TO8(digest + 20, S->h[5]);
|
||||
U32TO8(digest + 24, S->h[6]);
|
||||
U32TO8(digest + 28, S->h[7]);
|
||||
}
|
||||
|
||||
void blake256_final(state *S, uint8_t *digest) {
|
||||
blake256_final_h(S, digest, 0x81, 0x01);
|
||||
}
|
||||
|
||||
void blake224_final(state *S, uint8_t *digest) {
|
||||
blake256_final_h(S, digest, 0x80, 0x00);
|
||||
}
|
||||
|
||||
// inlen = number of bytes
|
||||
void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
|
||||
state S;
|
||||
blake256_init(&S);
|
||||
blake256_update(&S, in, inlen * 8);
|
||||
blake256_final(&S, out);
|
||||
}
|
||||
|
||||
// inlen = number of bytes
|
||||
void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
|
||||
state S;
|
||||
blake224_init(&S);
|
||||
blake224_update(&S, in, inlen * 8);
|
||||
blake224_final(&S, out);
|
||||
}
|
||||
|
||||
// keylen = number of bytes
|
||||
void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
|
||||
const uint8_t *key = _key;
|
||||
uint8_t keyhash[32];
|
||||
uint8_t pad[64];
|
||||
uint64_t i;
|
||||
|
||||
if (keylen > 64) {
|
||||
blake256_hash(keyhash, key, keylen);
|
||||
key = keyhash;
|
||||
keylen = 32;
|
||||
}
|
||||
|
||||
blake256_init(&S->inner);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
blake256_update(&S->inner, pad, 512);
|
||||
|
||||
blake256_init(&S->outer);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
blake256_update(&S->outer, pad, 512);
|
||||
|
||||
memset(keyhash, 0, 32);
|
||||
}
|
||||
|
||||
// keylen = number of bytes
|
||||
void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
|
||||
const uint8_t *key = _key;
|
||||
uint8_t keyhash[32];
|
||||
uint8_t pad[64];
|
||||
uint64_t i;
|
||||
|
||||
if (keylen > 64) {
|
||||
blake256_hash(keyhash, key, keylen);
|
||||
key = keyhash;
|
||||
keylen = 28;
|
||||
}
|
||||
|
||||
blake224_init(&S->inner);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
blake224_update(&S->inner, pad, 512);
|
||||
|
||||
blake224_init(&S->outer);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < keylen; ++i) {
|
||||
pad[i] ^= key[i];
|
||||
}
|
||||
blake224_update(&S->outer, pad, 512);
|
||||
|
||||
memset(keyhash, 0, 32);
|
||||
}
|
||||
|
||||
// datalen = number of bits
|
||||
void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
|
||||
// update the inner state
|
||||
blake256_update(&S->inner, data, datalen);
|
||||
}
|
||||
|
||||
// datalen = number of bits
|
||||
void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
|
||||
// update the inner state
|
||||
blake224_update(&S->inner, data, datalen);
|
||||
}
|
||||
|
||||
void hmac_blake256_final(hmac_state *S, uint8_t *digest) {
|
||||
uint8_t ihash[32];
|
||||
blake256_final(&S->inner, ihash);
|
||||
blake256_update(&S->outer, ihash, 256);
|
||||
blake256_final(&S->outer, digest);
|
||||
memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
|
||||
uint8_t ihash[32];
|
||||
blake224_final(&S->inner, ihash);
|
||||
blake224_update(&S->outer, ihash, 224);
|
||||
blake224_final(&S->outer, digest);
|
||||
memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
// keylen = number of bytes; inlen = number of bytes
|
||||
void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
|
||||
hmac_state S;
|
||||
hmac_blake256_init(&S, key, keylen);
|
||||
hmac_blake256_update(&S, in, inlen * 8);
|
||||
hmac_blake256_final(&S, out);
|
||||
}
|
||||
|
||||
// keylen = number of bytes; inlen = number of bytes
|
||||
void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
|
||||
hmac_state S;
|
||||
hmac_blake224_init(&S, key, keylen);
|
||||
hmac_blake224_update(&S, in, inlen * 8);
|
||||
hmac_blake224_final(&S, out);
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
#ifndef _BLAKE256_H_
|
||||
#define _BLAKE256_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct {
|
||||
uint32_t h[8], s[4], t[2];
|
||||
int buflen, nullt;
|
||||
uint8_t buf[64];
|
||||
} state;
|
||||
|
||||
typedef struct {
|
||||
state inner;
|
||||
state outer;
|
||||
} hmac_state;
|
||||
|
||||
void blake256_init(state *);
|
||||
void blake224_init(state *);
|
||||
|
||||
void blake256_update(state *, const uint8_t *, uint64_t);
|
||||
void blake224_update(state *, const uint8_t *, uint64_t);
|
||||
|
||||
void blake256_final(state *, uint8_t *);
|
||||
void blake224_final(state *, uint8_t *);
|
||||
|
||||
void blake256_hash(uint8_t *, const uint8_t *, uint64_t);
|
||||
void blake224_hash(uint8_t *, const uint8_t *, uint64_t);
|
||||
|
||||
/* HMAC functions: */
|
||||
|
||||
void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
|
||||
void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);
|
||||
|
||||
void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t);
|
||||
void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t);
|
||||
|
||||
void hmac_blake256_final(hmac_state *, uint8_t *);
|
||||
void hmac_blake224_final(hmac_state *, uint8_t *);
|
||||
|
||||
void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
|
||||
void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
|
||||
|
||||
#endif /* _BLAKE256_H_ */
|
@@ -1,360 +0,0 @@
|
||||
/* hash.c April 2012
|
||||
* Groestl ANSI C code optimised for 32-bit machines
|
||||
* Author: Thomas Krinninger
|
||||
*
|
||||
* This work is based on the implementation of
|
||||
* Soeren S. Thomsen and Krystian Matusiewicz
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#include "c_groestl.h"
|
||||
#include "groestl_tables.h"
|
||||
|
||||
#define P_TYPE 0
|
||||
#define Q_TYPE 1
|
||||
|
||||
const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}};
|
||||
|
||||
const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
|
||||
|
||||
|
||||
#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
|
||||
v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
|
||||
v1 = temp_var;}
|
||||
|
||||
|
||||
#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \
|
||||
tu = T[2*(uint32_t)x[4*c0+0]]; \
|
||||
tl = T[2*(uint32_t)x[4*c0+0]+1]; \
|
||||
tv1 = T[2*(uint32_t)x[4*c1+1]]; \
|
||||
tv2 = T[2*(uint32_t)x[4*c1+1]+1]; \
|
||||
ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
|
||||
tu ^= tv1; \
|
||||
tl ^= tv2; \
|
||||
tv1 = T[2*(uint32_t)x[4*c2+2]]; \
|
||||
tv2 = T[2*(uint32_t)x[4*c2+2]+1]; \
|
||||
ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
|
||||
tu ^= tv1; \
|
||||
tl ^= tv2; \
|
||||
tv1 = T[2*(uint32_t)x[4*c3+3]]; \
|
||||
tv2 = T[2*(uint32_t)x[4*c3+3]+1]; \
|
||||
ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
|
||||
tu ^= tv1; \
|
||||
tl ^= tv2; \
|
||||
tl ^= T[2*(uint32_t)x[4*c4+0]]; \
|
||||
tu ^= T[2*(uint32_t)x[4*c4+0]+1]; \
|
||||
tv1 = T[2*(uint32_t)x[4*c5+1]]; \
|
||||
tv2 = T[2*(uint32_t)x[4*c5+1]+1]; \
|
||||
ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
|
||||
tl ^= tv1; \
|
||||
tu ^= tv2; \
|
||||
tv1 = T[2*(uint32_t)x[4*c6+2]]; \
|
||||
tv2 = T[2*(uint32_t)x[4*c6+2]+1]; \
|
||||
ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
|
||||
tl ^= tv1; \
|
||||
tu ^= tv2; \
|
||||
tv1 = T[2*(uint32_t)x[4*c7+3]]; \
|
||||
tv2 = T[2*(uint32_t)x[4*c7+3]+1]; \
|
||||
ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
|
||||
tl ^= tv1; \
|
||||
tu ^= tv2; \
|
||||
y[i] = tu; \
|
||||
y[i+1] = tl;
|
||||
|
||||
|
||||
/* compute one round of P (short variants) */
|
||||
static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) {
|
||||
uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
|
||||
uint32_t* x32 = (uint32_t*)x;
|
||||
x32[ 0] ^= 0x00000000^r;
|
||||
x32[ 2] ^= 0x00000010^r;
|
||||
x32[ 4] ^= 0x00000020^r;
|
||||
x32[ 6] ^= 0x00000030^r;
|
||||
x32[ 8] ^= 0x00000040^r;
|
||||
x32[10] ^= 0x00000050^r;
|
||||
x32[12] ^= 0x00000060^r;
|
||||
x32[14] ^= 0x00000070^r;
|
||||
COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
}
|
||||
|
||||
/* compute one round of Q (short variants) */
|
||||
static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) {
|
||||
uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
|
||||
uint32_t* x32 = (uint32_t*)x;
|
||||
x32[ 0] = ~x32[ 0];
|
||||
x32[ 1] ^= 0xffffffff^r;
|
||||
x32[ 2] = ~x32[ 2];
|
||||
x32[ 3] ^= 0xefffffff^r;
|
||||
x32[ 4] = ~x32[ 4];
|
||||
x32[ 5] ^= 0xdfffffff^r;
|
||||
x32[ 6] = ~x32[ 6];
|
||||
x32[ 7] ^= 0xcfffffff^r;
|
||||
x32[ 8] = ~x32[ 8];
|
||||
x32[ 9] ^= 0xbfffffff^r;
|
||||
x32[10] = ~x32[10];
|
||||
x32[11] ^= 0xafffffff^r;
|
||||
x32[12] = ~x32[12];
|
||||
x32[13] ^= 0x9fffffff^r;
|
||||
x32[14] = ~x32[14];
|
||||
x32[15] ^= 0x8fffffff^r;
|
||||
COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
|
||||
}
|
||||
|
||||
/* compute compression function (short variants) */
|
||||
static void F512(uint32_t *h, const uint32_t *m) {
|
||||
int i;
|
||||
uint32_t Ptmp[2*COLS512];
|
||||
uint32_t Qtmp[2*COLS512];
|
||||
uint32_t y[2*COLS512];
|
||||
uint32_t z[2*COLS512];
|
||||
|
||||
for (i = 0; i < 2*COLS512; i++) {
|
||||
z[i] = m[i];
|
||||
Ptmp[i] = h[i]^m[i];
|
||||
}
|
||||
|
||||
/* compute Q(m) */
|
||||
RND512Q((uint8_t*)z, y, 0x00000000);
|
||||
RND512Q((uint8_t*)y, z, 0x01000000);
|
||||
RND512Q((uint8_t*)z, y, 0x02000000);
|
||||
RND512Q((uint8_t*)y, z, 0x03000000);
|
||||
RND512Q((uint8_t*)z, y, 0x04000000);
|
||||
RND512Q((uint8_t*)y, z, 0x05000000);
|
||||
RND512Q((uint8_t*)z, y, 0x06000000);
|
||||
RND512Q((uint8_t*)y, z, 0x07000000);
|
||||
RND512Q((uint8_t*)z, y, 0x08000000);
|
||||
RND512Q((uint8_t*)y, Qtmp, 0x09000000);
|
||||
|
||||
/* compute P(h+m) */
|
||||
RND512P((uint8_t*)Ptmp, y, 0x00000000);
|
||||
RND512P((uint8_t*)y, z, 0x00000001);
|
||||
RND512P((uint8_t*)z, y, 0x00000002);
|
||||
RND512P((uint8_t*)y, z, 0x00000003);
|
||||
RND512P((uint8_t*)z, y, 0x00000004);
|
||||
RND512P((uint8_t*)y, z, 0x00000005);
|
||||
RND512P((uint8_t*)z, y, 0x00000006);
|
||||
RND512P((uint8_t*)y, z, 0x00000007);
|
||||
RND512P((uint8_t*)z, y, 0x00000008);
|
||||
RND512P((uint8_t*)y, Ptmp, 0x00000009);
|
||||
|
||||
/* compute P(h+m) + Q(m) + h */
|
||||
for (i = 0; i < 2*COLS512; i++) {
|
||||
h[i] ^= Ptmp[i]^Qtmp[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* digest up to msglen bytes of input (full blocks only) */
|
||||
static void Transform(groestlHashState *ctx,
|
||||
const uint8_t *input,
|
||||
int msglen) {
|
||||
|
||||
/* digest message, one block at a time */
|
||||
for (; msglen >= SIZE512;
|
||||
msglen -= SIZE512, input += SIZE512) {
|
||||
F512(ctx->chaining,(uint32_t*)input);
|
||||
|
||||
/* increment block counter */
|
||||
ctx->block_counter1++;
|
||||
if (ctx->block_counter1 == 0) ctx->block_counter2++;
|
||||
}
|
||||
}
|
||||
|
||||
/* given state h, do h <- P(h)+h */
|
||||
static void OutputTransformation(groestlHashState *ctx) {
|
||||
int j;
|
||||
uint32_t temp[2*COLS512];
|
||||
uint32_t y[2*COLS512];
|
||||
uint32_t z[2*COLS512];
|
||||
|
||||
|
||||
|
||||
for (j = 0; j < 2*COLS512; j++) {
|
||||
temp[j] = ctx->chaining[j];
|
||||
}
|
||||
RND512P((uint8_t*)temp, y, 0x00000000);
|
||||
RND512P((uint8_t*)y, z, 0x00000001);
|
||||
RND512P((uint8_t*)z, y, 0x00000002);
|
||||
RND512P((uint8_t*)y, z, 0x00000003);
|
||||
RND512P((uint8_t*)z, y, 0x00000004);
|
||||
RND512P((uint8_t*)y, z, 0x00000005);
|
||||
RND512P((uint8_t*)z, y, 0x00000006);
|
||||
RND512P((uint8_t*)y, z, 0x00000007);
|
||||
RND512P((uint8_t*)z, y, 0x00000008);
|
||||
RND512P((uint8_t*)y, temp, 0x00000009);
|
||||
for (j = 0; j < 2*COLS512; j++) {
|
||||
ctx->chaining[j] ^= temp[j];
|
||||
}
|
||||
}
|
||||
|
||||
/* initialise context */
|
||||
static void Init(groestlHashState* ctx) {
|
||||
int i = 0;
|
||||
/* allocate memory for state and data buffer */
|
||||
|
||||
for(;i<(SIZE512/sizeof(uint32_t));i++)
|
||||
{
|
||||
ctx->chaining[i] = 0;
|
||||
}
|
||||
|
||||
/* set initial value */
|
||||
ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN);
|
||||
|
||||
/* set other variables */
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->block_counter1 = 0;
|
||||
ctx->block_counter2 = 0;
|
||||
ctx->bits_in_last_byte = 0;
|
||||
}
|
||||
|
||||
/* update state with databitlen bits of input */
|
||||
static void Update(groestlHashState* ctx,
|
||||
const BitSequence* input,
|
||||
DataLength databitlen) {
|
||||
int index = 0;
|
||||
int msglen = (int)(databitlen/8);
|
||||
int rem = (int)(databitlen%8);
|
||||
|
||||
/* if the buffer contains data that has not yet been digested, first
|
||||
add data to buffer until full */
|
||||
if (ctx->buf_ptr) {
|
||||
while (ctx->buf_ptr < SIZE512 && index < msglen) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
||||
}
|
||||
if (ctx->buf_ptr < SIZE512) {
|
||||
/* buffer still not full, return */
|
||||
if (rem) {
|
||||
ctx->bits_in_last_byte = rem;
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* digest buffer */
|
||||
ctx->buf_ptr = 0;
|
||||
Transform(ctx, ctx->buffer, SIZE512);
|
||||
}
|
||||
|
||||
/* digest bulk of message */
|
||||
Transform(ctx, input+index, msglen-index);
|
||||
index += ((msglen-index)/SIZE512)*SIZE512;
|
||||
|
||||
/* store remaining data in buffer */
|
||||
while (index < msglen) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
||||
}
|
||||
|
||||
/* if non-integral number of bytes have been supplied, store
|
||||
remaining bits in last byte, together with information about
|
||||
number of bits */
|
||||
if (rem) {
|
||||
ctx->bits_in_last_byte = rem;
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
||||
}
|
||||
}
|
||||
|
||||
#define BILB ctx->bits_in_last_byte
|
||||
|
||||
/* finalise: process remaining data (including padding), perform
|
||||
output transformation, and write hash result to 'output' */
|
||||
static void Final(groestlHashState* ctx,
|
||||
BitSequence* output) {
|
||||
int i, j = 0, hashbytelen = HASH_BIT_LEN/8;
|
||||
uint8_t *s = (BitSequence*)ctx->chaining;
|
||||
|
||||
/* pad with '1'-bit and first few '0'-bits */
|
||||
if (BILB) {
|
||||
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
|
||||
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
|
||||
BILB = 0;
|
||||
}
|
||||
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
|
||||
|
||||
/* pad with '0'-bits */
|
||||
if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
|
||||
/* padding requires two blocks */
|
||||
while (ctx->buf_ptr < SIZE512) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = 0;
|
||||
}
|
||||
/* digest first padding block */
|
||||
Transform(ctx, ctx->buffer, SIZE512);
|
||||
ctx->buf_ptr = 0;
|
||||
}
|
||||
while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = 0;
|
||||
}
|
||||
|
||||
/* length padding */
|
||||
ctx->block_counter1++;
|
||||
if (ctx->block_counter1 == 0) ctx->block_counter2++;
|
||||
ctx->buf_ptr = SIZE512;
|
||||
|
||||
while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) {
|
||||
ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
|
||||
ctx->block_counter1 >>= 8;
|
||||
}
|
||||
while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
|
||||
ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
|
||||
ctx->block_counter2 >>= 8;
|
||||
}
|
||||
/* digest final padding block */
|
||||
Transform(ctx, ctx->buffer, SIZE512);
|
||||
/* perform output transformation */
|
||||
OutputTransformation(ctx);
|
||||
|
||||
/* store hash result in output */
|
||||
for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) {
|
||||
output[j] = s[i];
|
||||
}
|
||||
|
||||
/* zeroise relevant variables and deallocate memory */
|
||||
for (i = 0; i < COLS512; i++) {
|
||||
ctx->chaining[i] = 0;
|
||||
}
|
||||
for (i = 0; i < SIZE512; i++) {
|
||||
ctx->buffer[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* hash bit sequence */
|
||||
void groestl(const BitSequence* data,
|
||||
DataLength databitlen,
|
||||
BitSequence* hashval) {
|
||||
|
||||
groestlHashState context;
|
||||
|
||||
/* initialise */
|
||||
Init(&context);
|
||||
|
||||
|
||||
/* process message */
|
||||
Update(&context, data, databitlen);
|
||||
|
||||
/* finalise */
|
||||
Final(&context, hashval);
|
||||
}
|
||||
/*
|
||||
static int crypto_hash(unsigned char *out,
|
||||
const unsigned char *in,
|
||||
unsigned long long len)
|
||||
{
|
||||
groestl(in, 8*len, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*/
|
@@ -1,60 +0,0 @@
|
||||
#ifndef __hash_h
|
||||
#define __hash_h
|
||||
/*
|
||||
#include "crypto_uint8.h"
|
||||
#include "crypto_uint32.h"
|
||||
#include "crypto_uint64.h"
|
||||
#include "crypto_hash.h"
|
||||
|
||||
typedef crypto_uint8 uint8_t;
|
||||
typedef crypto_uint32 uint32_t;
|
||||
typedef crypto_uint64 uint64_t;
|
||||
*/
|
||||
#include <stdint.h>
|
||||
|
||||
#include "hash.h"
|
||||
|
||||
/* some sizes (number of bytes) */
|
||||
#define ROWS 8
|
||||
#define LENGTHFIELDLEN ROWS
|
||||
#define COLS512 8
|
||||
|
||||
#define SIZE512 (ROWS*COLS512)
|
||||
|
||||
#define ROUNDS512 10
|
||||
#define HASH_BIT_LEN 256
|
||||
|
||||
#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff))
|
||||
|
||||
|
||||
#define li_32(h) 0x##h##u
|
||||
#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n)))
|
||||
#define u32BIG(a) \
|
||||
((ROTL32(a,8) & li_32(00FF00FF)) | \
|
||||
(ROTL32(a,24) & li_32(FF00FF00)))
|
||||
|
||||
|
||||
/* NIST API begin */
|
||||
typedef struct {
|
||||
uint32_t chaining[SIZE512/sizeof(uint32_t)]; /* actual state */
|
||||
uint32_t block_counter1,
|
||||
block_counter2; /* message block counter(s) */
|
||||
BitSequence buffer[SIZE512]; /* data buffer */
|
||||
int buf_ptr; /* data buffer pointer */
|
||||
int bits_in_last_byte; /* no. of message bits in last byte of
|
||||
data buffer */
|
||||
} groestlHashState;
|
||||
|
||||
/*void Init(hashState*);
|
||||
void Update(hashState*, const BitSequence*, DataLength);
|
||||
void Final(hashState*, BitSequence*); */
|
||||
void groestl(const BitSequence*, DataLength, BitSequence*);
|
||||
/* NIST API end */
|
||||
|
||||
/*
|
||||
int crypto_hash(unsigned char *out,
|
||||
const unsigned char *in,
|
||||
unsigned long long len);
|
||||
*/
|
||||
|
||||
#endif /* __hash_h */
|
366
crypto/c_jh.c
366
crypto/c_jh.c
@@ -1,366 +0,0 @@
|
||||
/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
|
||||
|
||||
--------------------------------
|
||||
Performance
|
||||
|
||||
Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
|
||||
Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
|
||||
Speed for long message:
|
||||
1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2
|
||||
2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3
|
||||
|
||||
--------------------------------
|
||||
Last Modified: January 16, 2011
|
||||
*/
|
||||
|
||||
#include "c_jh.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/*typedef unsigned long long uint64;*/
|
||||
typedef uint64_t uint64;
|
||||
|
||||
/*define data alignment for different C compilers*/
|
||||
#if defined(__GNUC__)
|
||||
#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
|
||||
#else
|
||||
#define DATA_ALIGN16(x) __declspec(align(16)) x
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct {
|
||||
int hashbitlen; /*the message digest size*/
|
||||
unsigned long long databitlen; /*the message size in bits*/
|
||||
unsigned long long datasize_in_buffer; /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
|
||||
DATA_ALIGN16(uint64 x[8][2]); /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
|
||||
unsigned char buffer[64]; /*the 512-bit message block to be hashed;*/
|
||||
} hashState;
|
||||
|
||||
|
||||
/*The initial hash value H(0)*/
|
||||
const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e};
|
||||
const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69};
|
||||
const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f};
|
||||
const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b};
|
||||
|
||||
/*42 round constants, each round constant is 32-byte (256-bit)*/
|
||||
const unsigned char E8_bitslice_roundconstant[42][32]={
|
||||
{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40},
|
||||
{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31},
|
||||
{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc},
|
||||
{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3},
|
||||
{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23},
|
||||
{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97},
|
||||
{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14},
|
||||
{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4},
|
||||
{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36},
|
||||
{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f},
|
||||
{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b},
|
||||
{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62},
|
||||
{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5},
|
||||
{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f},
|
||||
{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a},
|
||||
{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf},
|
||||
{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0},
|
||||
{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a},
|
||||
{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6},
|
||||
{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67},
|
||||
{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18},
|
||||
{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e},
|
||||
{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1},
|
||||
{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83},
|
||||
{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef},
|
||||
{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65},
|
||||
{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c},
|
||||
{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71},
|
||||
{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0},
|
||||
{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f},
|
||||
{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad},
|
||||
{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6},
|
||||
{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63},
|
||||
{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f},
|
||||
{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a},
|
||||
{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5},
|
||||
{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48},
|
||||
{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e},
|
||||
{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7},
|
||||
{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde},
|
||||
{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a},
|
||||
{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
|
||||
|
||||
|
||||
static void E8(hashState *state); /*The bijective function E8, in bitslice form*/
|
||||
static void F8(hashState *state); /*The compression function F8 */
|
||||
|
||||
/*The API functions*/
|
||||
static HashReturn Init(hashState *state, int hashbitlen);
|
||||
static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
|
||||
static HashReturn Final(hashState *state, BitSequence *hashval);
|
||||
HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval);
|
||||
|
||||
/*swapping bit 2i with bit 2i+1 of 64-bit x*/
|
||||
#define SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
|
||||
/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/
|
||||
#define SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2));
|
||||
/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/
|
||||
#define SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4));
|
||||
/*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/
|
||||
#define SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8));
|
||||
/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/
|
||||
#define SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16));
|
||||
/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/
|
||||
#define SWAP32(x) (x) = (((x) << 32) | ((x) >> 32));
|
||||
|
||||
/*The MDS transform*/
|
||||
#define L(m0,m1,m2,m3,m4,m5,m6,m7) \
|
||||
(m4) ^= (m1); \
|
||||
(m5) ^= (m2); \
|
||||
(m6) ^= (m0) ^ (m3); \
|
||||
(m7) ^= (m0); \
|
||||
(m0) ^= (m5); \
|
||||
(m1) ^= (m6); \
|
||||
(m2) ^= (m4) ^ (m7); \
|
||||
(m3) ^= (m4);
|
||||
|
||||
/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/
|
||||
/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/
|
||||
#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \
|
||||
m3 = ~(m3); \
|
||||
m7 = ~(m7); \
|
||||
m0 ^= ((~(m2)) & (cc0)); \
|
||||
m4 ^= ((~(m6)) & (cc1)); \
|
||||
temp0 = (cc0) ^ ((m0) & (m1));\
|
||||
temp1 = (cc1) ^ ((m4) & (m5));\
|
||||
m0 ^= ((m2) & (m3)); \
|
||||
m4 ^= ((m6) & (m7)); \
|
||||
m3 ^= ((~(m1)) & (m2)); \
|
||||
m7 ^= ((~(m5)) & (m6)); \
|
||||
m1 ^= ((m0) & (m2)); \
|
||||
m5 ^= ((m4) & (m6)); \
|
||||
m2 ^= ((m0) & (~(m3))); \
|
||||
m6 ^= ((m4) & (~(m7))); \
|
||||
m0 ^= ((m1) | (m3)); \
|
||||
m4 ^= ((m5) | (m7)); \
|
||||
m3 ^= ((m1) & (m2)); \
|
||||
m7 ^= ((m5) & (m6)); \
|
||||
m1 ^= (temp0 & (m0)); \
|
||||
m5 ^= (temp1 & (m4)); \
|
||||
m2 ^= temp0; \
|
||||
m6 ^= temp1;
|
||||
|
||||
/*The bijective function E8, in bitslice form*/
|
||||
static void E8(hashState *state)
|
||||
{
|
||||
uint64 i,roundnumber,temp0,temp1;
|
||||
|
||||
for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) {
|
||||
/*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] );
|
||||
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
|
||||
SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]);
|
||||
}
|
||||
|
||||
/*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] );
|
||||
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
|
||||
SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]);
|
||||
}
|
||||
|
||||
/*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] );
|
||||
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
|
||||
SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]);
|
||||
}
|
||||
|
||||
/*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] );
|
||||
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
|
||||
SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]);
|
||||
}
|
||||
|
||||
/*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] );
|
||||
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
|
||||
SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]);
|
||||
}
|
||||
|
||||
/*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] );
|
||||
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
|
||||
SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]);
|
||||
}
|
||||
|
||||
/*round 7*roundnumber+6: Sbox and MDS layers*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] );
|
||||
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
|
||||
}
|
||||
/*round 7*roundnumber+6: swapping layer*/
|
||||
for (i = 1; i < 8; i = i+2) {
|
||||
temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*The compression function F8 */
|
||||
static void F8(hashState *state)
|
||||
{
|
||||
uint64 i;
|
||||
|
||||
/*xor the 512-bit message with the fist half of the 1024-bit hash state*/
|
||||
for (i = 0; i < 8; i++) state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
|
||||
|
||||
/*the bijective function E8 */
|
||||
E8(state);
|
||||
|
||||
/*xor the 512-bit message with the second half of the 1024-bit hash state*/
|
||||
for (i = 0; i < 8; i++) state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
|
||||
}
|
||||
|
||||
/*before hashing a message, initialize the hash state as H0 */
|
||||
static HashReturn Init(hashState *state, int hashbitlen)
|
||||
{
|
||||
state->databitlen = 0;
|
||||
state->datasize_in_buffer = 0;
|
||||
|
||||
/*initialize the initial hash value of JH*/
|
||||
state->hashbitlen = hashbitlen;
|
||||
|
||||
/*load the intital hash value into state*/
|
||||
switch (hashbitlen)
|
||||
{
|
||||
case 224: memcpy(state->x,JH224_H0,128); break;
|
||||
case 256: memcpy(state->x,JH256_H0,128); break;
|
||||
case 384: memcpy(state->x,JH384_H0,128); break;
|
||||
case 512: memcpy(state->x,JH512_H0,128); break;
|
||||
}
|
||||
|
||||
return(SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
/*hash each 512-bit message block, except the last partial block*/
|
||||
static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
|
||||
{
|
||||
DataLength index; /*the starting address of the data to be compressed*/
|
||||
|
||||
state->databitlen += databitlen;
|
||||
index = 0;
|
||||
|
||||
/*if there is remaining data in the buffer, fill it to a full message block first*/
|
||||
/*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/
|
||||
|
||||
/*There is data in the buffer, but the incoming data is insufficient for a full block*/
|
||||
if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) ) {
|
||||
if ( (databitlen & 7) == 0 )
|
||||
memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64-(state->datasize_in_buffer >> 3)) );
|
||||
else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64 - (state->datasize_in_buffer >> 3) + 1) );
|
||||
state->datasize_in_buffer += databitlen;
|
||||
databitlen = 0;
|
||||
}
|
||||
|
||||
/*There is data in the buffer, and the incoming data is sufficient for a full block*/
|
||||
if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) {
|
||||
memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64-(state->datasize_in_buffer >> 3)) );
|
||||
index = 64-(state->datasize_in_buffer >> 3);
|
||||
databitlen = databitlen - (512 - state->datasize_in_buffer);
|
||||
F8(state);
|
||||
state->datasize_in_buffer = 0;
|
||||
}
|
||||
|
||||
/*hash the remaining full message blocks*/
|
||||
for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
|
||||
memcpy(state->buffer, data+index, 64);
|
||||
F8(state);
|
||||
}
|
||||
|
||||
/*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
|
||||
if ( databitlen > 0) {
|
||||
if ((databitlen & 7) == 0)
|
||||
memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
|
||||
else
|
||||
memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
|
||||
state->datasize_in_buffer = databitlen;
|
||||
}
|
||||
|
||||
return(SUCCESS);
|
||||
}
|
||||
|
||||
/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/
|
||||
static HashReturn Final(hashState *state, BitSequence *hashval)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if ( (state->databitlen & 0x1ff) == 0 ) {
|
||||
/*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
|
||||
memset(state->buffer, 0, 64);
|
||||
state->buffer[0] = 0x80;
|
||||
state->buffer[63] = state->databitlen & 0xff;
|
||||
state->buffer[62] = (state->databitlen >> 8) & 0xff;
|
||||
state->buffer[61] = (state->databitlen >> 16) & 0xff;
|
||||
state->buffer[60] = (state->databitlen >> 24) & 0xff;
|
||||
state->buffer[59] = (state->databitlen >> 32) & 0xff;
|
||||
state->buffer[58] = (state->databitlen >> 40) & 0xff;
|
||||
state->buffer[57] = (state->databitlen >> 48) & 0xff;
|
||||
state->buffer[56] = (state->databitlen >> 56) & 0xff;
|
||||
F8(state);
|
||||
}
|
||||
else {
|
||||
/*set the rest of the bytes in the buffer to 0*/
|
||||
if ( (state->datasize_in_buffer & 7) == 0)
|
||||
for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) state->buffer[i] = 0;
|
||||
else
|
||||
for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) state->buffer[i] = 0;
|
||||
|
||||
/*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
|
||||
state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
|
||||
|
||||
F8(state);
|
||||
memset(state->buffer, 0, 64);
|
||||
state->buffer[63] = state->databitlen & 0xff;
|
||||
state->buffer[62] = (state->databitlen >> 8) & 0xff;
|
||||
state->buffer[61] = (state->databitlen >> 16) & 0xff;
|
||||
state->buffer[60] = (state->databitlen >> 24) & 0xff;
|
||||
state->buffer[59] = (state->databitlen >> 32) & 0xff;
|
||||
state->buffer[58] = (state->databitlen >> 40) & 0xff;
|
||||
state->buffer[57] = (state->databitlen >> 48) & 0xff;
|
||||
state->buffer[56] = (state->databitlen >> 56) & 0xff;
|
||||
F8(state);
|
||||
}
|
||||
|
||||
/*truncating the final hash value to generate the message digest*/
|
||||
switch(state->hashbitlen) {
|
||||
case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28); break;
|
||||
case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32); break;
|
||||
case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48); break;
|
||||
case 512: memcpy(hashval,(unsigned char*)state->x+64,64); break;
|
||||
}
|
||||
|
||||
return(SUCCESS);
|
||||
}
|
||||
|
||||
/* hash a message,
|
||||
three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen)
|
||||
one output: message digest (hashval)
|
||||
*/
|
||||
HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval)
|
||||
{
|
||||
hashState state;
|
||||
|
||||
if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) {
|
||||
Init(&state, hashbitlen);
|
||||
Update(&state, data, databitlen);
|
||||
Final(&state, hashval);
|
||||
return SUCCESS;
|
||||
}
|
||||
else
|
||||
return(BAD_HASHLEN);
|
||||
}
|
@@ -1,19 +0,0 @@
|
||||
/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
|
||||
|
||||
--------------------------------
|
||||
Performance
|
||||
|
||||
Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
|
||||
Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
|
||||
Speed for long message:
|
||||
1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2
|
||||
2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3
|
||||
|
||||
--------------------------------
|
||||
Last Modified: January 16, 2011
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "hash.h"
|
||||
|
||||
HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
|
@@ -1,123 +0,0 @@
|
||||
// keccak.c
|
||||
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
// A baseline Keccak (3rd round) implementation.
|
||||
|
||||
#include "hash-ops.h"
|
||||
#include "c_keccak.h"
|
||||
|
||||
const uint64_t keccakf_rndc[24] =
|
||||
{
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
|
||||
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
|
||||
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
|
||||
const int keccakf_rotc[24] =
|
||||
{
|
||||
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
|
||||
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
|
||||
};
|
||||
|
||||
const int keccakf_piln[24] =
|
||||
{
|
||||
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
|
||||
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
|
||||
};
|
||||
|
||||
// update the state with given number of rounds
|
||||
|
||||
void keccakf(uint64_t st[25], int rounds)
|
||||
{
|
||||
int i, j, round;
|
||||
uint64_t t, bc[5];
|
||||
|
||||
for (round = 0; round < rounds; ++round) {
|
||||
|
||||
// Theta
|
||||
bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
|
||||
bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
|
||||
bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
|
||||
bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
|
||||
bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
|
||||
st[i ] ^= t;
|
||||
st[i + 5] ^= t;
|
||||
st[i + 10] ^= t;
|
||||
st[i + 15] ^= t;
|
||||
st[i + 20] ^= t;
|
||||
}
|
||||
|
||||
// Rho Pi
|
||||
t = st[1];
|
||||
for (i = 0; i < 24; ++i) {
|
||||
bc[0] = st[keccakf_piln[i]];
|
||||
st[keccakf_piln[i]] = ROTL64(t, keccakf_rotc[i]);
|
||||
t = bc[0];
|
||||
}
|
||||
|
||||
// Chi
|
||||
for (j = 0; j < 25; j += 5) {
|
||||
bc[0] = st[j ];
|
||||
bc[1] = st[j + 1];
|
||||
bc[2] = st[j + 2];
|
||||
bc[3] = st[j + 3];
|
||||
bc[4] = st[j + 4];
|
||||
st[j ] ^= (~bc[1]) & bc[2];
|
||||
st[j + 1] ^= (~bc[2]) & bc[3];
|
||||
st[j + 2] ^= (~bc[3]) & bc[4];
|
||||
st[j + 3] ^= (~bc[4]) & bc[0];
|
||||
st[j + 4] ^= (~bc[0]) & bc[1];
|
||||
}
|
||||
|
||||
// Iota
|
||||
st[0] ^= keccakf_rndc[round];
|
||||
}
|
||||
}
|
||||
|
||||
// compute a keccak hash (md) of given byte length from "in"
|
||||
typedef uint64_t state_t[25];
|
||||
|
||||
int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
|
||||
{
|
||||
state_t st;
|
||||
uint8_t temp[144];
|
||||
int i, rsiz, rsizw;
|
||||
|
||||
rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
|
||||
rsizw = rsiz / 8;
|
||||
|
||||
memset(st, 0, sizeof(st));
|
||||
|
||||
for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
|
||||
for (i = 0; i < rsizw; i++)
|
||||
st[i] ^= ((uint64_t *) in)[i];
|
||||
keccakf(st, KECCAK_ROUNDS);
|
||||
}
|
||||
|
||||
// last block and padding
|
||||
memcpy(temp, in, inlen);
|
||||
temp[inlen++] = 1;
|
||||
memset(temp + inlen, 0, rsiz - inlen);
|
||||
temp[rsiz - 1] |= 0x80;
|
||||
|
||||
for (i = 0; i < rsizw; i++)
|
||||
st[i] ^= ((uint64_t *) temp)[i];
|
||||
|
||||
keccakf(st, KECCAK_ROUNDS);
|
||||
|
||||
memcpy(md, st, mdlen);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
|
||||
{
|
||||
keccak(in, inlen, md, sizeof(state_t));
|
||||
}
|
@@ -1,26 +0,0 @@
|
||||
// keccak.h
|
||||
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
|
||||
#ifndef KECCAK_H
|
||||
#define KECCAK_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef KECCAK_ROUNDS
|
||||
#define KECCAK_ROUNDS 24
|
||||
#endif
|
||||
|
||||
#ifndef ROTL64
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
#endif
|
||||
|
||||
// compute a keccak hash (md) of given byte length from "in"
|
||||
int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
|
||||
|
||||
// update the state
|
||||
void keccakf(uint64_t st[25], int norounds);
|
||||
|
||||
void keccak1600(const uint8_t *in, int inlen, uint8_t *md);
|
||||
|
||||
#endif
|
2036
crypto/c_skein.c
2036
crypto/c_skein.c
File diff suppressed because it is too large
Load Diff
@@ -1,47 +0,0 @@
|
||||
#ifndef _SKEIN_H_
|
||||
#define _SKEIN_H_ 1
|
||||
/**************************************************************************
|
||||
**
|
||||
** Interface declarations and internal definitions for Skein hashing.
|
||||
**
|
||||
** Source code author: Doug Whiting, 2008.
|
||||
**
|
||||
** This algorithm and source code is released to the public domain.
|
||||
**
|
||||
***************************************************************************
|
||||
**
|
||||
** The following compile-time switches may be defined to control some
|
||||
** tradeoffs between speed, code size, error checking, and security.
|
||||
**
|
||||
** The "default" note explains what happens when the switch is not defined.
|
||||
**
|
||||
** SKEIN_DEBUG -- make callouts from inside Skein code
|
||||
** to examine/display intermediate values.
|
||||
** [default: no callouts (no overhead)]
|
||||
**
|
||||
** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
|
||||
** code. If not defined, most error checking
|
||||
** is disabled (for performance). Otherwise,
|
||||
** the switch value is interpreted as:
|
||||
** 0: use assert() to flag errors
|
||||
** 1: return SKEIN_FAIL to flag errors
|
||||
**
|
||||
***************************************************************************/
|
||||
#include "skein_port.h" /* get platform-specific definitions */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SKEIN_SUCCESS = 0, /* return codes from Skein calls */
|
||||
SKEIN_FAIL = 1,
|
||||
SKEIN_BAD_HASHLEN = 2
|
||||
}
|
||||
SkeinHashReturn;
|
||||
|
||||
typedef size_t SkeinDataLength; /* bit count type */
|
||||
typedef u08b_t SkeinBitSequence; /* bit stream type */
|
||||
|
||||
/* "all-in-one" call */
|
||||
SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data,
|
||||
SkeinDataLength databitlen, SkeinBitSequence *hashval);
|
||||
|
||||
#endif /* ifndef _SKEIN_H_ */
|
@@ -1,38 +0,0 @@
|
||||
#ifndef __tables_h
|
||||
#define __tables_h
|
||||
|
||||
|
||||
const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc
|
||||
, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
|
||||
, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
|
||||
, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
|
||||
, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
|
||||
, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
|
||||
, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
|
||||
, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
|
||||
, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
|
||||
, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
|
||||
, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
|
||||
, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
|
||||
, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
|
||||
, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
|
||||
, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
|
||||
, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
|
||||
, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
|
||||
, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
|
||||
, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
|
||||
, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
|
||||
, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
|
||||
, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
|
||||
, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
|
||||
, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
|
||||
, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
|
||||
, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
|
||||
, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
|
||||
, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
|
||||
, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
|
||||
, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
|
||||
, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
|
||||
, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e};
|
||||
|
||||
#endif /* __tables_h */
|
@@ -1,59 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "int-util.h"
|
||||
|
||||
#if 0
|
||||
static inline void *padd(void *p, size_t i) {
|
||||
return (char *) p + i;
|
||||
}
|
||||
|
||||
static inline const void *cpadd(const void *p, size_t i) {
|
||||
return (const char *) p + i;
|
||||
}
|
||||
|
||||
static inline void place_length(uint8_t *buffer, size_t bufsize, size_t length) {
|
||||
if (sizeof(size_t) == 4) {
|
||||
*(uint32_t*) padd(buffer, bufsize - 4) = swap32be(length);
|
||||
} else {
|
||||
*(uint64_t*) padd(buffer, bufsize - 8) = swap64be(length);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union hash_state {
|
||||
uint8_t b[200];
|
||||
uint64_t w[25];
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
void hash_permutation(union hash_state *state);
|
||||
void hash_process(union hash_state *state, const uint8_t *buf, int count);
|
||||
|
||||
#endif
|
||||
|
||||
enum {
|
||||
HASH_SIZE = 32,
|
||||
HASH_DATA_AREA = 136
|
||||
};
|
||||
|
||||
void cn_fast_hash(const void *data, int len, char *hash);
|
||||
void cn_slow_hash(const void *data, size_t length, char *hash);
|
||||
|
||||
void hash_extra_blake(const void *data, size_t length, char *hash);
|
||||
void hash_extra_groestl(const void *data, size_t length, char *hash);
|
||||
void hash_extra_jh(const void *data, size_t length, char *hash);
|
||||
void hash_extra_skein(const void *data, size_t length, char *hash);
|
||||
|
||||
void tree_hash(const char (*hashes)[HASH_SIZE], size_t count, char *root_hash);
|
@@ -1,24 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "hash-ops.h"
|
||||
#include "c_keccak.h"
|
||||
|
||||
void hash_permutation(union hash_state *state) {
|
||||
keccakf((uint64_t*)state, 24);
|
||||
}
|
||||
|
||||
void hash_process(union hash_state *state, const uint8_t *buf, int count) {
|
||||
keccak1600(buf, count, (uint8_t*)state);
|
||||
}
|
||||
|
||||
void cn_fast_hash(const void *data, int len, char *hash) {
|
||||
union hash_state state;
|
||||
hash_process(&state, data, len);
|
||||
memcpy(hash, &state, HASH_SIZE);
|
||||
}
|
@@ -1,5 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
typedef unsigned char BitSequence;
|
||||
typedef unsigned long long DataLength;
|
||||
typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn;
|
@@ -1,195 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#pragma once
|
||||
#ifndef INT_UTILS_H_
|
||||
#define INT_UTILS_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#ifndef _MSC_VER
|
||||
#include <sys/param.h>
|
||||
#else
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
#ifndef LITTLE_ENDIAN
|
||||
#define LITTLE_ENDIAN 0x1234
|
||||
#define BIG_ENDIAN 0x4321
|
||||
#endif
|
||||
|
||||
#if !defined(BYTE_ORDER) && (defined(__LITTLE_ENDIAN__) || defined(__arm__) || defined(WIN32))
|
||||
#define BYTE_ORDER LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#if defined(WIN32)
|
||||
#include <stdlib.h>
|
||||
|
||||
static inline uint32_t rol32(uint32_t x, int r) {
|
||||
return _rotl(x, r);
|
||||
}
|
||||
|
||||
static inline uint64_t rol64(uint64_t x, int r) {
|
||||
return _rotl64(x, r);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline uint32_t rol32(uint32_t x, int r) {
|
||||
return (x << (r & 31)) | (x >> (-r & 31));
|
||||
}
|
||||
|
||||
static inline uint64_t rol64(uint64_t x, int r) {
|
||||
return (x << (r & 63)) | (x >> (-r & 63));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline uint64_t hi_dword(uint64_t val) {
|
||||
return val >> 32;
|
||||
}
|
||||
|
||||
static inline uint64_t lo_dword(uint64_t val) {
|
||||
return val & 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
static inline uint64_t div_with_reminder(uint64_t dividend, uint32_t divisor, uint32_t* remainder) {
|
||||
dividend |= ((uint64_t)*remainder) << 32;
|
||||
*remainder = dividend % divisor;
|
||||
return dividend / divisor;
|
||||
}
|
||||
|
||||
// Long division with 2^32 base
|
||||
static inline uint32_t div128_32(uint64_t dividend_hi, uint64_t dividend_lo, uint32_t divisor, uint64_t* quotient_hi, uint64_t* quotient_lo) {
|
||||
uint64_t dividend_dwords[4];
|
||||
uint32_t remainder = 0;
|
||||
|
||||
dividend_dwords[3] = hi_dword(dividend_hi);
|
||||
dividend_dwords[2] = lo_dword(dividend_hi);
|
||||
dividend_dwords[1] = hi_dword(dividend_lo);
|
||||
dividend_dwords[0] = lo_dword(dividend_lo);
|
||||
|
||||
*quotient_hi = div_with_reminder(dividend_dwords[3], divisor, &remainder) << 32;
|
||||
*quotient_hi |= div_with_reminder(dividend_dwords[2], divisor, &remainder);
|
||||
*quotient_lo = div_with_reminder(dividend_dwords[1], divisor, &remainder) << 32;
|
||||
*quotient_lo |= div_with_reminder(dividend_dwords[0], divisor, &remainder);
|
||||
|
||||
return remainder;
|
||||
}
|
||||
|
||||
#define IDENT32(x) ((uint32_t) (x))
|
||||
#define IDENT64(x) ((uint64_t) (x))
|
||||
|
||||
#define SWAP32(x) ((((uint32_t) (x) & 0x000000ff) << 24) | \
|
||||
(((uint32_t) (x) & 0x0000ff00) << 8) | \
|
||||
(((uint32_t) (x) & 0x00ff0000) >> 8) | \
|
||||
(((uint32_t) (x) & 0xff000000) >> 24))
|
||||
#define SWAP64(x) ((((uint64_t) (x) & 0x00000000000000ff) << 56) | \
|
||||
(((uint64_t) (x) & 0x000000000000ff00) << 40) | \
|
||||
(((uint64_t) (x) & 0x0000000000ff0000) << 24) | \
|
||||
(((uint64_t) (x) & 0x00000000ff000000) << 8) | \
|
||||
(((uint64_t) (x) & 0x000000ff00000000) >> 8) | \
|
||||
(((uint64_t) (x) & 0x0000ff0000000000) >> 24) | \
|
||||
(((uint64_t) (x) & 0x00ff000000000000) >> 40) | \
|
||||
(((uint64_t) (x) & 0xff00000000000000) >> 56))
|
||||
|
||||
static inline uint32_t ident32(uint32_t x) { return x; }
|
||||
static inline uint64_t ident64(uint64_t x) { return x; }
|
||||
|
||||
static inline uint32_t swap32(uint32_t x) {
|
||||
x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8);
|
||||
return (x << 16) | (x >> 16);
|
||||
}
|
||||
static inline uint64_t swap64(uint64_t x) {
|
||||
x = ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8);
|
||||
x = ((x & 0x0000ffff0000ffff) << 16) | ((x & 0xffff0000ffff0000) >> 16);
|
||||
return (x << 32) | (x >> 32);
|
||||
}
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define UNUSED __attribute__((unused))
|
||||
#else
|
||||
#define UNUSED
|
||||
#endif
|
||||
static inline void mem_inplace_ident(void *mem UNUSED, size_t n UNUSED) { }
|
||||
#undef UNUSED
|
||||
|
||||
static inline void mem_inplace_swap32(void *mem, size_t n) {
|
||||
size_t i;
|
||||
for (i = 0; i < n; i++) {
|
||||
((uint32_t *) mem)[i] = swap32(((const uint32_t *) mem)[i]);
|
||||
}
|
||||
}
|
||||
static inline void mem_inplace_swap64(void *mem, size_t n) {
|
||||
size_t i;
|
||||
for (i = 0; i < n; i++) {
|
||||
((uint64_t *) mem)[i] = swap64(((const uint64_t *) mem)[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void memcpy_ident32(void *dst, const void *src, size_t n) {
|
||||
memcpy(dst, src, 4 * n);
|
||||
}
|
||||
static inline void memcpy_ident64(void *dst, const void *src, size_t n) {
|
||||
memcpy(dst, src, 8 * n);
|
||||
}
|
||||
|
||||
static inline void memcpy_swap32(void *dst, const void *src, size_t n) {
|
||||
size_t i;
|
||||
for (i = 0; i < n; i++) {
|
||||
((uint32_t *) dst)[i] = swap32(((const uint32_t *) src)[i]);
|
||||
}
|
||||
}
|
||||
static inline void memcpy_swap64(void *dst, const void *src, size_t n) {
|
||||
size_t i;
|
||||
for (i = 0; i < n; i++) {
|
||||
((uint64_t *) dst)[i] = swap64(((const uint64_t *) src)[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(BYTE_ORDER) || !defined(LITTLE_ENDIAN) || !defined(BIG_ENDIAN)
|
||||
static_assert(false, "BYTE_ORDER is undefined. Perhaps, GNU extensions are not enabled");
|
||||
#endif
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
#define SWAP32LE IDENT32
|
||||
#define SWAP32BE SWAP32
|
||||
#define swap32le ident32
|
||||
#define swap32be swap32
|
||||
#define mem_inplace_swap32le mem_inplace_ident
|
||||
#define mem_inplace_swap32be mem_inplace_swap32
|
||||
#define memcpy_swap32le memcpy_ident32
|
||||
#define memcpy_swap32be memcpy_swap32
|
||||
#define SWAP64LE IDENT64
|
||||
#define SWAP64BE SWAP64
|
||||
#define swap64le ident64
|
||||
#define swap64be swap64
|
||||
#define mem_inplace_swap64le mem_inplace_ident
|
||||
#define mem_inplace_swap64be mem_inplace_swap64
|
||||
#define memcpy_swap64le memcpy_ident64
|
||||
#define memcpy_swap64be memcpy_swap64
|
||||
#endif
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
#define SWAP32BE IDENT32
|
||||
#define SWAP32LE SWAP32
|
||||
#define swap32be ident32
|
||||
#define swap32le swap32
|
||||
#define mem_inplace_swap32be mem_inplace_ident
|
||||
#define mem_inplace_swap32le mem_inplace_swap32
|
||||
#define memcpy_swap32be memcpy_ident32
|
||||
#define memcpy_swap32le memcpy_swap32
|
||||
#define SWAP64BE IDENT64
|
||||
#define SWAP64LE SWAP64
|
||||
#define swap64be ident64
|
||||
#define swap64le swap64
|
||||
#define mem_inplace_swap64be mem_inplace_ident
|
||||
#define mem_inplace_swap64le mem_inplace_swap64
|
||||
#define memcpy_swap64be memcpy_ident64
|
||||
#define memcpy_swap64le memcpy_swap64
|
||||
#endif
|
||||
|
||||
#endif /* INT_UTILS_H_ */
|
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
* OpenAES License
|
||||
* ---------------------------------------------------------------------------
|
||||
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _OAES_CONFIG_H
|
||||
#define _OAES_CONFIG_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//#ifndef OAES_HAVE_ISAAC
|
||||
//#define OAES_HAVE_ISAAC 1
|
||||
//#endif // OAES_HAVE_ISAAC
|
||||
|
||||
//#ifndef OAES_DEBUG
|
||||
//#define OAES_DEBUG 0
|
||||
//#endif // OAES_DEBUG
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _OAES_CONFIG_H
|
1461
crypto/oaes_lib.c
1461
crypto/oaes_lib.c
File diff suppressed because it is too large
Load Diff
@@ -1,214 +0,0 @@
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
* OpenAES License
|
||||
* ---------------------------------------------------------------------------
|
||||
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _OAES_LIB_H
|
||||
#define _OAES_LIB_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef OAES_SHARED
|
||||
# ifdef oaes_lib_EXPORTS
|
||||
# define OAES_API __declspec(dllexport)
|
||||
# else
|
||||
# define OAES_API __declspec(dllimport)
|
||||
# endif
|
||||
# else
|
||||
# define OAES_API
|
||||
# endif
|
||||
#else
|
||||
# define OAES_API
|
||||
#endif // WIN32
|
||||
|
||||
#define OAES_VERSION "0.8.1"
|
||||
#define OAES_BLOCK_SIZE 16
|
||||
|
||||
typedef void OAES_CTX;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
OAES_RET_FIRST = 0,
|
||||
OAES_RET_SUCCESS = 0,
|
||||
OAES_RET_UNKNOWN,
|
||||
OAES_RET_ARG1,
|
||||
OAES_RET_ARG2,
|
||||
OAES_RET_ARG3,
|
||||
OAES_RET_ARG4,
|
||||
OAES_RET_ARG5,
|
||||
OAES_RET_NOKEY,
|
||||
OAES_RET_MEM,
|
||||
OAES_RET_BUF,
|
||||
OAES_RET_HEADER,
|
||||
OAES_RET_COUNT
|
||||
} OAES_RET;
|
||||
|
||||
/*
|
||||
* oaes_set_option() takes one of these values for its [option] parameter
|
||||
* some options accept either an optional or a required [value] parameter
|
||||
*/
|
||||
// no option
|
||||
#define OAES_OPTION_NONE 0
|
||||
// enable ECB mode, disable CBC mode
|
||||
#define OAES_OPTION_ECB 1
|
||||
// enable CBC mode, disable ECB mode
|
||||
// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
|
||||
// the value of the initialization vector, iv
|
||||
#define OAES_OPTION_CBC 2
|
||||
|
||||
#ifdef OAES_DEBUG
|
||||
typedef int ( * oaes_step_cb ) (
|
||||
const uint8_t state[OAES_BLOCK_SIZE],
|
||||
const char * step_name,
|
||||
int step_count,
|
||||
void * user_data );
|
||||
// enable state stepping mode
|
||||
// value is required, must pass oaes_step_cb to receive the state at each step
|
||||
#define OAES_OPTION_STEP_ON 4
|
||||
// disable state stepping mode
|
||||
#define OAES_OPTION_STEP_OFF 8
|
||||
#endif // OAES_DEBUG
|
||||
|
||||
typedef uint16_t OAES_OPTION;
|
||||
|
||||
typedef struct _oaes_key
|
||||
{
|
||||
size_t data_len;
|
||||
uint8_t *data;
|
||||
size_t exp_data_len;
|
||||
uint8_t *exp_data;
|
||||
size_t num_keys;
|
||||
size_t key_base;
|
||||
} oaes_key;
|
||||
|
||||
typedef struct _oaes_ctx
|
||||
{
|
||||
#ifdef OAES_HAVE_ISAAC
|
||||
randctx * rctx;
|
||||
#endif // OAES_HAVE_ISAAC
|
||||
|
||||
#ifdef OAES_DEBUG
|
||||
oaes_step_cb step_cb;
|
||||
#endif // OAES_DEBUG
|
||||
|
||||
oaes_key * key;
|
||||
OAES_OPTION options;
|
||||
uint8_t iv[OAES_BLOCK_SIZE];
|
||||
} oaes_ctx;
|
||||
/*
|
||||
* // usage:
|
||||
*
|
||||
* OAES_CTX * ctx = oaes_alloc();
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* {
|
||||
* oaes_gen_key_xxx( ctx );
|
||||
* {
|
||||
* oaes_key_export( ctx, _buf, &_buf_len );
|
||||
* // or
|
||||
* oaes_key_export_data( ctx, _buf, &_buf_len );\
|
||||
* }
|
||||
* }
|
||||
* // or
|
||||
* {
|
||||
* oaes_key_import( ctx, _buf, _buf_len );
|
||||
* // or
|
||||
* oaes_key_import_data( ctx, _buf, _buf_len );
|
||||
* }
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* oaes_encrypt( ctx, m, m_len, c, &c_len );
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* oaes_decrypt( ctx, c, c_len, m, &m_len );
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* oaes_free( &ctx );
|
||||
*/
|
||||
|
||||
OAES_API OAES_CTX * oaes_alloc(void);
|
||||
|
||||
OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
|
||||
|
||||
OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
|
||||
OAES_OPTION option, const void * value );
|
||||
|
||||
OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
|
||||
|
||||
OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
|
||||
|
||||
OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
|
||||
|
||||
// export key with header information
|
||||
// set data == NULL to get the required data_len
|
||||
OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
|
||||
uint8_t * data, size_t * data_len );
|
||||
|
||||
// directly export the data from key
|
||||
// set data == NULL to get the required data_len
|
||||
OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
|
||||
uint8_t * data, size_t * data_len );
|
||||
|
||||
// import key with header information
|
||||
OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
|
||||
const uint8_t * data, size_t data_len );
|
||||
|
||||
// directly import data into key
|
||||
OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
|
||||
const uint8_t * data, size_t data_len );
|
||||
|
||||
// set c == NULL to get the required c_len
|
||||
OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
|
||||
const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
|
||||
|
||||
// set m == NULL to get the required m_len
|
||||
OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
|
||||
const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
|
||||
|
||||
// set buf == NULL to get the required buf_len
|
||||
OAES_API OAES_RET oaes_sprintf(
|
||||
char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
|
||||
|
||||
OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
|
||||
|
||||
OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _OAES_LIB_H
|
@@ -1,190 +0,0 @@
|
||||
#ifndef _SKEIN_PORT_H_
|
||||
#define _SKEIN_PORT_H_
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifndef RETURN_VALUES
|
||||
# define RETURN_VALUES
|
||||
# if defined( DLL_EXPORT )
|
||||
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
|
||||
# define VOID_RETURN __declspec( dllexport ) void __stdcall
|
||||
# define INT_RETURN __declspec( dllexport ) int __stdcall
|
||||
# elif defined( __GNUC__ )
|
||||
# define VOID_RETURN __declspec( __dllexport__ ) void
|
||||
# define INT_RETURN __declspec( __dllexport__ ) int
|
||||
# else
|
||||
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
|
||||
# endif
|
||||
# elif defined( DLL_IMPORT )
|
||||
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
|
||||
# define VOID_RETURN __declspec( dllimport ) void __stdcall
|
||||
# define INT_RETURN __declspec( dllimport ) int __stdcall
|
||||
# elif defined( __GNUC__ )
|
||||
# define VOID_RETURN __declspec( __dllimport__ ) void
|
||||
# define INT_RETURN __declspec( __dllimport__ ) int
|
||||
# else
|
||||
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
|
||||
# endif
|
||||
# elif defined( __WATCOMC__ )
|
||||
# define VOID_RETURN void __cdecl
|
||||
# define INT_RETURN int __cdecl
|
||||
# else
|
||||
# define VOID_RETURN void
|
||||
# define INT_RETURN int
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* These defines are used to declare buffers in a way that allows
|
||||
faster operations on longer variables to be used. In all these
|
||||
defines 'size' must be a power of 2 and >= 8
|
||||
|
||||
dec_unit_type(size,x) declares a variable 'x' of length
|
||||
'size' bits
|
||||
|
||||
dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
|
||||
bytes defined as an array of variables
|
||||
each of 'size' bits (bsize must be a
|
||||
multiple of size / 8)
|
||||
|
||||
ptr_cast(x,size) casts a pointer to a pointer to a
|
||||
varaiable of length 'size' bits
|
||||
*/
|
||||
|
||||
#define ui_type(size) uint##size##_t
|
||||
#define dec_unit_type(size,x) typedef ui_type(size) x
|
||||
#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
|
||||
#define ptr_cast(x,size) ((ui_type(size)*)(x))
|
||||
|
||||
typedef unsigned int uint_t; /* native unsigned integer */
|
||||
typedef uint8_t u08b_t; /* 8-bit unsigned integer */
|
||||
typedef uint64_t u64b_t; /* 64-bit unsigned integer */
|
||||
|
||||
#ifndef RotL_64
|
||||
#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N))))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Skein is "natively" little-endian (unlike SHA-xxx), for optimal
|
||||
* performance on x86 CPUs. The Skein code requires the following
|
||||
* definitions for dealing with endianness:
|
||||
*
|
||||
* SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
|
||||
* Skein_Put64_LSB_First
|
||||
* Skein_Get64_LSB_First
|
||||
* Skein_Swap64
|
||||
*
|
||||
* If SKEIN_NEED_SWAP is defined at compile time, it is used here
|
||||
* along with the portable versions of Put64/Get64/Swap64, which
|
||||
* are slow in general.
|
||||
*
|
||||
* Otherwise, an "auto-detect" of endianness is attempted below.
|
||||
* If the default handling doesn't work well, the user may insert
|
||||
* platform-specific code instead (e.g., for big-endian CPUs).
|
||||
*
|
||||
*/
|
||||
#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
|
||||
|
||||
|
||||
#include "int-util.h"
|
||||
|
||||
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
|
||||
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
#endif
|
||||
|
||||
/* special handler for IA64, which may be either endianness (?) */
|
||||
/* here we assume little-endian, but this may need to be changed */
|
||||
#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
|
||||
# define PLATFORM_MUST_ALIGN (1)
|
||||
#ifndef PLATFORM_BYTE_ORDER
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef PLATFORM_MUST_ALIGN
|
||||
# define PLATFORM_MUST_ALIGN (0)
|
||||
#endif
|
||||
|
||||
|
||||
#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
|
||||
/* here for big-endian CPUs */
|
||||
#define SKEIN_NEED_SWAP (1)
|
||||
#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
|
||||
/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
|
||||
#define SKEIN_NEED_SWAP (0)
|
||||
#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */
|
||||
#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
|
||||
#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
|
||||
#endif
|
||||
#else
|
||||
#error "Skein needs endianness setting!"
|
||||
#endif
|
||||
|
||||
#endif /* ifndef SKEIN_NEED_SWAP */
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
* Provide any definitions still needed.
|
||||
******************************************************************
|
||||
*/
|
||||
#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
|
||||
#if SKEIN_NEED_SWAP
|
||||
#define Skein_Swap64(w64) \
|
||||
( (( ((u64b_t)(w64)) & 0xFF) << 56) | \
|
||||
(((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \
|
||||
(((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \
|
||||
(((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \
|
||||
(((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \
|
||||
(((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \
|
||||
(((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \
|
||||
(((((u64b_t)(w64)) >>56) & 0xFF) ) )
|
||||
#else
|
||||
#define Skein_Swap64(w64) (w64)
|
||||
#endif
|
||||
#endif /* ifndef Skein_Swap64 */
|
||||
|
||||
|
||||
#ifndef Skein_Put64_LSB_First
|
||||
void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
|
||||
#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
|
||||
{ /* this version is fully portable (big-endian or little-endian), but slow */
|
||||
size_t n;
|
||||
|
||||
for (n=0;n<bCnt;n++)
|
||||
dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7)));
|
||||
}
|
||||
#else
|
||||
; /* output only the function prototype */
|
||||
#endif
|
||||
#endif /* ifndef Skein_Put64_LSB_First */
|
||||
|
||||
|
||||
#ifndef Skein_Get64_LSB_First
|
||||
void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
|
||||
#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
|
||||
{ /* this version is fully portable (big-endian or little-endian), but slow */
|
||||
size_t n;
|
||||
|
||||
for (n=0;n<8*wCnt;n+=8)
|
||||
dst[n/8] = (((u64b_t) src[n ]) ) +
|
||||
(((u64b_t) src[n+1]) << 8) +
|
||||
(((u64b_t) src[n+2]) << 16) +
|
||||
(((u64b_t) src[n+3]) << 24) +
|
||||
(((u64b_t) src[n+4]) << 32) +
|
||||
(((u64b_t) src[n+5]) << 40) +
|
||||
(((u64b_t) src[n+6]) << 48) +
|
||||
(((u64b_t) src[n+7]) << 56) ;
|
||||
}
|
||||
#else
|
||||
; /* output only the function prototype */
|
||||
#endif
|
||||
#endif /* ifndef Skein_Get64_LSB_First */
|
||||
|
||||
#endif /* ifndef _SKEIN_PORT_H_ */
|
18
miner.h
18
miner.h
@@ -447,26 +447,14 @@ bool stratum_subscribe(struct stratum_ctx *sctx);
|
||||
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
|
||||
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
|
||||
|
||||
/* rpc 2.0 (xmr) */
|
||||
extern bool lowdiff_debug;
|
||||
|
||||
|
||||
|
||||
extern bool jsonrpc_2;
|
||||
extern bool aes_ni_supported;
|
||||
extern char rpc2_id[64];
|
||||
extern char *rpc2_blob;
|
||||
extern size_t rpc2_bloblen;
|
||||
extern uint32_t rpc2_target;
|
||||
extern char *rpc2_job_id;
|
||||
extern char *rpc_user;
|
||||
extern char *short_url;
|
||||
|
||||
json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags);
|
||||
bool rpc2_login(CURL *curl);
|
||||
bool rpc2_login_decode(const json_t *val);
|
||||
bool rpc2_workio_login(CURL *curl);
|
||||
bool rpc2_stratum_job(struct stratum_ctx *sctx, json_t *params);
|
||||
bool rpc2_job_decode(const json_t *job, struct work *work);
|
||||
|
||||
struct thread_q;
|
||||
|
||||
struct thread_q *tq_new(void);
|
||||
@@ -763,8 +751,6 @@ extern bool opt_hash_meter;
|
||||
extern uint32_t accepted_share_count;
|
||||
extern uint32_t rejected_share_count;
|
||||
extern uint32_t solved_block_count;
|
||||
extern pthread_mutex_t rpc2_job_lock;
|
||||
extern pthread_mutex_t rpc2_login_lock;
|
||||
extern pthread_mutex_t applog_lock;
|
||||
extern pthread_mutex_t stats_lock;
|
||||
extern bool opt_sapling;
|
||||
|
@@ -121,12 +121,9 @@ do { \
|
||||
|
||||
|
||||
// Horizontal vector testing
|
||||
|
||||
#define mm256_allbits0( a ) _mm256_testz_si256( a, a )
|
||||
#define mm256_allbits1( a ) _mm256_testc_si256( a, m256_neg1 )
|
||||
//broken
|
||||
//#define mm256_allbitsne( a ) _mm256_testnzc_si256( a, m256_neg1 )
|
||||
#define mm256_anybits0( a ) !mm256_allbits1( a )
|
||||
#define mm256_allbits0( a ) _mm256_testc_si256( a, m256_neg1 )
|
||||
#define mm256_allbits1( a ) _mm256_testz_si256( a, a )
|
||||
#define mm256_anybits0( a ) !mm256_allbits1( a )
|
||||
#define mm256_anybits1( a ) !mm256_allbits0( a )
|
||||
|
||||
|
||||
|
230
util.c
230
util.c
@@ -1423,9 +1423,6 @@ bool stratum_subscribe(struct stratum_ctx *sctx)
|
||||
json_error_t err;
|
||||
bool ret = false, retry = false;
|
||||
|
||||
if (jsonrpc_2)
|
||||
return true;
|
||||
|
||||
start:
|
||||
s = (char*) malloc(128 + (sctx->session_id ? strlen(sctx->session_id) : 0));
|
||||
if (retry)
|
||||
@@ -1514,16 +1511,9 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
json_error_t err;
|
||||
bool ret = false;
|
||||
|
||||
if (jsonrpc_2) {
|
||||
s = (char*) malloc(300 + strlen(user) + strlen(pass));
|
||||
sprintf(s, "{\"method\": \"login\", \"params\": {"
|
||||
"\"login\": \"%s\", \"pass\": \"%s\", \"agent\": \"%s\"}, \"id\": 1}",
|
||||
user, pass, USER_AGENT);
|
||||
} else {
|
||||
s = (char*) malloc(80 + strlen(user) + strlen(pass));
|
||||
sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}",
|
||||
s = (char*) malloc(80 + strlen(user) + strlen(pass));
|
||||
sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}",
|
||||
user, pass);
|
||||
}
|
||||
|
||||
if (!stratum_send_line(sctx, s))
|
||||
goto out;
|
||||
@@ -1553,15 +1543,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (jsonrpc_2) {
|
||||
rpc2_login_decode(val);
|
||||
json_t *job_val = json_object_get(res_val, "job");
|
||||
pthread_mutex_lock(&sctx->work_lock);
|
||||
if(job_val) rpc2_job_decode(job_val, &sctx->work);
|
||||
sctx->job.job_id = strdup(sctx->work.job_id);
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
}
|
||||
|
||||
ret = true;
|
||||
|
||||
if (!opt_extranonce)
|
||||
@@ -1575,8 +1556,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
|
||||
if (!socket_full(sctx->sock, 3)) {
|
||||
applog(LOG_WARNING, "stratum extranonce subscribe timed out");
|
||||
// if (opt_debug)
|
||||
// applog(LOG_DEBUG, "stratum extranonce subscribe timed out");
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1607,204 +1586,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
// -------------------- RPC 2.0 (XMR/AEON) -------------------------
|
||||
|
||||
//extern pthread_mutex_t rpc2_login_lock;
|
||||
//extern pthread_mutex_t rpc2_job_lock;
|
||||
|
||||
bool rpc2_login_decode(const json_t *val)
|
||||
{
|
||||
const char *id;
|
||||
const char *s;
|
||||
|
||||
json_t *res = json_object_get(val, "result");
|
||||
if(!res) {
|
||||
applog(LOG_ERR, "JSON invalid result");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
json_t *tmp;
|
||||
tmp = json_object_get(res, "id");
|
||||
if(!tmp) {
|
||||
applog(LOG_ERR, "JSON inval id");
|
||||
goto err_out;
|
||||
}
|
||||
id = json_string_value(tmp);
|
||||
if(!id) {
|
||||
applog(LOG_ERR, "JSON id is not a string");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
memcpy(&rpc2_id, id, 64);
|
||||
|
||||
if(opt_debug)
|
||||
applog(LOG_DEBUG, "Auth id: %s", id);
|
||||
|
||||
tmp = json_object_get(res, "status");
|
||||
if(!tmp) {
|
||||
applog(LOG_ERR, "JSON inval status");
|
||||
goto err_out;
|
||||
}
|
||||
s = json_string_value(tmp);
|
||||
if(!s) {
|
||||
applog(LOG_ERR, "JSON status is not a string");
|
||||
goto err_out;
|
||||
}
|
||||
if(strcmp(s, "OK")) {
|
||||
applog(LOG_ERR, "JSON returned status \"%s\"", s);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
err_out:
|
||||
applog(LOG_WARNING,"%s: fail", __func__);
|
||||
return false;
|
||||
}
|
||||
|
||||
json_t* json_rpc2_call_recur(CURL *curl, const char *url, const char *userpass,
|
||||
json_t *rpc_req, int *curl_err, int flags, int recur)
|
||||
{
|
||||
if(recur >= 5) {
|
||||
if(opt_debug)
|
||||
applog(LOG_DEBUG, "Failed to call rpc command after %i tries", recur);
|
||||
return NULL;
|
||||
}
|
||||
if(!strcmp(rpc2_id, "")) {
|
||||
if(opt_debug)
|
||||
applog(LOG_DEBUG, "Tried to call rpc2 command before authentication");
|
||||
return NULL;
|
||||
}
|
||||
json_t *params = json_object_get(rpc_req, "params");
|
||||
if (params) {
|
||||
json_t *auth_id = json_object_get(params, "id");
|
||||
if (auth_id) {
|
||||
json_string_set(auth_id, rpc2_id);
|
||||
}
|
||||
}
|
||||
json_t *res = json_rpc_call(curl, url, userpass, json_dumps(rpc_req, 0),
|
||||
curl_err, flags | JSON_RPC_IGNOREERR);
|
||||
if(!res) goto end;
|
||||
json_t *error = json_object_get(res, "error");
|
||||
if(!error) goto end;
|
||||
json_t *message;
|
||||
if(json_is_string(error))
|
||||
message = error;
|
||||
else
|
||||
message = json_object_get(error, "message");
|
||||
if(!message || !json_is_string(message)) goto end;
|
||||
const char *mes = json_string_value(message);
|
||||
if(!strcmp(mes, "Unauthenticated")) {
|
||||
pthread_mutex_lock(&rpc2_login_lock);
|
||||
rpc2_login(curl);
|
||||
sleep(1);
|
||||
pthread_mutex_unlock(&rpc2_login_lock);
|
||||
return json_rpc2_call_recur(curl, url, userpass, rpc_req,
|
||||
curl_err, flags, recur + 1);
|
||||
} else if(!strcmp(mes, "Low difficulty share") || !strcmp(mes, "Block expired") || !strcmp(mes, "Invalid job id") || !strcmp(mes, "Duplicate share")) {
|
||||
json_t *result = json_object_get(res, "result");
|
||||
if(!result) {
|
||||
goto end;
|
||||
}
|
||||
json_object_set(result, "reject-reason", json_string(mes));
|
||||
} else {
|
||||
applog(LOG_ERR, "json_rpc2.0 error: %s", mes);
|
||||
return NULL;
|
||||
}
|
||||
end:
|
||||
return res;
|
||||
}
|
||||
|
||||
json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags)
|
||||
{
|
||||
json_t* req_json = JSON_LOADS(rpc_req, NULL);
|
||||
json_t* res = json_rpc2_call_recur(curl, url, userpass, req_json, curl_err, flags, 0);
|
||||
json_decref(req_json);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool rpc2_job_decode(const json_t *job, struct work *work)
|
||||
{
|
||||
if (!jsonrpc_2) {
|
||||
applog(LOG_ERR, "Tried to decode job without JSON-RPC 2.0");
|
||||
return false;
|
||||
}
|
||||
json_t *tmp;
|
||||
tmp = json_object_get(job, "job_id");
|
||||
if (!tmp) {
|
||||
applog(LOG_ERR, "JSON invalid job id");
|
||||
goto err_out;
|
||||
}
|
||||
const char *job_id = json_string_value(tmp);
|
||||
tmp = json_object_get(job, "blob");
|
||||
if (!tmp) {
|
||||
applog(LOG_ERR, "JSON invalid blob");
|
||||
goto err_out;
|
||||
}
|
||||
const char *hexblob = json_string_value(tmp);
|
||||
size_t blobLen = strlen(hexblob);
|
||||
if (blobLen % 2 != 0 || ((blobLen / 2) < 40 && blobLen != 0) || (blobLen / 2) > 128) {
|
||||
applog(LOG_ERR, "JSON invalid blob length");
|
||||
goto err_out;
|
||||
}
|
||||
if (blobLen != 0) {
|
||||
uint32_t target = 0;
|
||||
pthread_mutex_lock(&rpc2_job_lock);
|
||||
uchar *blob = (uchar*) malloc(blobLen / 2);
|
||||
if (!hex2bin(blob, hexblob, blobLen / 2)) {
|
||||
applog(LOG_ERR, "JSON invalid blob");
|
||||
pthread_mutex_unlock(&rpc2_job_lock);
|
||||
goto err_out;
|
||||
}
|
||||
rpc2_bloblen = blobLen / 2;
|
||||
if (rpc2_blob) free(rpc2_blob);
|
||||
rpc2_blob = (char*) malloc(rpc2_bloblen);
|
||||
if (!rpc2_blob) {
|
||||
applog(LOG_ERR, "RPC2 OOM!");
|
||||
goto err_out;
|
||||
}
|
||||
memcpy(rpc2_blob, blob, blobLen / 2);
|
||||
free(blob);
|
||||
|
||||
jobj_binary(job, "target", &target, 4);
|
||||
if(rpc2_target != target)
|
||||
{
|
||||
double hashrate = 0.0;
|
||||
pthread_mutex_lock(&stats_lock);
|
||||
for (int i = 0; i < opt_n_threads; i++)
|
||||
hashrate += thr_hashrates[i];
|
||||
pthread_mutex_unlock(&stats_lock);
|
||||
double diff = trunc( ( ((double)0xffffffff) / target ) );
|
||||
if ( !opt_quiet )
|
||||
// xmr pool diff can change a lot...
|
||||
applog(LOG_BLUE, "Stratum difficulty set to %g", diff);
|
||||
work->stratum_diff = diff;
|
||||
stratum_diff = diff;
|
||||
rpc2_target = target;
|
||||
}
|
||||
|
||||
if (rpc2_job_id) free(rpc2_job_id);
|
||||
rpc2_job_id = strdup(job_id);
|
||||
pthread_mutex_unlock(&rpc2_job_lock);
|
||||
}
|
||||
if(work) {
|
||||
if (!rpc2_blob) {
|
||||
applog(LOG_WARNING, "Work requested before it was received");
|
||||
goto err_out;
|
||||
}
|
||||
memcpy(work->data, rpc2_blob, rpc2_bloblen);
|
||||
memset(work->target, 0xff, sizeof(work->target));
|
||||
work->target[7] = rpc2_target;
|
||||
if (work->job_id) free(work->job_id);
|
||||
work->job_id = strdup(rpc2_job_id);
|
||||
}
|
||||
return true;
|
||||
|
||||
err_out:
|
||||
applog(LOG_WARNING, "%s", __func__);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract bloc height L H... here len=3, height=0x1333e8
|
||||
* "...0000000000ffffffff2703e83313062f503253482f043d61105408"
|
||||
@@ -2298,13 +2079,6 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
|
||||
|
||||
params = json_object_get(val, "params");
|
||||
|
||||
if (jsonrpc_2) {
|
||||
if (!strcasecmp(method, "job")) {
|
||||
ret = rpc2_stratum_job(sctx, params);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
id = json_object_get(val, "id");
|
||||
|
||||
if (!strcasecmp(method, "mining.notify")) {
|
||||
|
Reference in New Issue
Block a user