Compare commits

...

4 Commits

Author SHA1 Message Date
Jay D Dee
3da2b958cf v3.12.2 2020-02-09 13:30:40 -05:00
Jay D Dee
dc2f8d81d3 v3.12.1 2020-02-07 20:18:20 -05:00
Jay D Dee
fc97ef174a v3.12.0.1 2020-02-06 22:50:20 -05:00
Jay D Dee
13523a12f9 v3.12.0 2020-02-05 22:50:58 -05:00
81 changed files with 2209 additions and 9544 deletions

View File

@@ -21,15 +21,6 @@ cpuminer_SOURCES = \
api.c \
sysinfos.c \
algo-gate-api.c\
crypto/oaes_lib.c \
crypto/c_keccak.c \
crypto/c_groestl.c \
crypto/c_blake256.c \
crypto/c_jh.c \
crypto/c_skein.c \
crypto/hash.c \
crypto/aesb.c \
crypto/magimath.cpp \
algo/argon2/argon2a/argon2a.c \
algo/argon2/argon2a/ar2/argon2.c \
algo/argon2/argon2a/ar2/opt.c \
@@ -76,10 +67,6 @@ cpuminer_SOURCES = \
algo/bmw/bmw512-gate.c \
algo/bmw/bmw512.c \
algo/bmw/bmw512-4way.c \
algo/cryptonight/cryptolight.c \
algo/cryptonight/cryptonight-common.c\
algo/cryptonight/cryptonight-aesni.c\
algo/cryptonight/cryptonight.c\
algo/cubehash/cubehash_sse2.c\
algo/cubehash/cube-hash-2way.c \
algo/echo/sph_echo.c \
@@ -141,7 +128,8 @@ cpuminer_SOURCES = \
algo/lyra2/allium.c \
algo/lyra2/phi2-4way.c \
algo/lyra2/phi2.c \
algo/m7m.c \
algo//m7m/m7m.c \
algo/m7m/magimath.cpp \
algo/nist5/nist5-gate.c \
algo/nist5/nist5-4way.c \
algo/nist5/nist5.c \

View File

@@ -65,10 +65,60 @@ If not what makes it happen or not happen?
Change Log
----------
v3.12.2
Fixed xevan, skein, skein2 AVX2, #238.
Reversed polarity of AVX2 vector bit test utilities, and all users, to be
logically and semantically correct. Follow up to issue #236.
v3.12.1
Fixed anime AVX2 low difficulty shares, git issue #236.
Periodic summary now reports lost hash rate due to rejected and stale shares,
displayed only when non-zero.
v3.12.0.1
Fixed hodl rejects, git issue #237.
Fixed debug code added in v3.12.0 to work with AVX2 to be enabled only
after low difficulty share have been seen to avoid unnecessarily excessive
log outout.
Added more digits of precision to diff in log output to help diagnose
low difficulty shares.
v3.12.0
Faster phi2 AVX2 +62%, AVX512 +150% on Intel CPUs. AMD Ryzen AVX2 is
YMMV due to its inferiour AVX2 implementation.
Fixed Hodl stats, rejects are still an issue since v3.9.5, git issue #237.
API can now be enabled with "-b port" or "--api-bind port".
It will use the default address 127.0.0.1.
Editorial: Short form options should only be used on the command line to save
typing. Configuration files and scripts should always use the long form
"--api-bind addr:port" without relying on any defaults. This is a general
recommendation that applies to all options for any application.
Removed obsolete cryptonight, all variants, and supporting code for more
size reduction and faster compiling.
Tweaked the timing of the CPU temperature and frequency log (Linux only).
Added some debug code to collect more info aboout low difficulty rejects,
git issue #236.
v3.11.9
Fixed x16r invalid shares when Luffa was first in hash order.
API is disabled by default.
New startup message for status of stratum connection, API & extranonce.
New log report for CPU temperature, frequency of fastest and slowest cores.

View File

@@ -113,7 +113,6 @@ void init_algo_gate( algo_gate_t* gate )
gate->hash = (void*)&null_hash;
gate->hash_suw = (void*)&null_hash_suw;
gate->get_new_work = (void*)&std_get_new_work;
gate->get_nonceptr = (void*)&std_get_nonceptr;
gate->work_decode = (void*)&std_le_work_decode;
gate->decode_extra_data = (void*)&do_nothing;
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
@@ -129,7 +128,6 @@ void init_algo_gate( algo_gate_t* gate )
gate->resync_threads = (void*)&do_nothing;
gate->do_this_thread = (void*)&return_true;
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
gate->stratum_handle_response = (void*)&std_stratum_handle_response;
gate->get_work_data_size = (void*)&std_get_work_data_size;
gate->optimizations = EMPTY_SET;
gate->ntime_index = STD_NTIME_INDEX;
@@ -168,9 +166,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
case ALGO_BMW512: register_bmw512_algo ( gate ); break;
case ALGO_C11: register_c11_algo ( gate ); break;
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
case ALGO_CRYPTONIGHTV7: register_cryptonightv7_algo ( gate ); break;
case ALGO_DECRED: register_decred_algo ( gate ); break;
case ALGO_DEEP: register_deep_algo ( gate ); break;
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
@@ -266,25 +261,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
// restore warnings
#pragma GCC diagnostic pop
// override std defaults with jr2 defaults
bool register_json_rpc2( algo_gate_t *gate )
{
// gate->wait_for_diff = (void*)&do_nothing;
gate->get_new_work = (void*)&jr2_get_new_work;
gate->get_nonceptr = (void*)&jr2_get_nonceptr;
gate->stratum_gen_work = (void*)&jr2_stratum_gen_work;
gate->build_stratum_request = (void*)&jr2_build_stratum_request;
gate->submit_getwork_result = (void*)&jr2_submit_getwork_result;
gate->longpoll_rpc_call = (void*)&jr2_longpoll_rpc_call;
gate->work_decode = (void*)&jr2_work_decode;
gate->stratum_handle_response = (void*)&jr2_stratum_handle_response;
gate->nonce_index = JR2_NONCE_INDEX;
jsonrpc_2 = true; // still needed
opt_extranonce = false;
// have_gbt = false;
return true;
}
// run the alternate hash function for a specific algo
void exec_hash_function( int algo, void *output, const void *pdata )
{
@@ -350,7 +326,7 @@ void get_algo_alias( char** algo_or_alias )
if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) )
{
// found valid alias, return proper name
*algo_or_alias = (const char*)( algo_alias_map[i][ PROPER ] );
*algo_or_alias = (char*)( algo_alias_map[i][ PROPER ] );
return;
}
}

View File

@@ -129,9 +129,6 @@ void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
// Get thread local copy of blockheader with unique nonce.
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
// Return pointer to nonce in blockheader.
uint32_t *( *get_nonceptr ) ( uint32_t* );
// Decode getwork blockheader
bool ( *work_decode ) ( const json_t*, struct work* );
@@ -169,7 +166,6 @@ bool ( *do_this_thread ) ( int );
void ( *resync_threads ) ( struct work* );
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
bool ( *stratum_handle_response ) ( json_t* );
set_t optimizations;
int ( *get_work_data_size ) ();
int ntime_index;
@@ -222,31 +218,22 @@ void null_hash_suw();
// optional safe targets, default listed first unless noted.
uint32_t *std_get_nonceptr( uint32_t *work_data );
uint32_t *jr2_get_nonceptr( uint32_t *work_data );
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
uint32_t* end_nonce_ptr );
void jr2_get_new_work( struct work *work, struct work *g_work, int thr_id,
uint32_t* end_nonce_ptr );
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
bool std_le_work_decode( const json_t *val, struct work *work );
bool std_be_work_decode( const json_t *val, struct work *work );
bool jr2_work_decode( const json_t *val, struct work *work );
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
void jr2_build_stratum_request ( char *req, struct work *work );
char* std_malloc_txs_request( struct work *work );
@@ -263,10 +250,10 @@ void std_build_block_header( struct work* g_work, uint32_t version,
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
//json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
bool std_stratum_handle_response( json_t *val );
bool jr2_stratum_handle_response( json_t *val );
//bool std_stratum_handle_response( json_t *val );
//bool jr2_stratum_handle_response( json_t *val );
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
int thr_id );
@@ -288,7 +275,7 @@ bool register_algo( algo_gate_t *gate );
// Overrides a common set of functions used by RPC2 and other RPC2-specific
// init. Called by algo's register function before initializing algo-specific
// functions and data.
bool register_json_rpc2( algo_gate_t *gate );
//bool register_json_rpc2( algo_gate_t *gate );
// use this to call the hash function of an algo directly, ie util.c test.
void exec_hash_function( int algo, void *output, const void *pdata );

View File

@@ -153,7 +153,7 @@ bool register_decred_algo( algo_gate_t* gate )
gate->hash = (void*)&decred_hash;
#endif
gate->optimizations = AVX2_OPT;
gate->get_nonceptr = (void*)&decred_get_nonceptr;
// gate->get_nonceptr = (void*)&decred_get_nonceptr;
gate->decode_extra_data = (void*)&decred_decode_extradata;
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;

View File

@@ -138,7 +138,7 @@ void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
#if defined(__AVX2__)
// BMW-512 4 way 64
// BMW-512 64 bit 4 way
typedef struct {
__m256i buf[16];
@@ -149,7 +149,6 @@ typedef struct {
typedef bmw_4way_big_context bmw512_4way_context;
void bmw512_4way_init(void *cc);
void bmw512_4way_update(void *cc, const void *data, size_t len);
@@ -164,6 +163,7 @@ void bmw512_4way_addbits_and_close(
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
// BMW-512 64 bit 8 way
typedef struct {
__m512i buf[16];
__m512i H[16];
@@ -171,6 +171,8 @@ typedef struct {
uint64_t bit_count;
} bmw512_8way_context __attribute__((aligned(128)));
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
size_t len );
void bmw512_8way_init( bmw512_8way_context *ctx );
void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
size_t len );

View File

@@ -1507,6 +1507,93 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst )
casti_m512i( dst, u ) = h1[ v ];
}
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
size_t len )
{
__m512i *vdata = (__m512i*)data;
__m512i *buf = ctx->buf;
__m512i htmp[16];
__m512i *H = ctx->H;
__m512i *h2 = htmp;
uint64_t bit_count = len * 8;
size_t ptr = 0;
const int buf_size = 128; // bytes of one lane, compatible with len
// Init
H[ 0] = m512_const1_64( 0x8081828384858687 );
H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F );
H[ 2] = m512_const1_64( 0x9091929394959697 );
H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F );
H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 );
H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF );
H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 );
H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF );
H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 );
H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF );
H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 );
H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF );
H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 );
H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF );
H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 );
H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF );
// Update
while ( len > 0 )
{
size_t clen;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_512( buf + (ptr>>3), vdata, clen >> 3 );
vdata = vdata + (clen>>3);
len -= clen;
ptr += clen;
if ( ptr == buf_size )
{
__m512i *ht;
compress_big_8way( buf, H, h2 );
ht = H;
H = h2;
h2 = ht;
ptr = 0;
}
}
if ( H != ctx->H )
memcpy_512( ctx->H, H, 16 );
// Close
{
__m512i h1[16], h2[16];
size_t u, v;
buf[ ptr>>3 ] = m512_const1_64( 0x80 );
ptr += 8;
if ( ptr > (buf_size - 8) )
{
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
compress_big_8way( buf, H, h1 );
ptr = 0;
H = h1;
}
memset_zero_512( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
buf[ (buf_size - 8) >> 3 ] = _mm512_set1_epi64( bit_count );
compress_big_8way( buf, H, h2 );
for ( u = 0; u < 16; u ++ )
buf[ u ] = h2[ u ];
compress_big_8way( buf, final_b8, h1 );
for (u = 0, v = 8; u < 8; u ++, v ++)
casti_m512i( out, u ) = h1[ v ];
}
}
#endif // AVX512
#ifdef __cplusplus

View File

@@ -1,371 +0,0 @@
// Copyright (c) 2012-2013 The Cryptonote developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include "algo-gate-api.h"
#if defined(__arm__) || defined(_MSC_VER)
#ifndef NOASM
#define NOASM
#endif
#endif
#include "crypto/oaes_lib.h"
#include "crypto/c_keccak.h"
#include "crypto/c_groestl.h"
#include "crypto/c_blake256.h"
#include "crypto/c_jh.h"
#include "crypto/c_skein.h"
#include "crypto/int-util.h"
#include "crypto/hash-ops.h"
#if USE_INT128
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
#elif defined (_MSC_VER)
/* only for mingw64 on windows */
#undef USE_INT128
#define USE_INT128 (0)
#else
typedef __uint128_t uint128_t;
#endif
#endif
#define LITE 1
#if LITE /* cryptonight-light */
#define MEMORY (1 << 20)
#define ITER (1 << 19)
#else
#define MEMORY (1 << 21) /* 2 MiB */
#define ITER (1 << 20)
#endif
#define AES_BLOCK_SIZE 16
#define AES_KEY_SIZE 32 /*16*/
#define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
#pragma pack(push, 1)
union cn_slow_hash_state {
union hash_state hs;
struct {
uint8_t k[64];
uint8_t init[INIT_SIZE_BYTE];
};
};
#pragma pack(pop)
static void do_blake_hash(const void* input, size_t len, char* output) {
blake256_hash((uint8_t*)output, input, len);
}
static void do_groestl_hash(const void* input, size_t len, char* output) {
groestl(input, len * 8, (uint8_t*)output);
}
static void do_jh_hash(const void* input, size_t len, char* output) {
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
assert(likely(SUCCESS == r));
}
static void do_skein_hash(const void* input, size_t len, char* output) {
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
assert(likely(SKEIN_SUCCESS == r));
}
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
#if !defined(_MSC_VER) && !defined(NOASM)
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
#else
#define fast_aesb_single_round aesb_single_round
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
#endif
#if defined(NOASM) || !defined(__x86_64__)
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = hi_dword(multiplier);
uint64_t b = lo_dword(multiplier);
uint64_t c = hi_dword(multiplicand);
uint64_t d = lo_dword(multiplicand);
uint64_t ac = a * c;
uint64_t ad = a * d;
uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + bc;
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
assert(ac <= *product_hi);
return product_lo;
}
#else
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
#endif
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
};
static inline size_t e2i(const uint8_t* a) {
#if !LITE
return ((uint32_t *)a)[0] & 0x1FFFF0;
#else
return ((uint32_t *)a)[0] & 0xFFFF0;
#endif
}
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
hi += ((uint64_t*) c)[0];
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
((uint64_t*) dst)[0] = hi;
((uint64_t*) dst)[1] = lo;
}
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
#if USE_INT128
*((uint128_t*) a) ^= *((uint128_t*) b);
#else
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
#endif
}
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
#if USE_INT128
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
#else
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
#endif
}
struct cryptonight_ctx {
uint8_t _ALIGN(16) long_state[MEMORY];
union cn_slow_hash_state state;
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
oaes_ctx* aes_ctx;
};
static void cryptolight_hash_ctx(void* output, const void* input, int len, struct cryptonight_ctx* ctx)
{
len = 76;
hash_process(&ctx->state.hs, (const uint8_t*) input, len);
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
}
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
for (i = 0; likely(i < ITER / 4); ++i) {
/* Dependency chain: address -> read value ------+
* written value <-+ hard function (AES or MUL) <+
* next address <-+
*/
/* Iteration 1 */
j = e2i(ctx->a);
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
/* Iteration 2 */
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
/* Iteration 3 */
j = e2i(ctx->a);
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
/* Iteration 4 */
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
hash_permutation(&ctx->state.hs);
/*memcpy(hash, &state, 32);*/
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
oaes_free((OAES_CTX **) &ctx->aes_ctx);
}
void cryptolight_hash(void* output, const void* input, int len) {
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptolight_hash_ctx(output, input, len, ctx);
free(ctx);
}
#if defined(__AES__)
static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
int len, struct cryptonight_ctx* ctx)
{
hash_process(&ctx->state.hs, (const uint8_t*)input, len);
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
}
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
for (i = 0; likely(i < ITER / 4); ++i) {
/* Dependency chain: address -> read value ------+
* written value <-+ hard function (AES or MUL) <+
* next address <-+
*/
/* Iteration 1 */
j = e2i(ctx->a);
fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
/* Iteration 2 */
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
/* Iteration 3 */
j = e2i(ctx->a);
fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
/* Iteration 4 */
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
hash_permutation(&ctx->state.hs);
/*memcpy(hash, &state, 32);*/
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
oaes_free((OAES_CTX **) &ctx->aes_ctx);
}
#endif
int scanhash_cryptolight( struct work *work,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
uint32_t n = *nonceptr - 1;
const uint32_t first_nonce = n + 1;
//const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
int thr_id = mythr->id;
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
#if defined(__AES__)
do {
*nonceptr = ++n;
cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
if (unlikely(hash[7] < ptarget[7])) {
*hashes_done = n - first_nonce + 1;
free(ctx);
return true;
}
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
#else
do {
*nonceptr = ++n;
cryptolight_hash_ctx(hash, pdata, 76, ctx);
if (unlikely(hash[7] < ptarget[7])) {
*hashes_done = n - first_nonce + 1;
free(ctx);
return true;
}
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
#endif
free(ctx);
*hashes_done = n - first_nonce + 1;
return 0;
}
bool register_cryptolight_algo( algo_gate_t* gate )
{
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
register_json_rpc2( gate );
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_cryptolight;
gate->hash = (void*)&cryptolight_hash;
gate->hash_suw = (void*)&cryptolight_hash;
return true;
};

View File

@@ -1,357 +0,0 @@
#if defined(__AES__)
#include <x86intrin.h>
#include <memory.h>
#include "cryptonight.h"
#include "miner.h"
#include "crypto/c_keccak.h"
#include <immintrin.h>
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
__m128i tmp4;
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
tmp4 = _mm_slli_si128(*tmp1, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
__m128i tmp2, tmp4;
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
tmp4 = _mm_slli_si128(*tmp3, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
static inline void ExpandAESKey256(char *keybuf)
{
__m128i tmp1, tmp2, tmp3, *keys;
keys = (__m128i *)keybuf;
tmp1 = _mm_load_si128((__m128i *)keybuf);
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[2] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[3] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[4] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[5] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[6] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[7] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[8] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[9] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[10] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[11] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[12] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[13] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[14] = tmp1;
}
// align to 64 byte cache line
typedef struct
{
uint8_t long_state[MEMORY] __attribute((aligned(64)));
union cn_slow_hash_state state;
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(64)));
uint64_t a[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
uint64_t b[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(64)));
} cryptonight_ctx;
static __thread cryptonight_ctx ctx;
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
{
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
__m128i *longoutput, *expkey, *xmminput;
size_t i, j;
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
if ( cryptonightV7 && len < 43 )
return;
const uint64_t tweak = cryptonightV7
? *((const uint64_t*) (((const uint8_t*)input) + 35))
^ ctx.state.hs.w[24] : 0;
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
ExpandAESKey256( ExpandedKey );
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
longoutput = (__m128i*)ctx.long_state;
xmminput = (__m128i*)ctx.text;
expkey = (__m128i*)ExpandedKey;
// prefetch expkey, xmminput and enough longoutput for 4 iterations
_mm_prefetch( xmminput, _MM_HINT_T0 );
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
_mm_prefetch( expkey, _MM_HINT_T0 );
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
for ( i = 0; i < 64; i += 16 )
{
__builtin_prefetch( longoutput + i, 1, 0 );
__builtin_prefetch( longoutput + i + 4, 1, 0 );
__builtin_prefetch( longoutput + i + 8, 1, 0 );
__builtin_prefetch( longoutput + i + 12, 1, 0 );
}
// n-4 iterations
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
i += INIT_SIZE_M128I )
{
// prefetch 4 iterations ahead.
__builtin_prefetch( longoutput + i + 64, 1, 0 );
__builtin_prefetch( longoutput + i + 68, 1, 0 );
for ( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
}
// last 4 iterations
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
{
for ( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
}
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
uint64_t a[2] __attribute((aligned(16))),
b[2] __attribute((aligned(16))),
c[2] __attribute((aligned(16)));
a[0] = ctx.a[0];
a[1] = ctx.a[1];
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
__m128i a_x = _mm_load_si128( (__m128i*)a );
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
__m128i c_x = _mm_load_si128( lsa );
uint64_t *nextblock;
uint64_t hi, lo;
// n-1 iterations
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
{
c_x = _mm_aesenc_si128( c_x, a_x );
_mm_store_si128( (__m128i*)c, c_x );
b_x = _mm_xor_si128( b_x, c_x );
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
_mm_store_si128( lsa, b_x );
if ( cryptonightV7 )
{
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
}
b[0] = nextblock[0];
b[1] = nextblock[1];
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
__asm__( "mulq %3\n\t"
: "=d" ( hi ),
"=a" ( lo )
: "%a" ( c[0] ),
"rm" ( b[0] )
: "cc" );
b_x = c_x;
a[0] += hi;
a[1] += lo;
nextblock[0] = a[0];
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
a[0] ^= b[0];
a[1] ^= b[1];
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
a_x = _mm_load_si128( (__m128i*)a );
c_x = _mm_load_si128( lsa );
}
// abreviated nth iteration
c_x = _mm_aesenc_si128( c_x, a_x );
_mm_store_si128( (__m128i*)c, c_x );
b_x = _mm_xor_si128( b_x, c_x );
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
_mm_store_si128( lsa, b_x );
if ( cryptonightV7 )
{
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
}
b[0] = nextblock[0];
b[1] = nextblock[1];
__asm__( "mulq %3\n\t"
: "=d" ( hi ),
"=a" ( lo )
: "%a" ( c[0] ),
"rm" ( b[0] )
: "cc" );
a[0] += hi;
a[1] += lo;
nextblock[0] = a[0];
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
a[0] ^= b[0];
a[1] ^= b[1];
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
ExpandAESKey256( ExpandedKey );
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
_mm_prefetch( xmminput, _MM_HINT_T0 );
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
for ( i = 0; i < 64; i += 16 )
{
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
}
_mm_prefetch( expkey, _MM_HINT_T0 );
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
// n-4 iterations
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
i += INIT_SIZE_M128I )
{
// stay 4 iterations ahead.
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
for( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
}
// last 4 iterations
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
{
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
for( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
}
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
}
#endif

View File

@@ -1,133 +0,0 @@
// Copyright (c) 2012-2013 The Cryptonote developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
// Modified for CPUminer by Lucas Jones
#include "cpuminer-config.h"
#include "algo-gate-api.h"
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl256.h"
#else
#include "crypto/c_groestl.h"
#endif
#include "crypto/c_blake256.h"
#include "crypto/c_jh.h"
#include "crypto/c_skein.h"
#include "cryptonight.h"
/*
#if defined __unix__ && (!defined __APPLE__)
#include <sys/mman.h>
#elif defined _WIN32
#include <windows.h>
#endif
*/
void do_blake_hash(const void* input, size_t len, char* output) {
blake256_hash((uint8_t*)output, input, len);
}
void do_groestl_hash(const void* input, size_t len, char* output) {
#if defined(__AES__)
hashState_groestl256 ctx;
init_groestl256( &ctx, 32 );
update_and_final_groestl256( &ctx, output, input, len * 8 );
#else
groestl(input, len * 8, (uint8_t*)output);
#endif
}
void do_jh_hash(const void* input, size_t len, char* output) {
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
}
void do_skein_hash(const void* input, size_t len, char* output) {
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
}
void (* const extra_hashes[4])( const void *, size_t, char *) =
{ do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash };
void cryptonight_hash( void *restrict output, const void *input, int len )
{
#if defined(__AES__)
cryptonight_hash_aes( output, input, len );
#else
cryptonight_hash_ctx ( output, input, len );
#endif
}
void cryptonight_hash_suw( void *restrict output, const void *input )
{
#if defined(__AES__)
cryptonight_hash_aes( output, input, 76 );
#else
cryptonight_hash_ctx ( output, input, 76 );
#endif
}
bool cryptonightV7 = false;
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id;
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
uint32_t n = *nonceptr - 1;
const uint32_t first_nonce = n + 1;
const uint32_t Htarg = ptarget[7];
uint32_t hash[32 / 4] __attribute__((aligned(32)));
// if ( ( cryptonightV7 && ( *(uint8_t*)pdata < 7 ) )
// || ( !cryptonightV7 && ( *(uint8_t*)pdata == 7 ) ) )
// applog(LOG_WARNING,"Cryptonight variant mismatch, shares may be rejected.");
do
{
*nonceptr = ++n;
cryptonight_hash( hash, pdata, 76 );
if (unlikely( hash[7] < Htarg ))
{
*hashes_done = n - first_nonce + 1;
// work_set_target_ratio( work, hash );
return true;
}
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
*hashes_done = n - first_nonce + 1;
return 0;
}
bool register_cryptonight_algo( algo_gate_t* gate )
{
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
cryptonightV7 = false;
register_json_rpc2( gate );
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_cryptonight;
gate->hash = (void*)&cryptonight_hash;
gate->hash_suw = (void*)&cryptonight_hash_suw;
return true;
};
bool register_cryptonightv7_algo( algo_gate_t* gate )
{
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
cryptonightV7 = true;
register_json_rpc2( gate );
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_cryptonight;
gate->hash = (void*)&cryptonight_hash;
gate->hash_suw = (void*)&cryptonight_hash_suw;
return true;
};

View File

@@ -1,310 +0,0 @@
// Copyright (c) 2012-2013 The Cryptonote developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
// Modified for CPUminer by Lucas Jones
#include "miner.h"
#include <memory.h>
#if defined(__arm__) || defined(_MSC_VER)
#ifndef NOASM
#define NOASM
#endif
#endif
#include "crypto/oaes_lib.h"
#include "crypto/c_keccak.h"
#include "crypto/c_groestl.h"
#include "crypto/c_blake256.h"
#include "crypto/c_jh.h"
#include "crypto/c_skein.h"
#include "crypto/int-util.h"
//#include "crypto/hash-ops.h"
#include "cryptonight.h"
#if USE_INT128
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
#elif defined (_MSC_VER)
/* only for mingw64 on windows */
#undef USE_INT128
#define USE_INT128 (0)
#else
typedef __uint128_t uint128_t;
#endif
#endif
#define LITE 0
#if LITE /* cryptonight-light */
#define MEMORY (1 << 20)
#define ITER (1 << 19)
#else
#define MEMORY (1 << 21) /* 2 MiB */
#define ITER (1 << 20)
#endif
#define AES_BLOCK_SIZE 16
#define AES_KEY_SIZE 32 /*16*/
#define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
/*
#pragma pack(push, 1)
union cn_slow_hash_state {
union hash_state hs;
struct {
uint8_t k[64];
uint8_t init[INIT_SIZE_BYTE];
};
};
#pragma pack(pop)
static void do_blake_hash(const void* input, size_t len, char* output) {
blake256_hash((uint8_t*)output, input, len);
}
static void do_groestl_hash(const void* input, size_t len, char* output) {
groestl(input, len * 8, (uint8_t*)output);
}
static void do_jh_hash(const void* input, size_t len, char* output) {
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
assert(likely(SUCCESS == r));
}
static void do_skein_hash(const void* input, size_t len, char* output) {
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
assert(likely(SKEIN_SUCCESS == r));
}
*/
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
#if !defined(_MSC_VER) && !defined(NOASM)
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
#else
#define fast_aesb_single_round aesb_single_round
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
#endif
#if defined(NOASM) || !defined(__x86_64__)
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = hi_dword(multiplier);
uint64_t b = lo_dword(multiplier);
uint64_t c = hi_dword(multiplicand);
uint64_t d = lo_dword(multiplicand);
uint64_t ac = a * c;
uint64_t ad = a * d;
uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + bc;
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
assert(ac <= *product_hi);
return product_lo;
}
#else
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
#endif
/*
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
};
*/
static inline size_t e2i(const uint8_t* a) {
#if !LITE
return ((uint32_t *)a)[0] & 0x1FFFF0;
#else
return ((uint32_t *)a)[0] & 0xFFFF0;
#endif
}
static inline void mul_sum_xor_dst( const uint8_t* a, uint8_t* c, uint8_t* dst,
const uint64_t tweak )
{
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
hi += ((uint64_t*) c)[0];
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
((uint64_t*) dst)[0] = hi;
((uint64_t*) dst)[1] = cryptonightV7 ? lo ^ tweak : lo;
}
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
#if USE_INT128
*((uint128_t*) a) ^= *((uint128_t*) b);
#else
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
#endif
}
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
#if USE_INT128
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
#else
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
#endif
}
typedef struct {
uint8_t _ALIGN(16) long_state[MEMORY];
union cn_slow_hash_state state;
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
oaes_ctx* aes_ctx;
} cryptonight_ctx;
static __thread cryptonight_ctx ctx;
void cryptonight_hash_ctx(void* output, const void* input, int len)
{
// hash_process(&ctx.state.hs, (const uint8_t*) input, len);
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
if ( cryptonightV7 && len < 43 )
return;
const uint64_t tweak = cryptonightV7
? *((const uint64_t*) (((const uint8_t*)input) + 35))
^ ctx.state.hs.w[24] : 0;
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
__builtin_prefetch( ctx.text, 0, 3 );
__builtin_prefetch( ctx.text + 64, 0, 3 );
__builtin_prefetch( ctx.long_state, 1, 0 );
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
size_t i, j;
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx.aes_ctx, ctx.state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 0], ctx.aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 1], ctx.aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 2], ctx.aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 3], ctx.aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 4], ctx.aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 5], ctx.aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 6], ctx.aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 7], ctx.aes_ctx->key->exp_data);
memcpy(&ctx.long_state[i], ctx.text, INIT_SIZE_BYTE);
}
xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a);
xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b);
for (i = 0; likely(i < ITER / 4); ++i)
{
/* Dependency chain: address -> read value ------+
* written value <-+ hard function (AES or MUL) <+
* next address <-+
*/
/* Iteration 1 */
j = e2i(ctx.a);
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
if ( cryptonightV7 )
{
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
const uint8_t tmp = lsa[11];
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
}
/* Iteration 2 */
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)], tweak );
/* Iteration 3 */
j = e2i(ctx.a);
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
if ( cryptonightV7 )
{
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
const uint8_t tmp = lsa[11];
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
}
/* Iteration 4 */
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)], tweak );
}
__builtin_prefetch( ctx.text, 0, 3 );
__builtin_prefetch( ctx.text + 64, 0, 3 );
__builtin_prefetch( ctx.long_state, 1, 0 );
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx.aes_ctx, &ctx.state.hs.b[32], AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
xor_blocks(&ctx.text[0 * AES_BLOCK_SIZE], &ctx.long_state[i + 0 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[0 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
xor_blocks(&ctx.text[1 * AES_BLOCK_SIZE], &ctx.long_state[i + 1 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[1 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
xor_blocks(&ctx.text[2 * AES_BLOCK_SIZE], &ctx.long_state[i + 2 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[2 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
xor_blocks(&ctx.text[3 * AES_BLOCK_SIZE], &ctx.long_state[i + 3 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[3 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
xor_blocks(&ctx.text[4 * AES_BLOCK_SIZE], &ctx.long_state[i + 4 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[4 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
xor_blocks(&ctx.text[5 * AES_BLOCK_SIZE], &ctx.long_state[i + 5 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[5 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
xor_blocks(&ctx.text[6 * AES_BLOCK_SIZE], &ctx.long_state[i + 6 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[6 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
xor_blocks(&ctx.text[7 * AES_BLOCK_SIZE], &ctx.long_state[i + 7 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
}
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
// hash_permutation(&ctx.state.hs);
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
/*memcpy(hash, &state, 32);*/
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
oaes_free((OAES_CTX **) &ctx.aes_ctx);
}

View File

@@ -1,51 +0,0 @@
#ifndef __CRYPTONIGHT_H_INCLUDED
#define __CRYPTONIGHT_H_INCLUDED
#include <stddef.h>
#include "crypto/oaes_lib.h"
#include "miner.h"
#define MEMORY (1 << 21) /* 2 MiB */
#define MEMORY_M128I (MEMORY >> 4) // 2 MiB / 16 = 128 ki * __m128i
#define ITER (1 << 20)
#define AES_BLOCK_SIZE 16
#define AES_KEY_SIZE 32 /*16*/
#define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
#define INIT_SIZE_M128I (INIT_SIZE_BYTE >> 4) // 8
#pragma pack(push, 1)
union hash_state {
uint8_t b[200];
uint64_t w[25];
};
#pragma pack(pop)
#pragma pack(push, 1)
union cn_slow_hash_state {
union hash_state hs;
struct {
uint8_t k[64];
uint8_t init[INIT_SIZE_BYTE];
};
};
#pragma pack(pop)
void do_blake_hash(const void* input, size_t len, char* output);
void do_groestl_hash(const void* input, size_t len, char* output);
void do_jh_hash(const void* input, size_t len, char* output);
void do_skein_hash(const void* input, size_t len, char* output);
void cryptonight_hash_ctx(void* output, const void* input, int len);
void keccakf(uint64_t st[25], int rounds);
extern void (* const extra_hashes[4])(const void *, size_t, char *);
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
extern bool cryptonightV7;
#endif

View File

@@ -144,7 +144,7 @@ int hodl_scanhash( struct work* work, uint32_t max_nonce,
#if defined(__AES__)
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id );
pthread_barrier_wait( &hodl_barrier );
return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info );
return scanhash_hodl_wolf( work, max_nonce, hashes_done, mythr );
#endif
return false;
}

View File

@@ -129,9 +129,10 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
if( FinalPoW[7] <= ptarget[7] )
{
pdata[20] = swab32( BlockHdr[20] );
pdata[21] = swab32( BlockHdr[21] );
*hashes_done = CollisionCount;
return(1);
pdata[21] = swab32( BlockHdr[21] );
*hashes_done = CollisionCount;
submit_solution( work, FinalPoW, mythr );
return(0);
}
}
}
@@ -198,7 +199,8 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
pdata[20] = swab32( BlockHdr[20] );
pdata[21] = swab32( BlockHdr[21] );
*hashes_done = CollisionCount;
return(1);
submit_solution( work, FinalPoW, mythr );
return(0);
}
}
}

View File

@@ -76,37 +76,34 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
if ( opt_benchmark )
ptarget[7] = 0x3ffff;
for ( int i = 0; i < 19; i++ )
be32enc( &endiandata[i], pdata[i] );
edata[i] = bswap_32( pdata[i] );
sph_blake256_init( &allium_ctx.blake );
sph_blake256( &allium_ctx.blake, endiandata, 64 );
sph_blake256( &allium_ctx.blake, edata, 64 );
do {
be32enc( &endiandata[19], nonce );
allium_hash( hash, endiandata );
if ( hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
edata[19] = nonce;
allium_hash( hash, edata );
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash, mythr );
}
nonce++;
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -119,7 +119,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_lyra2rev2;
gate->hash = (void*)&lyra2rev2_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
opt_target_factor = 256.0;
return true;
@@ -228,13 +228,14 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
bool register_phi2_algo( algo_gate_t* gate )
{
// init_phi2_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
gate->build_extraheader = (void*)&phi2_build_extraheader;
opt_target_factor = 256.0;
#if defined(PHI2_4WAY)
#if defined(PHI2_8WAY)
gate->scanhash = (void*)&scanhash_phi2_8way;
#elif defined(PHI2_4WAY)
gate->scanhash = (void*)&scanhash_phi2_4way;
#else
init_phi2_ctx();

View File

@@ -184,19 +184,26 @@ bool init_allium_ctx();
/////////////////////////////////////////
#if defined(__AVX2__) && defined(__AES__)
// #define PHI2_4WAY
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define PHI2_8WAY 1
#elif defined(__AVX2__) && defined(__AES__)
#define PHI2_4WAY 1
#endif
extern bool phi2_has_roots;
bool register_phi2_algo( algo_gate_t* gate );
#if defined(PHI2_4WAY)
#if defined(PHI2_8WAY)
void phi2_8way_hash( void *state, const void *input );
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined(PHI2_4WAY)
void phi2_hash_4way( void *state, const void *input );
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//void init_phi2_ctx();
#else

View File

@@ -7,27 +7,8 @@
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/cubehash/cube-hash-2way.h"
#if defined (LYRA2REV2_8WAY)
typedef struct {
blake256_8way_context blake;
keccak256_8way_context keccak;
cube_4way_context cube;
skein256_8way_context skein;
bmw256_8way_context bmw;
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
bool init_lyra2rev2_8way_ctx()
{
keccak256_8way_init( &l2v2_8way_ctx.keccak );
cube_4way_init( &l2v2_8way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_8way_ctx.skein );
bmw256_8way_init( &l2v2_8way_ctx.bmw );
return true;
}
#if 0
void lyra2rev2_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
@@ -52,14 +33,24 @@ void lyra2rev2_8way_hash( void *state, const void *input )
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
@@ -80,15 +71,123 @@ void lyra2rev2_8way_hash( void *state, const void *input )
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
cube_4way_init( &ctx.cube, 256, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
// cube_4way_init( &ctx.cube, 256, 16, 32 );
// cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
//
// dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
// dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
bmw256_8way_update( &ctx.bmw, vhash, 32 );
bmw256_8way_close( &ctx.bmw, state );
}
#endif
#if defined (LYRA2REV2_8WAY)
typedef struct {
blake256_8way_context blake;
keccak256_8way_context keccak;
cubehashParam cube;
skein256_8way_context skein;
bmw256_8way_context bmw;
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
bool init_lyra2rev2_8way_ctx()
{
keccak256_8way_init( &l2v2_8way_ctx.keccak );
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
skein256_8way_init( &l2v2_8way_ctx.skein );
bmw256_8way_init( &l2v2_8way_ctx.bmw );
return true;
}
void lyra2rev2_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
uint32_t hash3[8] __attribute__ ((aligned (64)));
uint32_t hash4[8] __attribute__ ((aligned (64)));
uint32_t hash5[8] __attribute__ ((aligned (64)));
uint32_t hash6[8] __attribute__ ((aligned (64)));
uint32_t hash7[8] __attribute__ ((aligned (64)));
lyra2v2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &l2v2_8way_ctx, sizeof(l2v2_8way_ctx) );
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
blake256_8way_close( &ctx.blake, vhash );
rintrlv_8x32_8x64( vhashA, vhash, 256 );
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
keccak256_8way_close( &ctx.keccak, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_2x256( vhash, hash0, hash1, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash0, hash1, vhash, 256 );
intrlv_2x256( vhash, hash2, hash3, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash2, hash3, vhash, 256 );
intrlv_2x256( vhash, hash4, hash5, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash4, hash5, vhash, 256 );
intrlv_2x256( vhash, hash6, hash7, 256 );
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
dintrlv_2x256( hash6, hash7, vhash, 256 );
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
skein256_8way_update( &ctx.skein, vhash, 32 );
skein256_8way_close( &ctx.skein, vhash );
dintrlv_8x64( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash, 256 );
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7, 256 );
@@ -97,49 +196,49 @@ void lyra2rev2_8way_hash( void *state, const void *input )
bmw256_8way_close( &ctx.bmw, state );
}
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &hash[7*8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19; // aligned
int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
if ( bench ) ptarget[7] = 0x0000ff;
mm256_bswap32_intrlv80_8x32( vdata, pdata );
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
blake256_8way_init( &l2v2_8way_ctx.blake );
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
do
{
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
n+3, n+2, n+1, n ) );
lyra2rev2_8way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
n += 8;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -226,15 +325,16 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hashd7 = &(hash[7<<2]);
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t targ32 = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19;
int thr_id = mythr->id;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -249,20 +349,20 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
lyra2rev2_4way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -99,7 +99,7 @@ int scanhash_lyra2rev2( struct work *work,
lyra2rev2_hash(hash, endiandata);
if (hash[7] <= Htarg )
if( fulltest( hash, ptarget ) && !opt_benchmark )
if( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );

View File

@@ -130,15 +130,15 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<4];
uint32_t *hashd7 = &hash[7*16];
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce - 16;
const uint32_t Htarg = ptarget[7];
__m512i *noncev = (__m512i*)vdata + 19; // aligned
const uint32_t targ32 = ptarget[7];
__m512i *noncev = (__m512i*)vdata + 19;
const int thr_id = mythr->id;
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
@@ -159,10 +159,10 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
pdata[19] = n;
for ( int lane = 0; lane < 16; lane++ )
if ( unlikely( hash7[lane] <= Htarg ) )
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_16x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
@@ -170,6 +170,7 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
}
n += 16;
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -194,7 +195,7 @@ bool init_lyra2rev3_8way_ctx()
void lyra2rev3_8way_hash( void *state, const void *input )
{
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (32)));
uint32_t hash2[8] __attribute__ ((aligned (32)));
@@ -250,17 +251,17 @@ void lyra2rev3_8way_hash( void *state, const void *input )
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &hash[7<<3];
uint32_t *hashd7 = &hash[7*8];
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19; // aligned
const uint32_t targ32 = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
@@ -277,7 +278,7 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
pdata[19] = n;
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash7[lane] <= Htarg ) )
if ( unlikely( hashd7[lane] <= targ32 ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
@@ -357,42 +358,41 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hashd7 = &(hash[7*4]);
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const uint32_t Htarg = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19; // aligned
const int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t targ32 = ptarget[7];
__m128i *noncev = (__m128i*)vdata + 19;
const int thr_id = mythr->id;
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
mm128_bswap32_intrlv80_4x32( vdata, pdata );
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
blake256_4way_init( &l2v3_4way_ctx.blake );
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
do
{
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
lyra2rev3_4way_hash( hash, vdata );
pdata[19] = n;
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
pdata[19] = n;
*hashes_done = n - first_nonce + 1;
return 0;
}

View File

@@ -88,7 +88,7 @@ int scanhash_lyra2rev3( struct work *work,
lyra2rev3_hash(hash, endiandata);
if (hash[7] <= Htarg )
if( fulltest( hash, ptarget ) && !opt_benchmark )
if( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );

View File

@@ -56,7 +56,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
int thr_id = mythr->id;
if (opt_benchmark)
ptarget[7] = 0x0000ff;
@@ -65,14 +65,13 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[i], pdata[i]);
}
lyra2z_midstate( endiandata );
lyra2z_midstate( endiandata );
do {
be32enc(&endiandata[19], nonce);
lyra2z_hash( hash, endiandata );
if ( hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );

View File

@@ -9,7 +9,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
{
uint32_t _ALIGN(256) hash[16];
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
2, 330, 256 );
memcpy(state, hash, 32);
@@ -18,38 +18,40 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__ ((aligned (64)));
uint32_t hash[8] __attribute__ ((aligned (128)));
uint32_t edata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
if (opt_benchmark)
ptarget[7] = 0x0000ff;
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
do
{
be32enc( &endiandata[19], nonce );
lyra2z330_hash( hash, endiandata, work->height );
if ( hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
edata[19] = nonce;
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, edata, 80, edata, 80,
2, 330, 256 );
// lyra2z330_hash( hash, edata, work->height );
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
be32enc( pdata + 19, nonce );
submit_solution( work, hash, mythr );
}
nonce++;
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = nonce - first_nonce;
return 0;
}

View File

@@ -1,233 +1,501 @@
/**
* Phi-2 algo Implementation
*/
#include "lyra2-gate.h"
#if defined(PHI2_4WAY)
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/gost/sph_gost.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "lyra2.h"
#if defined(__VAES__)
#include "algo/echo/echo-hash-4way.h"
#elif defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#endif
#if defined(PHI2_8WAY)
typedef struct {
cubehashParam cube;
jh512_8way_context jh;
#if defined(__VAES__)
echo_4way_context echo;
#else
hashState_echo echo;
#endif
sph_gost512_context gost;
skein512_8way_context skein;
} phi2_8way_ctx_holder;
void phi2_8way_hash( void *state, const void *input )
{
unsigned char _ALIGN(128) hash[64*8];
unsigned char _ALIGN(128) hashA[64*2];
unsigned char _ALIGN(64) hash0[64];
unsigned char _ALIGN(64) hash1[64];
unsigned char _ALIGN(64) hash2[64];
unsigned char _ALIGN(64) hash3[64];
unsigned char _ALIGN(64) hash4[64];
unsigned char _ALIGN(64) hash5[64];
unsigned char _ALIGN(64) hash6[64];
unsigned char _ALIGN(64) hash7[64];
const int size = phi2_has_roots ? 144 : 80 ;
phi2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
cubehash_full( &ctx.cube, (byte*)hash0, 512,
(const byte*)input, size );
cubehash_full( &ctx.cube, (byte*)hash1, 512,
(const byte*)input + 144, size );
cubehash_full( &ctx.cube, (byte*)hash2, 512,
(const byte*)input + 2*144, size );
cubehash_full( &ctx.cube, (byte*)hash3, 512,
(const byte*)input + 3*144, size );
cubehash_full( &ctx.cube, (byte*)hash4, 512,
(const byte*)input + 4*144, size );
cubehash_full( &ctx.cube, (byte*)hash5, 512,
(const byte*)input + 5*144, size );
cubehash_full( &ctx.cube, (byte*)hash6, 512,
(const byte*)input + 6*144, size );
cubehash_full( &ctx.cube, (byte*)hash7, 512,
(const byte*)input + 7*144, size );
intrlv_2x256( hashA, hash0, hash1, 512 );
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
dintrlv_2x256( hash0, hash1, hash, 512 );
intrlv_2x256( hashA, hash2, hash3, 512 );
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
dintrlv_2x256( hash2, hash3, hash, 512 );
intrlv_2x256( hashA, hash4, hash5, 512 );
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
dintrlv_2x256( hash4, hash5, hash, 512 );
intrlv_2x256( hashA, hash6, hash7, 512 );
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
dintrlv_2x256( hash6, hash7, hash, 512 );
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, (const void*)hash, 64 );
jh512_8way_close( &ctx.jh, (void*)hash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, hash );
#if defined (__VAES__)
unsigned char _ALIGN(64) hashA0[64];
unsigned char _ALIGN(64) hashA1[64];
unsigned char _ALIGN(64) hashA2[64];
unsigned char _ALIGN(64) hashA3[64];
unsigned char _ALIGN(64) hashA4[64];
unsigned char _ALIGN(64) hashA5[64];
unsigned char _ALIGN(64) hashA6[64];
unsigned char _ALIGN(64) hashA7[64];
intrlv_4x128_512( hash, hash0, hash1, hash2, hash3 );
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
dintrlv_4x128_512( hashA0, hashA1, hashA2, hashA3, hash );
intrlv_4x128_512( hash, hash4, hash5, hash6, hash7 );
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
dintrlv_4x128_512( hashA4, hashA5, hashA6, hashA7, hash );
#endif
if ( hash0[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
sph_gost512_close( &ctx.gost, (void*)hash0 );
}
else
#if defined (__VAES__)
memcpy( hash0, hashA0, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
}
#endif
if ( hash1[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
sph_gost512_close( &ctx.gost, (void*)hash1 );
}
else
#if defined (__VAES__)
memcpy( hash1, hashA1, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
}
#endif
if ( hash2[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
sph_gost512_close( &ctx.gost, (void*)hash2 );
}
else
#if defined (__VAES__)
memcpy( hash2, hashA2, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
}
#endif
if ( hash3[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
sph_gost512_close( &ctx.gost, (void*)hash3 );
}
else
#if defined (__VAES__)
memcpy( hash3, hashA3, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
}
#endif
if ( hash4[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash4, 64 );
sph_gost512_close( &ctx.gost, (void*)hash4 );
}
else
#if defined (__VAES__)
memcpy( hash4, hashA4, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)hash4, 64 );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)hash4, 64 );
}
#endif
if ( hash5[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash5, 64 );
sph_gost512_close( &ctx.gost, (void*)hash5 );
}
else
#if defined (__VAES__)
memcpy( hash5, hashA5, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)hash5, 64 );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)hash5, 64 );
}
#endif
if ( hash6[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash6, 64 );
sph_gost512_close( &ctx.gost, (void*)hash6 );
}
else
#if defined (__VAES__)
memcpy( hash6, hashA6, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)hash6, 64 );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)hash6, 64 );
}
#endif
if ( hash7[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash7, 64 );
sph_gost512_close( &ctx.gost, (void*)hash7 );
}
else
#if defined (__VAES__)
memcpy( hash7, hashA7, 64 );
#else
{
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)hash7, 64 );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)hash7, 64 );
}
#endif
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, (const void*)hash, 64 );
skein512_8way_close( &ctx.skein, (void*)hash );
for ( int i = 0; i < 4; i++ )
casti_m512i( state, i ) = _mm512_xor_si512( casti_m512i( hash, i ),
casti_m512i( hash, i+4 ) );
}
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash[16*8];
uint32_t _ALIGN(128) edata[36*8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t *hash7 = &(hash[49]);
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x00ff;
phi2_has_roots = false;
for ( int i = 0; i < 36; i++ )
{
be32enc( &edata[i], pdata[i] );
edata[ i + 36 ] = edata[ i + 2*36 ] = edata[ i + 3*36 ] =
edata[ i + 4*36 ] = edata[ i + 5*36 ] = edata[ i + 6*36 ] =
edata[ i + 7*36 ] = edata[ i ];
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
}
edata[ 19 ] = n;
edata[ 36 + 19 ] = n+1;
edata[ 2*36 + 19 ] = n+2;
edata[ 3*36 + 19 ] = n+3;
edata[ 4*36 + 19 ] = n+4;
edata[ 5*36 + 19 ] = n+5;
edata[ 6*36 + 19 ] = n+6;
edata[ 7*36 + 19 ] = n+7;
do {
phi2_8way_hash( hash, edata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
{
uint64_t _ALIGN(64) lane_hash[8];
extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
be32enc( pdata + 19, n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 8;
edata[ 19 ] += 8;
edata[ 36 + 19 ] += 8;
edata[ 2*36 + 19 ] += 8;
edata[ 3*36 + 19 ] += 8;
edata[ 4*36 + 19 ] += 8;
edata[ 5*36 + 19 ] += 8;
edata[ 6*36 + 19 ] += 8;
edata[ 7*36 + 19 ] += 8;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(PHI2_4WAY)
typedef struct {
cubehashParam cube;
jh512_4way_context jh;
#if defined(__AES__)
hashState_echo echo;
// hashState_echo echo2;
#else
sph_echo512_context echo;
#endif
sph_gost512_context gost;
skein512_4way_context skein;
} phi2_ctx_holder;
/*
phi2_ctx_holder phi2_ctx;
} phi2_4way_ctx_holder;
void init_phi2_ctx()
phi2_4way_ctx_holder phi2_4way_ctx;
void phi2_4way_hash(void *state, const void *input)
{
cubehashInit( &phi2_ctx.cube, 512, 16, 32 );
sph_jh512_init(&phi2_ctx.jh);
init_echo( &phi2_ctx.echo1, 512 );
init_echo( &phi2_ctx.echo2, 512 );
sph_gost512_init(&phi2_ctx.gost);
sph_skein512_init(&phi2_ctx.skein);
};
*/
void phi2_hash_4way( void *state, const void *input )
{
uint32_t hash[4][16] __attribute__ ((aligned (64)));
uint32_t hashA[4][16] __attribute__ ((aligned (64)));
uint32_t hashB[4][16] __attribute__ ((aligned (64)));
uint32_t vhash[4*16] __attribute__ ((aligned (64)));
unsigned char _ALIGN(128) hash[64*4];
unsigned char _ALIGN(64) hash0[64];
unsigned char _ALIGN(64) hash1[64];
unsigned char _ALIGN(64) hash2[64];
unsigned char _ALIGN(64) hash3[64];
unsigned char _ALIGN(64) hash0A[64];
unsigned char _ALIGN(64) hash1A[64];
unsigned char _ALIGN(64) hash2A[64];
unsigned char _ALIGN(64) hash3A[64];
const int size = phi2_has_roots ? 144 : 80 ;
phi2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
// unsigned char _ALIGN(128) hash[64];
// unsigned char _ALIGN(128) hashA[64];
// unsigned char _ALIGN(128) hashB[64];
cubehash_full( &ctx.cube, (byte*)hash0A, 512,
(const byte*)input, size );
cubehash_full( &ctx.cube, (byte*)hash1A, 512,
(const byte*)input + 144, size );
cubehash_full( &ctx.cube, (byte*)hash2A, 512,
(const byte*)input + 2*144, size );
cubehash_full( &ctx.cube, (byte*)hash3A, 512,
(const byte*)input + 3*144, size );
LYRA2RE( &hash0[ 0], 32, hash0A, 32, hash0A, 32, 1, 8, 8 );
LYRA2RE( &hash0[32], 32, hash0A+32, 32, hash0A+32, 32, 1, 8, 8 );
LYRA2RE( &hash1[ 0], 32, hash1A, 32, hash1A, 32, 1, 8, 8 );
LYRA2RE( &hash1[32], 32, hash1A+32, 32, hash1A+32, 32, 1, 8, 8 );
LYRA2RE( &hash2[ 0], 32, hash2A, 32, hash2A, 32, 1, 8, 8 );
LYRA2RE( &hash2[32], 32, hash2A+32, 32, hash2A+32, 32, 1, 8, 8 );
LYRA2RE( &hash3[ 0], 32, hash3A, 32, hash3A, 32, 1, 8, 8 );
LYRA2RE( &hash3[32], 32, hash3A+32, 32, hash3A+32, 32, 1, 8, 8 );
phi2_ctx_holder ctx __attribute__ ((aligned (64)));
// memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[0], (const byte*)input,
phi2_has_roots ? 144 : 80 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[1], (const byte*)input+144,
phi2_has_roots ? 144 : 80 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[2], (const byte*)input+288,
phi2_has_roots ? 144 : 80 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[3], (const byte*)input+432,
phi2_has_roots ? 144 : 80 );
LYRA2RE( &hashA[0][0], 32, &hashB[0][0], 32, &hashB[0][0], 32, 1, 8, 8 );
LYRA2RE( &hashA[0][8], 32, &hashB[0][8], 32, &hashB[0][8], 32, 1, 8, 8 );
LYRA2RE( &hashA[1][0], 32, &hashB[1][0], 32, &hashB[1][0], 32, 1, 8, 8 );
LYRA2RE( &hashA[1][8], 32, &hashB[1][8], 32, &hashB[1][8], 32, 1, 8, 8 );
LYRA2RE( &hashA[2][0], 32, &hashB[2][0], 32, &hashB[2][0], 32, 1, 8, 8 );
LYRA2RE( &hashA[2][8], 32, &hashB[2][8], 32, &hashB[2][8], 32, 1, 8, 8 );
LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
jh512_4way_init( &ctx.jh );
jh512_4way( &ctx.jh, vhash, 64 );
jh512_4way_close( &ctx.jh, vhash );
jh512_4way_update( &ctx.jh, (const void*)hash, 64 );
jh512_4way_close( &ctx.jh, (void*)hash );
dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, hash );
if ( hash[0][0] & 1 )
if ( hash0[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash[0], 64 );
sph_gost512_close( &ctx.gost, (void*)hash[0] );
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
sph_gost512_close( &ctx.gost, (void*)hash0 );
}
else
{
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
(const BitSequence *)hash[0], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
(const BitSequence *)hash[0], 512 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
}
if ( hash[1][0] & 1 )
if ( hash1[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash[1], 64 );
sph_gost512_close( &ctx.gost, (void*)hash[1] );
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
sph_gost512_close( &ctx.gost, (void*)hash1 );
}
else
{
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
(const BitSequence *)hash[1], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
(const BitSequence *)hash[1], 512 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
}
if ( hash[2][0] & 1 )
if ( hash2[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash[2], 64 );
sph_gost512_close( &ctx.gost, (void*)hash[2] );
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
sph_gost512_close( &ctx.gost, (void*)hash2 );
}
else
{
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
(const BitSequence *)hash[2], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
(const BitSequence *)hash[2], 512 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
}
if ( hash[3][0] & 1 )
if ( hash3[0] & 1 )
{
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, (const void*)hash[3], 64 );
sph_gost512_close( &ctx.gost, (void*)hash[3] );
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
sph_gost512_close( &ctx.gost, (void*)hash3 );
}
else
{
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
(const BitSequence *)hash[3], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
(const BitSequence *)hash[3], 512 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
}
intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein );
skein512_4way( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_update( &ctx.skein, (const void*)hash, 64 );
skein512_4way_close( &ctx.skein, (void*)hash );
for (int i=0; i<4; i++)
{
( (uint64_t*)vhash )[i] ^= ( (uint64_t*)vhash )[i+4];
( (uint64_t*)vhash+ 8 )[i] ^= ( (uint64_t*)vhash+ 8 )[i+4];
( (uint64_t*)vhash+16 )[i] ^= ( (uint64_t*)vhash+16 )[i+4];
( (uint64_t*)vhash+24 )[i] ^= ( (uint64_t*)vhash+24 )[i+4];
}
// for ( int i = 0; i < 4; i++ )
// casti_m256i( vhash, i ) = _mm256_xor_si256( casti_m256i( vhash, i ),
// casti_m256i( vhash, i+4 ) );
memcpy( state, vhash, 128 );
for ( int i = 0; i < 4; i++ )
casti_m256i( state, i ) = _mm256_xor_si256( casti_m256i( hash, i ),
casti_m256i( hash, i+4 ) );
}
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash[8];
uint32_t _ALIGN(128) edata[36];
uint32_t vdata[4][36] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t _ALIGN(128) hash[16*4];
uint32_t _ALIGN(128) edata[36*4];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t *hash7 = &(hash[25]); // 3*8+1
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
if(opt_benchmark){
ptarget[7] = 0x00ff;
}
// Data is not interleaved, but hash is.
// any non-zero data at index 20 or above sets roots true.
// Split up the operations, bswap first, then set roots.
phi2_has_roots = false;
for ( int i=0; i < 36; i++ )
{
be32enc(&edata[i], pdata[i]);
if (i >= 20 && pdata[i]) phi2_has_roots = true;
}
/*
casti_m256i( vdata[0], 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
casti_m256i( vdata[0], 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
casti_m256i( vdata[0], 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
casti_m256i( vdata[0], 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
casti_m128i( vdata[0], 8 ) = mm128_bswap_32( casti_m128i( pdata, 8 ) );
phi2_has_roots = mm128_anybits1( casti_m128i( vdata[0], 5 ) ) ||
mm128_anybits1( casti_m128i( vdata[0], 6 ) ) ||
mm128_anybits1( casti_m128i( vdata[0], 7 ) ) ||
mm128_anybits1( casti_m128i( vdata[0], 8 ) );
*/
memcpy( vdata[0], edata, 144 );
memcpy( vdata[1], edata, 144 );
memcpy( vdata[2], edata, 144 );
memcpy( vdata[3], edata, 144 );
do {
be32enc( &vdata[0][19], n );
be32enc( &vdata[1][19], n+1 );
be32enc( &vdata[2][19], n+2 );
be32enc( &vdata[3][19], n+3 );
phi2_hash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return 0;
}
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x00ff;
#endif // PHI2_4WAY
phi2_has_roots = false;
for ( int i = 0; i < 36; i++ )
{
be32enc( &edata[i], pdata[i] );
edata[ i+36 ] = edata[ i+72 ] = edata[ i+108 ] = edata[i];
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
}
edata[ 19 ] = n;
edata[ 36 + 19 ] = n+1;
edata[ 2*36 + 19 ] = n+2;
edata[ 3*36 + 19 ] = n+3;
do {
phi2_4way_hash( hash, edata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
{
uint64_t _ALIGN(64) lane_hash[8];
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )
{
be32enc( pdata + 19, n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
edata[ 19 ] += 4;
edata[ 36 + 19 ] += 4;
edata[ 2*36 + 19 ] += 4;
edata[ 3*36 + 19 ] += 4;
n +=4;
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -99,7 +99,6 @@ int scanhash_phi2( struct work *work, uint32_t max_nonce,
uint32_t _ALIGN(128) edata[36];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
const int thr_id = mythr->id;

View File

@@ -158,7 +158,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
{
// ignore POK in first word
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|| ( *nonceptr >= *end_nonce_ptr ) )
{

View File

@@ -162,59 +162,35 @@ int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = {
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] = {
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
const int thr_id = mythr->id;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])
do
{
anime_4way_hash( hash, vdata );
for ( int i = 0; i < 4; i++ )
if ( valid_hash( hash+(i<<3), ptarget ) && !opt_benchmark )
{
uint32_t mask = masks[m];
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
anime_4way_hash( hash, vdata );
pdata[19] = n;
for ( int i = 0; i < 4; i++ )
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = n+i;
submit_lane_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
break;
pdata[19] = bswap_32( n+i );
submit_solution( work, hash+(i<<3), mythr );
}
*hashes_done = n - first_nonce + 1;
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -126,49 +126,28 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t hash[8] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = {
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] = {
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
const int thr_id = mythr->id;
const int bench = opt_benchmark;
swab32_array( edata, pdata, 20 );
swab32_array( endiandata, pdata, 20 );
for (int m=0; m < 6; m++)
if (Htarg <= htmax[m])
{
uint32_t mask = masks[m];
do
{
be32enc( &endiandata[19], n );
anime_hash( hash, endiandata );
pdata[19] = n;
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
submit_solution( work, hash, mythr );
n++;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
do
{
edata[19] = n;
anime_hash( hash, edata );
if ( valid_hash( hash, ptarget ) && !bench )
{
be32enc( &pdata[19], n );
submit_solution( work, hash, mythr );
}
n++;
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -1028,7 +1028,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
// B
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
@@ -1050,14 +1050,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
m256_zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -1101,14 +1101,14 @@ extern void hmq1725_4way_hash(void *state, const void *input)
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
m256_zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -1180,7 +1180,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
// B
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
haval256_5_4way_init( &ctx.haval );
haval256_5_4way_update( &ctx.haval, vhash, 64 );
@@ -1407,7 +1407,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
sha512_4way_init( &ctx.sha512 );
sha512_4way_update( &ctx.sha512, vhash, 64 );
@@ -1443,7 +1443,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
// 4x32 for haval
intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
haval256_5_4way_init( &ctx.haval );
haval256_5_4way_update( &ctx.haval, vhash, 64 );

View File

@@ -402,14 +402,14 @@ void quark_4way_hash( void *state, const void *input )
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, vhash, 64 );
blake512_4way_close( &ctx.blake, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
@@ -427,14 +427,14 @@ void quark_4way_hash( void *state, const void *input )
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
if ( mm256_anybits0( vh_mask ) )
if ( mm256_anybits1( vh_mask ) )
{
keccak512_4way_init( &ctx.keccak );
keccak512_4way_update( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhashA );
}
if ( mm256_anybits1( vh_mask ) )
if ( mm256_anybits0( vh_mask ) )
{
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );

View File

@@ -13,17 +13,21 @@
#if defined (SKEIN_8WAY)
static __thread skein512_8way_context skein512_8way_ctx
__attribute__ ((aligned (64)));
void skeinhash_8way( void *state, const void *input )
{
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
skein512_8way_context ctx_skein;
memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
sha256_8way_context ctx_sha256;
skein512_8way_init( &ctx_skein );
skein512_8way_update( &ctx_skein, input, 80 );
skein512_8way_close( &ctx_skein, vhash64 );
skein512_8way_full( &ctx_skein, vhash64, input, 80 );
// skein512_8way_update( &ctx_skein, input + (64*8), 16 );
// skein512_8way_close( &ctx_skein, vhash64 );
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
@@ -36,63 +40,74 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
uint32_t hash[16*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *hash_d7 = &(hash[7*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t targ_d7 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
int thr_id = mythr->id;
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
// skein512_8way_init( &skein512_8way_ctx );
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
skeinhash_8way( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( hash7[ lane ] <= Htarg )
if ( unlikely( hash_d7[ lane ] <= targ_d7 ) && !bench )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined (SKEIN_4WAY)
//static __thread skein512_4way_context skein512_4way_ctx
// __attribute__ ((aligned (64)));
void skeinhash_4way( void *state, const void *input )
{
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
skein512_4way_context ctx_skein;
// memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
#if defined(__SHA__)
uint32_t hash0[16] __attribute__ ((aligned (64)));
uint32_t hash1[16] __attribute__ ((aligned (64)));
uint32_t hash2[16] __attribute__ ((aligned (64)));
uint32_t hash3[16] __attribute__ ((aligned (64)));
SHA256_CTX ctx_sha256;
SHA256_CTX ctx_sha256;
#else
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
sha256_4way_context ctx_sha256;
#endif
skein512_4way_init( &ctx_skein );
skein512_4way_update( &ctx_skein, input, 80 );
skein512_4way_close( &ctx_skein, vhash64 );
skein512_4way_full( &ctx_skein, vhash64, input, 80 );
// skein512_4way_update( &ctx_skein, input + (64*4), 16 );
// skein512_4way_close( &ctx_skein, vhash64 );
#if defined(__SHA__)
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
@@ -127,38 +142,44 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hash_d7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t targ_d7 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// skein512_4way_init( &skein512_4way_ctx );
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skeinhash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane ] <= Htarg )
if ( unlikely( ( hash_d7[ lane ] <= targ_d7 ) && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart );
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -4,14 +4,16 @@
bool register_skein_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
#if defined (SKEIN_8WAY)
gate->optimizations = AVX2_OPT | AVX512_OPT;
gate->scanhash = (void*)&scanhash_skein_8way;
gate->hash = (void*)&skeinhash_8way;
#elif defined (SKEIN_4WAY)
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_skein_4way;
gate->hash = (void*)&skeinhash_4way;
#else
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_skein;
gate->hash = (void*)&skeinhash;
#endif

View File

@@ -654,6 +654,80 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
memcpy_512( dst, buf, out_len >> 3 );
}
void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
size_t len )
{
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
__m512i *vdata = (__m512i*)data;
__m512i *buf = sc->buf;
size_t ptr = 0;
unsigned first;
uint64_t bcount = 0;
const int buf_size = 64; // 64 * _m256i
// Init
h0 = m512_const1_64( 0x4903ADFF749C51CE );
h1 = m512_const1_64( 0x0D95DE399746DF03 );
h2 = m512_const1_64( 0x8FD1934127C79BCE );
h3 = m512_const1_64( 0x9A255629FF352CB1 );
h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
h6 = m512_const1_64( 0x991112C71A75B523 );
h7 = m512_const1_64( 0xAE18A40B660FCC33 );
// Update
if ( len <= buf_size - ptr )
{
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
}
else
{
first = ( bcount == 0 ) << 7;
do {
size_t clen;
if ( ptr == buf_size )
{
bcount ++;
UBI_BIG_8WAY( 96 + first, 0 );
first = 0;
ptr = 0;
}
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata += (clen>>3);
len -= clen;
} while ( len > 0 );
}
// Close
unsigned et;
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
et = 352 + ((bcount == 0) << 7);
UBI_BIG_8WAY( et, ptr );
memset_zero_512( buf, buf_size >> 3 );
bcount = 0;
UBI_BIG_8WAY( 510, 8 );
casti_m512i( out, 0 ) = h0;
casti_m512i( out, 1 ) = h1;
casti_m512i( out, 2 ) = h2;
casti_m512i( out, 3 ) = h3;
casti_m512i( out, 4 ) = h4;
casti_m512i( out, 5 ) = h5;
casti_m512i( out, 6 ) = h6;
casti_m512i( out, 7 ) = h7;
}
void
skein256_8way_update(void *cc, const void *data, size_t len)
{
@@ -709,6 +783,7 @@ void skein512_4way_init( skein512_4way_context *sc )
sc->ptr = 0;
}
// Do not use for 128 bt data length
static void
skein_big_core_4way( skein512_4way_context *sc, const void *data,
size_t len )
@@ -794,6 +869,79 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
memcpy_256( dst, buf, out_len >> 3 );
}
void
skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
size_t len )
{
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
__m256i *vdata = (__m256i*)data;
__m256i *buf = sc->buf;
size_t ptr = 0;
unsigned first;
const int buf_size = 64; // 64 * __m256i
uint64_t bcount = 0;
h0 = m256_const1_64( 0x4903ADFF749C51CE );
h1 = m256_const1_64( 0x0D95DE399746DF03 );
h2 = m256_const1_64( 0x8FD1934127C79BCE );
h3 = m256_const1_64( 0x9A255629FF352CB1 );
h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
h6 = m256_const1_64( 0x991112C71A75B523 );
h7 = m256_const1_64( 0xAE18A40B660FCC33 );
// Update
if ( len <= buf_size - ptr )
{
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
}
else
{
first = ( bcount == 0 ) << 7;
do {
size_t clen;
if ( ptr == buf_size )
{
bcount ++;
UBI_BIG_4WAY( 96 + first, 0 );
first = 0;
ptr = 0;
}
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata += (clen>>3);
len -= clen;
} while ( len > 0 );
}
// Close
unsigned et;
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
et = 352 + ((bcount == 0) << 7);
UBI_BIG_4WAY( et, ptr );
memset_zero_256( buf, buf_size >> 3 );
bcount = 0;
UBI_BIG_4WAY( 510, 8 );
casti_m256i( out, 0 ) = h0;
casti_m256i( out, 1 ) = h1;
casti_m256i( out, 2 ) = h2;
casti_m256i( out, 3 ) = h3;
casti_m256i( out, 4 ) = h4;
casti_m256i( out, 5 ) = h5;
casti_m256i( out, 6 ) = h6;
casti_m256i( out, 7 ) = h7;
}
void
skein256_4way_update(void *cc, const void *data, size_t len)
{
@@ -806,6 +954,9 @@ skein256_4way_close(void *cc, void *dst)
skein_big_close_4way(cc, 0, 0, dst, 32);
}
// Do not use with 128 bit data
void
skein512_4way_update(void *cc, const void *data, size_t len)
{

View File

@@ -63,6 +63,8 @@ typedef struct
typedef skein_8way_big_context skein512_8way_context;
typedef skein_8way_big_context skein256_8way_context;
void skein512_8way_full( skein512_8way_context *sc, void *out,
const void *data, size_t len );
void skein512_8way_init( skein512_8way_context *sc );
void skein512_8way_update( void *cc, const void *data, size_t len );
void skein512_8way_close( void *cc, void *dst );
@@ -85,6 +87,8 @@ typedef skein_4way_big_context skein512_4way_context;
typedef skein_4way_big_context skein256_4way_context;
void skein512_4way_init( skein512_4way_context *sc );
void skein512_4way_full( skein512_4way_context *sc, void *out,
const void *data, size_t len );
void skein512_4way_update( void *cc, const void *data, size_t len );
void skein512_4way_close( void *cc, void *dst );

View File

@@ -5,114 +5,131 @@
#if defined(SKEIN_8WAY)
// static __thread skein512_8way_context skein512_8way_ctx
// __attribute__ ((aligned (64)));
void skein2hash_8way( void *output, const void *input )
{
skein512_8way_context ctx;
uint64_t hash[16*8] __attribute__ ((aligned (128)));
skein512_8way_context ctx;
// memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
skein512_8way_init( &ctx );
skein512_8way_update( &ctx, input, 80 );
skein512_8way_close( &ctx, hash );
skein512_8way_full( &ctx, hash, input, 80 );
skein512_8way_init( &ctx );
skein512_8way_update( &ctx, hash, 64 );
skein512_8way_close( &ctx, output );
// skein512_8way_update( &ctx, input + (64*8), 16 );
// skein512_8way_close( &ctx, hash );
skein512_8way_full( &ctx, output, hash, 64 );
}
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*8] __attribute__ ((aligned (128)));
uint64_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[49]);
uint64_t *hashq3 = &(hash[3*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint64_t targq3 = ((uint64_t*)ptarget)[3];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
uint32_t n = first_nonce;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
int thr_id = mythr->id;
__m512i *noncev = (__m512i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
// skein512_8way_init( &skein512_8way_ctx );
// skein512_8way_update( &skein512_8way_ctx, vdata, 64 );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
skein2hash_8way( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( hash7[ lane<<1 ] <= Htarg )
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
{
extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !bench )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#elif defined(SKEIN_4WAY)
//static __thread skein512_4way_context skein512_4way_ctx
// __attribute__ ((aligned (64)));
void skein2hash_4way( void *output, const void *input )
{
skein512_4way_context ctx;
// memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
uint64_t hash[16*4] __attribute__ ((aligned (64)));
skein512_4way_init( &ctx );
skein512_4way_update( &ctx, input, 80 );
skein512_4way_close( &ctx, hash );
// skein512_4way_update( &ctx, input + (64*4), 16 );
// skein512_4way_close( &ctx, hash );
skein512_4way_init( &ctx );
skein512_4way_update( &ctx, hash, 64 );
skein512_4way_close( &ctx, output );
skein512_4way_full( &ctx, hash, input, 80 );
skein512_4way_full( &ctx, output, hash, 64 );
}
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint64_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[25]);
uint64_t *hash_q3 = &(hash[3*4]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
int thr_id = mythr->id; // thr_id arg is deprecated
__m256i *noncev = (__m256i*)vdata + 9;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
// skein512_4way_init( &skein512_4way_ctx );
// skein512_4way_update( &skein512_4way_ctx, vdata, 64 );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
skein2hash_4way( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane<<1 ] <= Htarg )
if ( hash_q3[ lane ] <= targ_q3 )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) && !bench )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -30,9 +30,6 @@
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
static void hex_getAlgoString(const uint32_t* prevblock, char *output)
{
char *sptr = output;
@@ -86,7 +83,7 @@ void hex_hash( void* output, const void* input )
void *in = (void*) input;
int size = 80;
char elem = hashOrder[0];
char elem = x16r_hash_order[0];
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
for ( int i = 0; i < 16; i++ )
@@ -235,7 +232,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
const uint32_t last_nonce = max_nonce;
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
@@ -244,17 +241,18 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
mm128_bswap32_80( edata, pdata );
static __thread uint32_t s_ntime = UINT32_MAX;
uint32_t ntime = swab32(pdata[17]);
if ( s_ntime != ntime )
{
hex_getAlgoString( (const uint32_t*) (&edata[1]), hashOrder );
hex_getAlgoString( (const uint32_t*) (&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{

View File

@@ -692,14 +692,15 @@ void x16r_4way_hash_generic( void* output, const void* input )
break;
case SKEIN:
if ( i == 0 )
{
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
skein512_4way_close( &ctx.skein, vhash );
}
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, size );
skein512_4way_full( &ctx.skein, vhash, vhash, size );
}
skein512_4way_close( &ctx.skein, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case LUFFA:

View File

@@ -134,7 +134,8 @@ void x16r_hash_generic( void* output, const void* input )
break;
case ECHO:
#if defined(__AES__)
echo_full( &ctx.echo, hash, 512, in, size );
echo_full( &ctx.echo, (BitSequence*)hash, 512,
(const BitSequence*)in, size );
#else
sph_echo512_init( &ctx.echo );
sph_echo512( &ctx.echo, in, size );
@@ -237,7 +238,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -46,7 +46,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -35,9 +35,6 @@
#if defined (X16RV2_8WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rv2_8way_context_overlay
{
blake512_8way_context blake;
@@ -96,7 +93,7 @@ void x16rv2_8way_hash( void* output, const void* input )
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -651,17 +648,19 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32( pdata[17] );
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{
@@ -737,9 +736,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
#elif defined (X16RV2_4WAY)
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rv2_4way_context_overlay
{
blake512_4way_context blake;
@@ -789,7 +785,7 @@ void x16rv2_4way_hash( void* output, const void* input )
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -1130,17 +1126,19 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
bedata1[0] = bswap_32( pdata[1] );
bedata1[1] = bswap_32( pdata[2] );
static __thread uint32_t s_ntime = UINT32_MAX;
const uint32_t ntime = bswap_32(pdata[17]);
if ( s_ntime != ntime )
{
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
}
// Do midstate prehash on hash functions with block size <= 64 bytes.
const char elem = hashOrder[0];
const char elem = x16r_hash_order[0];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
{

View File

@@ -34,7 +34,6 @@
#endif
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
union _x16rv2_context_overlay
{
@@ -74,16 +73,10 @@ void x16rv2_hash( void* output, const void* input )
x16rv2_context_overlay ctx;
void *in = (void*) input;
int size = 80;
/*
if ( s_ntime == UINT32_MAX )
{
const uint8_t* in8 = (uint8_t*) input;
x16_r_s_getAlgoString( &in8[4], hashOrder );
}
*/
for ( int i = 0; i < 16; i++ )
{
const char elem = hashOrder[i];
const char elem = x16r_hash_order[i];
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch ( algo )
@@ -203,48 +196,48 @@ int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t _ALIGN(128) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
const bool bench = opt_benchmark;
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
static __thread uint32_t s_ntime = UINT32_MAX;
if ( s_ntime != pdata[17] )
{
uint32_t ntime = swab32(pdata[17]);
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
s_ntime = ntime;
if ( opt_debug && !thr_id )
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
applog( LOG_DEBUG, "hash order %s (%08x)",
x16r_hash_order, ntime );
}
if ( opt_benchmark )
ptarget[7] = 0x0cff;
if ( bench ) ptarget[7] = 0x0cff;
do
{
be32enc( &endiandata[19], nonce );
x16rv2_hash( hash32, endiandata );
edata[19] = nonce;
x16rv2_hash( hash32, edata );
if ( hash32[7] <= Htarg )
if (fulltest( hash32, ptarget ) && !opt_benchmark )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
pdata[19] = nonce;
pdata[19] = bswap_32( nonce );
submit_solution( work, hash32, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

View File

@@ -97,7 +97,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -563,59 +563,31 @@ void sonoa_hash( void *state, const void *input )
}
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = pdata[19] - 1;
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
uint64_t htmax[] =
mm128_bswap32_80( edata, pdata );
do
{
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] =
{
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
// we need bigendian data...
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do
edata[19] = n;
sonoa_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = ++n;
be32enc(&endiandata[19], n);
sonoa_hash(hash32, endiandata);
if ( !( hash32[7] & mask ) )
if ( fulltest( hash32, ptarget ) && !opt_benchmark )
submit_solution( work, hash32, mythr );
} while (n < max_nonce && !work_restart[thr_id].restart);
break;
}
*hashes_done = n - first_nonce + 1;
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -74,9 +74,7 @@ void x17_8way_hash( void *state, const void *input )
blake512_8way_full( &ctx.blake, vhash, input, 80 );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
#if defined(__VAES__)
@@ -106,9 +104,7 @@ void x17_8way_hash( void *state, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
@@ -287,18 +283,18 @@ void x17_8way_hash( void *state, const void *input )
int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
@@ -310,7 +306,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
x17_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hash7[ lane ] <= Htarg ) && !bench ) )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
@@ -378,9 +374,7 @@ void x17_4way_hash( void *state, const void *input )
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -474,18 +468,18 @@ void x17_4way_hash( void *state, const void *input )
int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t *hashd7 = &(hash[ 7*4 ]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce -4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
@@ -496,7 +490,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
x17_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash7[ lane ] <= Htarg && !bench ) )
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( valid_hash( lane_hash, ptarget ) )

View File

@@ -169,8 +169,8 @@ int scanhash_x17( struct work *work, uint32_t max_nonce,
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}

View File

@@ -76,9 +76,7 @@ void xevan_8way_hash( void *output, const void *input )
blake512_8way_full( &ctx.blake, vhash, input, 80 );
memset( &vhash[8<<3], 0, 64<<3 );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
#if defined(__VAES__)
@@ -108,9 +106,7 @@ void xevan_8way_hash( void *output, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, dataLen );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -291,9 +287,7 @@ void xevan_8way_hash( void *output, const void *input )
blake512_8way_full( &ctx.blake, vhash, vhash, dataLen );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, dataLen );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, dataLen );
#if defined(__VAES__)
@@ -323,9 +317,7 @@ void xevan_8way_hash( void *output, const void *input )
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, dataLen );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, dataLen );
@@ -504,40 +496,43 @@ void xevan_8way_hash( void *output, const void *input )
int scanhash_xevan_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*16] __attribute__ ((aligned (128)));
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash7 = &(hash[7<<3]);
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
xevan_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if unlikely( ( hash7[ lane ] <= Htarg ) )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
@@ -578,8 +573,6 @@ void xevan_4way_hash( void *output, const void *input )
const int dataLen = 128;
xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));
// parallel 4 way
blake512_4way_full( &ctx.blake, vhash, input, 80 );
memset( &vhash[8<<2], 0, 64<<2 );
@@ -598,9 +591,7 @@ void xevan_4way_hash( void *output, const void *input )
// Parallel 4way
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, dataLen );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -618,15 +609,11 @@ void xevan_4way_hash( void *output, const void *input )
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -718,9 +705,7 @@ void xevan_4way_hash( void *output, const void *input )
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, dataLen );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, dataLen );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, dataLen );
@@ -738,15 +723,11 @@ void xevan_4way_hash( void *output, const void *input )
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, dataLen );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, dataLen );
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, dataLen );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, dataLen );
simd512_2way_full( &ctx.simd, vhashB, vhashB, dataLen );
dintrlv_2x128( hash0, hash1, vhashA, dataLen<<3 );
dintrlv_2x128( hash2, hash3, vhashB, dataLen<<3 );
@@ -818,41 +799,43 @@ void xevan_4way_hash( void *output, const void *input )
int scanhash_xevan_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7<<2]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t Htarg = ptarget[7];
__m256i *noncev = (__m256i*)vdata + 9;
const uint32_t targ32 = ptarget[7];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
const bool bench = opt_benchmark;
if ( opt_benchmark )
ptarget[7] = 0x0cff;
if ( bench ) ptarget[7] = 0x0cff;
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do {
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
xevan_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if ( hash7[ lane ] <= Htarg )
if ( unlikely( hashd7[ lane ] <= targ32 ) && ! bench )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -56,8 +56,6 @@ typedef struct {
} xevan_ctx_holder;
xevan_ctx_holder xevan_ctx __attribute__ ((aligned (64)));
static __thread sph_blake512_context xevan_blake_mid
__attribute__ ((aligned (64)));
void init_xevan_ctx()
{
@@ -85,34 +83,23 @@ void init_xevan_ctx()
#endif
};
void xevan_blake512_midstate( const void* input )
{
memcpy( &xevan_blake_mid, &xevan_ctx.blake, sizeof xevan_blake_mid );
sph_blake512( &xevan_blake_mid, input, 64 );
}
void xevan_hash(void *output, const void *input)
{
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
uint32_t _ALIGN(64) hash[32]; // 128 bytes required
const int dataLen = 128;
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16
memcpy( &ctx.blake, &xevan_blake_mid, sizeof xevan_blake_mid );
sph_blake512( &ctx.blake, input + midlen, tail );
sph_blake512_close(&ctx.blake, hash);
xevan_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
sph_blake512( &ctx.blake, input, 80 );
sph_blake512_close( &ctx.blake, hash );
memset(&hash[16], 0, 64);
sph_bmw512(&ctx.bmw, hash, dataLen);
sph_bmw512_close(&ctx.bmw, hash);
#if defined(__AES__)
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)hash, dataLen*8 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const char*)hash, dataLen*8 );
#else
sph_groestl512(&ctx.groestl, hash, dataLen);
sph_groestl512_close(&ctx.groestl, hash);
@@ -127,20 +114,20 @@ void xevan_hash(void *output, const void *input)
sph_keccak512(&ctx.keccak, hash, dataLen);
sph_keccak512_close(&ctx.keccak, hash);
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
sph_shavite512(&ctx.shavite, hash, dataLen);
sph_shavite512_close(&ctx.shavite, hash);
update_final_sd( &ctx.simd, (BitSequence *)hash,
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, dataLen*8 );
#if defined(__AES__)
update_final_echo( &ctx.echo, (BitSequence *) hash,
update_final_echo( &ctx.echo, (BitSequence *) hash,
(const BitSequence *) hash, dataLen*8 );
#else
sph_echo512(&ctx.echo, hash, dataLen);
@@ -159,15 +146,15 @@ void xevan_hash(void *output, const void *input)
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
sph_whirlpool_close(&ctx.whirlpool, hash);
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
sph_haval256_5_close(&ctx.haval, hash);
memset(&hash[8], 0, dataLen - 32);
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
memcpy( &ctx, &xevan_ctx, sizeof(xevan_ctx) );
sph_blake512(&ctx.blake, hash, dataLen);
sph_blake512_close(&ctx.blake, hash);
@@ -176,11 +163,11 @@ void xevan_hash(void *output, const void *input)
sph_bmw512_close(&ctx.bmw, hash);
#if defined(__AES__)
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const BitSequence*)hash, dataLen*8 );
update_and_final_groestl( &ctx.groestl, (char*)hash,
(const BitSequence*)hash, dataLen*8 );
#else
sph_groestl512(&ctx.groestl, hash, dataLen);
sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512_close(&ctx.groestl, hash);
#endif
sph_skein512(&ctx.skein, hash, dataLen);
@@ -191,24 +178,25 @@ void xevan_hash(void *output, const void *input)
sph_keccak512(&ctx.keccak, hash, dataLen);
sph_keccak512_close(&ctx.keccak, hash);
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)hash, dataLen );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash, dataLen );
sph_shavite512(&ctx.shavite, hash, dataLen);
sph_shavite512_close(&ctx.shavite, hash);
update_final_sd( &ctx.simd, (BitSequence *)hash,
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, dataLen*8 );
#if defined(__AES__)
update_final_echo( &ctx.echo, (BitSequence *) hash,
update_final_echo( &ctx.echo, (BitSequence *) hash,
(const BitSequence *) hash, dataLen*8 );
#else
sph_echo512(&ctx.echo, hash, dataLen);
sph_echo512_close(&ctx.echo, hash);
sph_echo512(&ctx.echo, hash, dataLen);
sph_echo512_close(&ctx.echo, hash);
#endif
sph_hamsi512(&ctx.hamsi, hash, dataLen);
@@ -223,8 +211,8 @@ void xevan_hash(void *output, const void *input)
sph_whirlpool(&ctx.whirlpool, hash, dataLen);
sph_whirlpool_close(&ctx.whirlpool, hash);
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
SHA512_Update( &ctx.sha512, hash, dataLen );
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
sph_haval256_5(&ctx.haval,(const void*) hash, dataLen);
sph_haval256_5_close(&ctx.haval, hash);
@@ -233,41 +221,33 @@ void xevan_hash(void *output, const void *input)
}
int scanhash_xevan( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t _ALIGN(64) hash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if (opt_benchmark)
ptarget[7] = 0x0cff;
mm128_bswap32_80( edata, pdata );
for (int k=0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
xevan_blake512_midstate( endiandata );
do {
be32enc(&endiandata[19], nonce);
xevan_hash(hash, endiandata);
if (hash[7] <= Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
{
pdata[19] = nonce;
submit_solution( work, hash, mythr );
}
nonce++;
} while ( nonce < max_nonce && !(*restart) );
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
do
{
edata[19] = n;
xevan_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
#endif

View File

@@ -87,64 +87,40 @@ void x22i_8way_hash( void *output, const void *input )
unsigned char hashA7[64] __attribute__((aligned(32))) = {0};
x22i_8way_ctx_overlay ctx;
blake512_8way_init( &ctx.blake );
blake512_8way_update( &ctx.blake, input, 80 );
blake512_8way_close( &ctx.blake, vhash );
blake512_8way_full( &ctx.blake, vhash, input, 80 );
bmw512_8way_init( &ctx.bmw );
bmw512_8way_update( &ctx.bmw, vhash, 64 );
bmw512_8way_close( &ctx.bmw, vhash );
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
#if defined(__VAES__)
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
groestl512_4way_init( &ctx.groestl, 64 );
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
#else
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7, vhash );
dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)hash0, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)hash1, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)hash2, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)hash3, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash4,
(const char*)hash4, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash5,
(const char*)hash5, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash6,
(const char*)hash6, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash7,
(const char*)hash7, 512 );
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
hash7 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
#endif
skein512_8way_init( &ctx.skein );
skein512_8way_update( &ctx.skein, vhash, 64 );
skein512_8way_close( &ctx.skein, vhash );
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
jh512_8way_init( &ctx.jh );
jh512_8way_update( &ctx.jh, vhash, 64 );
jh512_8way_close( &ctx.jh, vhash );
@@ -155,22 +131,16 @@ void x22i_8way_hash( void *output, const void *input )
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_4way_init( &ctx.luffa, 512 );
luffa_4way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
luffa512_4way_full( &ctx.luffa, vhashA, vhashA, 64 );
luffa512_4way_full( &ctx.luffa, vhashB, vhashB, 64 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_4way_init( &ctx.cube, 512, 16, 32 );
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 64 );
cube_4way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
cube_4way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
#if defined(__VAES__)
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_init( &ctx.shavite );
shavite512_4way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_4way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_4way_full( &ctx.shavite, vhashB, vhashB, 64 );
#else
@@ -207,17 +177,13 @@ void x22i_8way_hash( void *output, const void *input )
#endif
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_4way_init( &ctx.simd, 512 );
simd_4way_update_close( &ctx.simd, vhashB, vhashB, 512 );
simd512_4way_full( &ctx.simd, vhashA, vhashA, 64 );
simd512_4way_full( &ctx.simd, vhashB, vhashB, 64 );
#if defined(__VAES__)
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashA, vhashA, 512 );
echo_4way_init( &ctx.echo, 512 );
echo_4way_update_close( &ctx.echo, vhashB, vhashB, 512 );
echo_4way_full( &ctx.echo, vhashA, 512, vhashA, 64 );
echo_4way_full( &ctx.echo, vhashB, 512, vhashB, 64 );
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
@@ -226,30 +192,22 @@ void x22i_8way_hash( void *output, const void *input )
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
(const BitSequence*)hash0, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
(const BitSequence*)hash1, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
(const BitSequence*)hash2, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
(const BitSequence*)hash3, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash4,
(const BitSequence*)hash4, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash5,
(const BitSequence*)hash5, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash6,
(const BitSequence*)hash6, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash7,
(const BitSequence*)hash7, 512 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
(const BitSequence *)hash4, 64 );
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
(const BitSequence *)hash5, 64 );
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
(const BitSequence *)hash6, 64 );
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
(const BitSequence *)hash7, 64 );
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
hash4, hash5, hash6, hash7 );
@@ -443,6 +401,55 @@ void x22i_8way_hash( void *output, const void *input )
sha256_8way_close( &ctx.sha256, output );
}
int scanhash_x22i_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x08ff;
InitializeSWIFFTX();
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x22i_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
/*
int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
@@ -488,6 +495,7 @@ int scanhash_x22i_8way( struct work* work, uint32_t max_nonce,
*hashes_done = n - first_nonce;
return 0;
}
*/
#elif defined(X22I_4WAY)
@@ -531,33 +539,21 @@ void x22i_4way_hash( void *output, const void *input )
unsigned char hashA3[64] __attribute__((aligned(32))) = {0};
x22i_ctx_overlay ctx;
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
blake512_4way_full( &ctx.blake, vhash, input, 80 );
bmw512_4way_init( &ctx.bmw );
bmw512_4way_update( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0,
(const char*)hash0, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1,
(const char*)hash1, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2,
(const char*)hash2, 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3,
(const char*)hash3, 512 );
groestl512_full( &ctx.groestl, (char*)hash0, (const char*)hash0, 512 );
groestl512_full( &ctx.groestl, (char*)hash1, (const char*)hash1, 512 );
groestl512_full( &ctx.groestl, (char*)hash2, (const char*)hash2, 512 );
groestl512_full( &ctx.groestl, (char*)hash3, (const char*)hash3, 512 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
jh512_4way_init( &ctx.jh );
jh512_4way_update( &ctx.jh, vhash, 64 );
@@ -569,41 +565,29 @@ void x22i_4way_hash( void *output, const void *input )
rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
luffa_2way_init( &ctx.luffa, 512 );
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
luffa512_2way_full( &ctx.luffa, vhashA, vhashA, 64 );
luffa512_2way_full( &ctx.luffa, vhashB, vhashB, 64 );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
cube_2way_init( &ctx.cube, 512, 16, 32 );
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
cube_2way_full( &ctx.cube, vhashA, 512, vhashA, 64 );
cube_2way_full( &ctx.cube, vhashB, 512, vhashB, 64 );
shavite512_2way_full( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_2way_full( &ctx.shavite, vhashB, vhashB, 64 );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
shavite512_2way_init( &ctx.shavite );
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
simd_2way_init( &ctx.simd, 512 );
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
dintrlv_2x128_512( hash0, hash1, vhashA );
dintrlv_2x128_512( hash2, hash3, vhashB );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0,
(const BitSequence*)hash0, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1,
(const BitSequence*)hash1, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2,
(const BitSequence*)hash2, 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3,
(const BitSequence*)hash3, 512 );
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -722,44 +706,47 @@ void x22i_4way_hash( void *output, const void *input )
int scanhash_x22i_4way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[4*16] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[ 7*4 ]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x08ff;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
InitializeSWIFFTX();
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x22i_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ )
if unlikely( ( hash7[ lane ] <= Htarg ) )
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart ) );
*hashes_done = n - first_nonce + 1;
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -167,40 +167,38 @@ void x22i_hash( void *output, const void *input )
memcpy(output, hash, 32);
}
int scanhash_x22i( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_x22i( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = first_nonce;
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = n;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
if ( bench ) ptarget[7] = 0x08ff;
mm128_bswap32_80( edata, pdata );
InitializeSWIFFTX();
do
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
x22i_hash( hash, endiandata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = pdata[19] - first_nonce;
return 0;
edata[19] = n;
x22i_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}
#endif

View File

@@ -530,6 +530,55 @@ void x25x_8way_hash( void *output, const void *input )
blake2s_8way_full_blocks( &ctx.blake2s, output, vhashX, 64*24 );
}
int scanhash_x25x_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*8] __attribute__ ((aligned (128)));
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[7*8]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if ( bench ) ptarget[7] = 0x08ff;
InitializeSWIFFTX();
mm512_bswap32_intrlv80_8x64( vdata, pdata );
*noncev = mm512_intrlv_blend_32(
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
x25x_8way_hash( hash, vdata );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( ( hashd7[ lane ] <= targ32 ) && !bench ) )
{
extr_lane_8x32( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) ) )
{
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm512_add_epi32( *noncev,
m512_const1_64( 0x0000000800000000 ) );
n += 8;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
/*
int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
@@ -574,6 +623,7 @@ int scanhash_x25x_8way( struct work* work, uint32_t max_nonce,
*hashes_done = n - first_nonce;
return 0;
}
*/
#elif defined(X25X_4WAY)
@@ -614,9 +664,7 @@ void x25x_4way_hash( void *output, const void *input )
unsigned char vhashX[24][64*4] __attribute__ ((aligned (64)));
x25x_4way_ctx_overlay ctx __attribute__ ((aligned (64)));
blake512_4way_init( &ctx.blake );
blake512_4way_update( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
blake512_4way_full( &ctx.blake, vhash, input, 80 );
dintrlv_4x64_512( hash0[0], hash1[0], hash2[0], hash3[0], vhash );
bmw512_4way_init( &ctx.bmw );
@@ -624,24 +672,13 @@ void x25x_4way_hash( void *output, const void *input )
bmw512_4way_close( &ctx.bmw, vhash );
dintrlv_4x64_512( hash0[1], hash1[1], hash2[1], hash3[1], vhash );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash0[2],
(const char*)hash0[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash1[2],
(const char*)hash1[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash2[2],
(const char*)hash2[1], 512 );
init_groestl( &ctx.groestl, 64 );
update_and_final_groestl( &ctx.groestl, (char*)hash3[2],
(const char*)hash3[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash0[2], (const char*)hash0[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash1[2], (const char*)hash1[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash2[2], (const char*)hash2[1], 512 );
groestl512_full( &ctx.groestl, (char*)hash3[2], (const char*)hash3[1], 512 );
intrlv_4x64_512( vhash, hash0[2], hash1[2], hash2[2], hash3[2] );
skein512_4way_init( &ctx.skein );
skein512_4way_update( &ctx.skein, vhash, 64 );
skein512_4way_close( &ctx.skein, vhash );
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
dintrlv_4x64_512( hash0[3], hash1[3], hash2[3], hash3[3], vhash );
jh512_4way_init( &ctx.jh );
@@ -654,32 +691,20 @@ void x25x_4way_hash( void *output, const void *input )
keccak512_4way_close( &ctx.keccak, vhash );
dintrlv_4x64_512( hash0[5], hash1[5], hash2[5], hash3[5], vhash );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0[6],
(const BitSequence*)hash0[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1[6],
(const BitSequence*)hash1[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2[6],
(const BitSequence*)hash2[5], 64 );
init_luffa( &ctx.luffa, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3[6],
(const BitSequence*)hash3[5], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0[7],
(const byte*)hash0[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash1[7],
(const byte*)hash1[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash2[7],
(const byte*)hash2[6], 64 );
cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash3[7],
(const byte*)hash3[6], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash0[6], 512,
(const BitSequence*)hash0[5], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash1[6], 512,
(const BitSequence*)hash1[5], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash2[6], 512,
(const BitSequence*)hash2[5], 64 );
luffa_full( &ctx.luffa, (BitSequence*)hash3[6], 512,
(const BitSequence*)hash3[5], 64 );
cubehash_full( &ctx.cube, (byte*)hash0[7], 512, (const byte*)hash0[6], 64 );
cubehash_full( &ctx.cube, (byte*)hash1[7], 512, (const byte*)hash1[6], 64 );
cubehash_full( &ctx.cube, (byte*)hash2[7], 512, (const byte*)hash2[6], 64 );
cubehash_full( &ctx.cube, (byte*)hash3[7], 512, (const byte*)hash3[6], 64 );
sph_shavite512_init(&ctx.shavite);
sph_shavite512(&ctx.shavite, (const void*) hash0[7], 64);
sph_shavite512_close(&ctx.shavite, hash0[8]);
@@ -693,31 +718,23 @@ void x25x_4way_hash( void *output, const void *input )
sph_shavite512(&ctx.shavite, (const void*) hash3[7], 64);
sph_shavite512_close(&ctx.shavite, hash3[8]);
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash0[9],
(const BitSequence*)hash0[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash1[9],
(const BitSequence*)hash1[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash2[9],
(const BitSequence*)hash2[8], 512 );
init_sd( &ctx.simd, 512 );
update_final_sd( &ctx.simd, (BitSequence*)hash3[9],
(const BitSequence*)hash3[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash0[9],
(const BitSequence*)hash0[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash1[9],
(const BitSequence*)hash1[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash2[9],
(const BitSequence*)hash2[8], 512 );
simd_full( &ctx.simd, (BitSequence*)hash3[9],
(const BitSequence*)hash3[8], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash0[10],
(const BitSequence*)hash0[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash1[10],
(const BitSequence*)hash1[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash2[10],
(const BitSequence*)hash2[9], 512 );
init_echo( &ctx.echo, 512 );
update_final_echo ( &ctx.echo, (BitSequence*)hash3[10],
(const BitSequence*)hash3[9], 512 );
echo_full( &ctx.echo, (BitSequence *)hash0[10], 512,
(const BitSequence *)hash0[ 9], 64 );
echo_full( &ctx.echo, (BitSequence *)hash1[10], 512,
(const BitSequence *)hash1[ 9], 64 );
echo_full( &ctx.echo, (BitSequence *)hash2[10], 512,
(const BitSequence *)hash2[ 9], 64 );
echo_full( &ctx.echo, (BitSequence *)hash3[10], 512,
(const BitSequence *)hash3[ 9], 64 );
intrlv_4x64_512( vhash, hash0[10], hash1[10], hash2[10], hash3[10] );
@@ -870,43 +887,46 @@ void x25x_4way_hash( void *output, const void *input )
int scanhash_x25x_4way( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[16*4] __attribute__ ((aligned (128)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[7<<2]);
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hashd7 = &(hash[ 7*4 ]);
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
__m256i *noncev = (__m256i*)vdata + 9; // aligned
uint32_t n = first_nonce;
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
uint32_t n = first_nonce;
const int thr_id = mythr->id;
const uint32_t Htarg = ptarget[7];
const uint32_t targ32 = ptarget[7];
const bool bench = opt_benchmark;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
if ( bench ) ptarget[7] = 0x08ff;
InitializeSWIFFTX();
mm256_bswap32_intrlv80_4x64( vdata, pdata );
*noncev = mm256_intrlv_blend_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
x25x_4way_hash( hash, vdata );
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hashd7[ lane ] <= targ32 && !bench ) )
{
extr_lane_4x32( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
if ( valid_hash( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
submit_lane_solution( work, lane_hash, mythr, lane );
pdata[19] = bswap_32( n + lane );
submit_lane_solution( work, lane_hash, mythr, lane );
}
}
*noncev = _mm256_add_epi32( *noncev,
m256_const1_64( 0x0000000400000000 ) );
n += 4;
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
} while ( likely( ( n <= last_nonce ) && !work_restart[thr_id].restart ) );
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}

View File

@@ -201,42 +201,38 @@ void x25x_hash( void *output, const void *input )
memcpy(output, &hash[24], 32);
}
int scanhash_x25x( struct work* work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
int scanhash_x25x( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr)
{
uint32_t edata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t n = first_nonce;
uint32_t hash64[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = n;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x08ff;
if ( bench ) ptarget[7] = 0x08ff;
mm128_bswap32_80( edata, pdata );
for (int k=0; k < 20; k++)
be32enc(&edata[k], pdata[k]);
InitializeSWIFFTX();
do
{
pdata[19] = ++n;
be32enc( &edata[19], n );
x25x_hash( hash, edata );
if ( hash[7] < Htarg )
if ( fulltest( hash, ptarget ) && !opt_benchmark )
submit_solution( work, hash, mythr );
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = pdata[19] - first_nonce;
return 0;
edata[19] = n;
x25x_hash( hash64, edata );
if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
{
pdata[19] = bswap_32( n );
submit_solution( work, hash64, mythr );
}
n++;
} while ( n < max_nonce && !work_restart[thr_id].restart );
*hashes_done = n - first_nonce;
pdata[19] = n;
return 0;
}
#endif

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.11.9.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.2.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.11.9'
PACKAGE_STRING='cpuminer-opt 3.11.9'
PACKAGE_VERSION='3.12.2'
PACKAGE_STRING='cpuminer-opt 3.12.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.11.9 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.12.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.11.9:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.12.2:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.11.9
cpuminer-opt configure 3.12.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.11.9, which was
It was created by cpuminer-opt $as_me 3.12.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.11.9'
VERSION='3.12.2'
cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.11.9, which was
This file was extended by cpuminer-opt $as_me 3.12.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.11.9
cpuminer-opt config.status 3.12.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.11.9])
AC_INIT([cpuminer-opt], [3.12.2])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -143,12 +143,6 @@ int api_thr_id = -1;
bool stratum_need_reset = false;
struct work_restart *work_restart = NULL;
struct stratum_ctx stratum;
bool jsonrpc_2 = false;
char rpc2_id[64] = "";
char *rpc2_blob = NULL;
size_t rpc2_bloblen = 0;
uint32_t rpc2_target = 0;
char *rpc2_job_id = NULL;
double opt_diff_factor = 1.0;
double opt_target_factor = 1.0;
uint32_t zr5_pok = 0;
@@ -165,20 +159,22 @@ double stratum_diff = 0.;
double net_diff = 0.;
double net_hashrate = 0.;
uint64_t net_blocks = 0;
uint32_t opt_work_size = 0;
// conditional mining
bool conditional_state[MAX_CPUS] = { 0 };
double opt_max_temp = 0.0;
double opt_max_diff = 0.0;
double opt_max_rate = 0.0;
bool conditional_state[MAX_CPUS] = { 0 };
double opt_max_temp = 0.0;
double opt_max_diff = 0.0;
double opt_max_rate = 0.0;
uint32_t opt_work_size = 0;
char *opt_api_allow = NULL;
int opt_api_remote = 0;
int opt_api_listen = 0;
// int opt_api_listen = 4048;
// API
static bool opt_api_enabled = false;
char *opt_api_allow = NULL;
int opt_api_listen = 0;
int opt_api_remote = 0;
char *default_api_allow = "127.0.0.1";
int default_api_listen = 4048;
pthread_mutex_t rpc2_job_lock;
pthread_mutex_t rpc2_login_lock;
pthread_mutex_t applog_lock;
pthread_mutex_t stats_lock;
@@ -360,9 +356,6 @@ void work_copy(struct work *dest, const struct work *src)
int std_get_work_data_size() { return STD_WORK_DATA_SIZE; }
bool jr2_work_decode( const json_t *val, struct work *work )
{ return rpc2_job_decode( val, work ); }
// Default
bool std_le_work_decode( const json_t *val, struct work *work )
{
@@ -890,8 +883,6 @@ static double norm_diff_sum = 0.;
static uint32_t last_block_height = 0;
//static bool new_job = false;
static double last_targetdiff = 0.;
static double ref_rate_hi = 0.;
static double ref_rate_lo = 1e100;
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
static uint32_t hi_temp = 0;
#endif
@@ -939,7 +930,6 @@ void report_summary_log( bool force )
uint64_t accepts = accept_sum; accept_sum = 0;
uint64_t rejects = reject_sum; reject_sum = 0;
uint64_t stales = stale_sum; stale_sum = 0;
// int latency = latency_sum; latency_sum = 0;
memcpy( &start_time, &five_min_start, sizeof start_time );
memcpy( &five_min_start, &now, sizeof now );
@@ -950,34 +940,21 @@ void report_summary_log( bool force )
double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
double ghrate = global_hashrate;
double scaled_ghrate = ghrate;
double shrate = share_time == 0. ? 0. : diff_to_hash * last_targetdiff
* (double)(accepts) / share_time;
double sess_hrate = uptime.tv_sec == 0. ? 0. : diff_to_hash * norm_diff_sum
/ (double)uptime.tv_sec;
double scaled_shrate = shrate;
// int avg_latency = 0;
// double latency_pc = 0.;
double submit_rate = 0.;
double submit_rate = share_time == 0. ? 0. : (double)submits*60. / share_time;
char shr_units[4] = {0};
char ghr_units[4] = {0};
char sess_hr_units[4] = {0};
char et_str[24];
char upt_str[24];
// if ( submits ) avg_latency = latency / submits;
if ( share_time != 0. )
{
submit_rate = (double)submits*60. / share_time;
// latency_pc = (double)latency / (share_time * 10.);
}
if ( ghrate > ref_rate_hi ) ref_rate_hi = ghrate;
if ( ghrate < ref_rate_lo ) ref_rate_lo = ghrate;
scale_hash_for_display( &scaled_shrate, shr_units );
scale_hash_for_display( &scaled_ghrate, ghr_units );
scale_hash_for_display( &shrate, shr_units );
scale_hash_for_display( &ghrate, ghr_units );
scale_hash_for_display( &sess_hrate, sess_hr_units );
sprintf_et( et_str, et.tv_sec );
@@ -988,8 +965,26 @@ void report_summary_log( bool force )
submit_rate, (double)submitted_share_count*60. /
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
scaled_shrate, shr_units, sess_hrate, sess_hr_units,
scaled_ghrate, ghr_units );
shrate, shr_units, sess_hrate, sess_hr_units,
ghrate, ghr_units );
if ( accepted_share_count < submitted_share_count )
{
double lost_ghrate = uptime.tv_sec == 0. ? 0.
: diff_to_hash * last_targetdiff
* (double)(submitted_share_count - accepted_share_count )
/ (double)uptime.tv_sec;
double lost_shrate = share_time == 0. ? 0.
: diff_to_hash * last_targetdiff * (double)(submits - accepts )
/ share_time;
char lshr_units[4] = {0};
char lghr_units[4] = {0};
scale_hash_for_display( &lost_shrate, lshr_units );
scale_hash_for_display( &lost_ghrate, lghr_units );
applog2( LOG_INFO, "Lost hash rate %7.2f%sh/s %7.2f%sh/s",
lost_shrate, lshr_units, lost_ghrate, lghr_units );
}
applog2( LOG_INFO,"Submitted %6d %6d",
submits, submitted_share_count );
applog2( LOG_INFO,"Accepted %6d %6d",
@@ -1003,29 +998,10 @@ void report_summary_log( bool force )
if ( solved_block_count )
applog2( LOG_INFO,"Blocks solved %6d",
solved_block_count );
/*
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
int temp = cpu_temp(0);
char tempstr[32];
if ( temp > hi_temp ) hi_temp = temp;
if ( use_colors && ( temp >= 70 ) )
{
if ( temp >= 80 )
sprintf( tempstr, "%s%dC%s", CL_WHT CL_RED, temp, CL_N );
else
sprintf( tempstr, "%s%dC%s", CL_WHT CL_YLW, temp, CL_N );
}
else
sprintf( tempstr, "%dC", temp );
applog2(LOG_INFO,"CPU temp %s max %dC", tempstr, hi_temp );
#endif
*/
}
bool lowdiff_debug = false;
static int share_result( int result, struct work *null_work,
const char *reason )
{
@@ -1038,7 +1014,6 @@ static int share_result( int result, struct work *null_work,
char sres[48];
char rres[48];
char bres[48];
// char job_id[48];
bool solved = false;
bool stale = false;
char *acol = NULL, *bcol = NULL, *scol = NULL, *rcol = NULL;
@@ -1093,7 +1068,11 @@ static int share_result( int result, struct work *null_work,
stale_share_count++;
}
else
{
rejected_share_count++;
lowdiff_debug = true;
}
}
// update global counters for summary report
@@ -1168,7 +1147,7 @@ static int share_result( int result, struct work *null_work,
bres, share_time, latency );
if ( have_stratum && !opt_quiet )
applog2( LOG_NOTICE, "Diff %.3g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
applog2( LOG_NOTICE, "Diff %.5g (%.3g%), %sBlock %d, %sJob %s" CL_WHT,
my_stats.share_diff, share_ratio, bcol, stratum.block_height,
scol, my_stats.job_id );
@@ -1186,13 +1165,15 @@ static int share_result( int result, struct work *null_work,
for ( int i = 0; i < 8; i++ )
be32enc( str2 + i, str1[7 - i] );
bin2hex( str3, (unsigned char*)str2, 12 );
applog2( LOG_INFO, "Share diff: %g, Hash: %s...", my_stats.share_diff, str3 );
applog2( LOG_INFO, "Share diff: %.5g, Hash: %s...",
my_stats.share_diff, str3 );
diff_to_target( str1, my_stats.target_diff );
for ( int i = 0; i < 8; i++ )
be32enc( str2 + i, str1[7 - i] );
bin2hex( str3, (unsigned char*)str2, 12 );
applog2( LOG_INFO, "Target diff: %g, Targ: %s...", str3 );
applog2( LOG_INFO, "Target diff: %.5g, Targ: %s...",
my_stats.target_diff, str3 );
}
if ( unlikely( opt_reset_on_stale && stale ) )
@@ -1234,20 +1215,6 @@ void std_be_build_stratum_request( char *req, struct work *work )
free( xnonce2str );
}
void jr2_build_stratum_request( char *req, struct work *work )
{
uchar hash[32];
char noncestr[9];
bin2hex( noncestr, (char*) algo_gate.get_nonceptr( work->data ),
sizeof(uint32_t) );
algo_gate.hash_suw( hash, work->data );
char *hashhex = abin2hex(hash, 32);
snprintf( req, JSON_BUF_LEN,
"{\"method\": \"submit\", \"params\": {\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"}, \"id\":4}",
rpc2_id, work->job_id, noncestr, hashhex );
free( hashhex );
}
bool std_le_submit_getwork_result( CURL *curl, struct work *work )
{
char req[JSON_BUF_LEN];
@@ -1316,53 +1283,6 @@ bool std_be_submit_getwork_result( CURL *curl, struct work *work )
return true;
}
bool jr2_submit_getwork_result( CURL *curl, struct work *work )
{
json_t *val, *res;
char req[JSON_BUF_LEN];
char noncestr[9];
uchar hash[32];
char *hashhex;
bin2hex( noncestr, (char*) algo_gate.get_nonceptr( work->data ),
sizeof(uint32_t) );
algo_gate.hash_suw( hash, work->data );
hashhex = abin2hex( &hash[0], 32 );
snprintf( req, JSON_BUF_LEN, "{\"method\": \"submit\", \"params\": "
"{\"id\": \"%s\", \"job_id\": \"%s\", \"nonce\": \"%s\", \"result\": \"%s\"},"
"\"id\":4}\r\n",
rpc2_id, work->job_id, noncestr, hashhex );
free( hashhex );
// issue JSON-RPC request
val = json_rpc2_call( curl, rpc_url, rpc_userpass, req, NULL, 0 );
if (unlikely( !val ))
{
applog(LOG_ERR, "submit_upstream_work json_rpc_call failed");
return false;
}
res = json_object_get( val, "result" );
json_t *status = json_object_get( res, "status" );
bool valid = !strcmp( status ? json_string_value( status ) : "", "OK" );
if (valid)
share_result( valid, work, NULL );
else
{
json_t *err = json_object_get( res, "error" );
const char *sreason = json_string_value( json_object_get(
err, "message" ) );
share_result( valid, work, sreason );
if ( !strcasecmp( "Invalid job id", sreason ) )
{
work_free( work );
work_copy( work, &g_work );
g_work_time = 0;
restart_threads();
}
}
json_decref(val);
return true;
}
char* std_malloc_txs_request( struct work *work )
{
char *req;
@@ -1497,18 +1417,10 @@ static bool get_upstream_work( CURL *curl, struct work *work )
start:
gettimeofday( &tv_start, NULL );
if ( jsonrpc_2 )
{
char s[128];
snprintf( s, 128, "{\"method\": \"getjob\", \"params\": {\"id\": \"%s\"}, \"id\":1}\r\n", rpc2_id );
val = json_rpc2_call( curl, rpc_url, rpc_userpass, s, NULL, 0 );
}
else
{
val = json_rpc_call( curl, rpc_url, rpc_userpass,
val = json_rpc_call( curl, rpc_url, rpc_userpass,
have_gbt ? gbt_req : getwork_req, &err,
have_gbt ? JSON_RPC_QUIET_404 : 0);
}
gettimeofday( &tv_end, NULL );
if ( have_stratum )
@@ -1632,68 +1544,6 @@ static bool workio_submit_work(struct workio_cmd *wc, CURL *curl)
return true;
}
bool rpc2_login(CURL *curl)
{
json_t *val;
bool rc = false;
struct timeval tv_start, tv_end, diff;
char s[JSON_BUF_LEN];
if (!jsonrpc_2)
return false;
snprintf(s, JSON_BUF_LEN, "{\"method\": \"login\", \"params\": {"
"\"login\": \"%s\", \"pass\": \"%s\", \"agent\": \"%s\"}, \"id\": 1}",
rpc_user, rpc_pass, USER_AGENT);
gettimeofday(&tv_start, NULL);
val = json_rpc_call(curl, rpc_url, rpc_userpass, s, NULL, 0);
gettimeofday(&tv_end, NULL);
if (!val)
goto end;
rc = rpc2_login_decode(val);
json_t *result = json_object_get(val, "result");
if (!result)
goto end;
json_t *job = json_object_get(result, "job");
if (!rpc2_job_decode(job, &g_work))
goto end;
if (opt_debug && rc)
{
timeval_subtract(&diff, &tv_end, &tv_start);
applog(LOG_DEBUG, "DEBUG: authenticated in %d ms",
diff.tv_sec * 1000 + diff.tv_usec / 1000);
}
json_decref(val);
end:
return rc;
}
bool rpc2_workio_login(CURL *curl)
{
int failures = 0;
if (opt_benchmark)
return true;
/* submit solution to bitcoin via JSON-RPC */
pthread_mutex_lock(&rpc2_login_lock);
while (!rpc2_login(curl))
{
if (unlikely((opt_retries >= 0) && (++failures > opt_retries)))
{
applog(LOG_ERR, "...terminating workio thread");
pthread_mutex_unlock(&rpc2_login_lock);
return false;
}
/* pause, then restart work-request loop */
if (!opt_benchmark)
applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
sleep(opt_fail_pause);
pthread_mutex_unlock(&rpc2_login_lock);
pthread_mutex_lock(&rpc2_login_lock);
}
pthread_mutex_unlock(&rpc2_login_lock);
return true;
}
static void *workio_thread(void *userdata)
{
struct thr_info *mythr = (struct thr_info *) userdata;
@@ -1706,8 +1556,6 @@ static void *workio_thread(void *userdata)
applog(LOG_ERR, "CURL initialization failed");
return NULL;
}
if ( jsonrpc_2 && !have_stratum )
ok = rpc2_workio_login( curl );
while ( likely(ok) )
{
@@ -1759,8 +1607,8 @@ static bool get_work(struct thr_info *thr, struct work *work)
// this overwrites much of the for loop init
memset( work->data + algo_gate.nonce_index, 0x00, 52); // nonce..nonce+52
work->data[20] = 0x80000000; // extraheader not used for jr2
work->data[31] = 0x00000280; // extraheader not used for jr2
work->data[20] = 0x80000000;
work->data[31] = 0x00000280;
return true;
}
/* fill out work request message */
@@ -1822,12 +1670,13 @@ static inline double u256_to_double( const uint64_t *u )
void work_set_target_ratio( struct work* work, const void *hash )
{
double dhash;
dhash = u256_to_double( (const uint64_t*)hash );
if ( likely( dhash > 0. ) )
work->sharediff = work->targetdiff *
if ( likely( hash ) )
{
double dhash = u256_to_double( (const uint64_t*)hash );
if ( likely( dhash > 0. ) )
work->sharediff = work->targetdiff *
u256_to_double( (const uint64_t*)( work->target ) ) / dhash;
}
else
work->sharediff = 0.;
@@ -1843,8 +1692,8 @@ void work_set_target_ratio( struct work* work, const void *hash )
share_stats[ s_put_ptr ].net_diff = net_diff;
share_stats[ s_put_ptr ].stratum_diff = stratum_diff;
share_stats[ s_put_ptr ].target_diff = work->targetdiff;
strcpy( share_stats[ s_put_ptr ].job_id, work->job_id );
( (uint64_t*)share_stats[ s_put_ptr ].job_id )[3] = 0;
strncpy( share_stats[ s_put_ptr ].job_id, work->job_id, 30 );
s_put_ptr = stats_ptr_incr( s_put_ptr );
pthread_mutex_unlock( &stats_lock );
@@ -1860,7 +1709,17 @@ bool submit_solution( struct work *work, const void *hash,
if ( !opt_quiet )
applog( LOG_NOTICE, "%d submitted by thread %d, job %s",
submitted_share_count, thr->id, work->job_id );
return true;
if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
}
else
applog( LOG_WARNING, "%d failed to submit share.",
@@ -1878,6 +1737,18 @@ bool submit_lane_solution( struct work *work, const void *hash,
if ( !opt_quiet )
applog( LOG_NOTICE, "%d submitted by thread %d, lane %d, job %s",
submitted_share_count, thr->id, lane, work->job_id );
if ( lowdiff_debug )
{
uint32_t* h = (uint32_t*)hash;
uint32_t* t = (uint32_t*)work->target;
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
}
return true;
}
else
@@ -1886,22 +1757,6 @@ bool submit_lane_solution( struct work *work, const void *hash,
return false;
}
bool rpc2_stratum_job( struct stratum_ctx *sctx, json_t *params )
{
bool ret = false;
pthread_mutex_lock(&sctx->work_lock);
ret = rpc2_job_decode(params, &sctx->work);
if (ret)
{
if (sctx->job.job_id)
free(sctx->job.job_id);
sctx->job.job_id = strdup(sctx->work.job_id);
}
pthread_mutex_unlock(&sctx->work_lock);
return ret;
}
static bool wanna_mine(int thr_id)
{
bool state = true;
@@ -1988,21 +1843,10 @@ double std_calc_network_diff( struct work* work )
return d;
}
uint32_t *std_get_nonceptr( uint32_t *work_data )
{
return work_data + algo_gate.nonce_index;
}
uint32_t *jr2_get_nonceptr( uint32_t *work_data )
{
// nonce is misaligned, use byte offset
return (uint32_t*) ( ((uint8_t*) work_data) + algo_gate.nonce_index );
}
void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *end_nonce_ptr )
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
bool force_new_work = work->job_id ? strtoul( work->job_id, NULL, 16 ) !=
strtoul( g_work->job_id, NULL, 16 )
@@ -2021,28 +1865,6 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
++(*nonceptr);
}
void jr2_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *end_nonce_ptr )
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
// byte data[ 0..38, 43..75 ], skip over misaligned nonce [39..42]
if ( memcmp( work->data, g_work->data, algo_gate.nonce_index )
|| memcmp( ((uint8_t*) work->data) + JR2_WORK_CMP_INDEX_2,
((uint8_t*) g_work->data) + JR2_WORK_CMP_INDEX_2,
JR2_WORK_CMP_SIZE_2 ) )
{
work_free( work );
work_copy( work, g_work );
*nonceptr = ( 0xffffffU / opt_n_threads ) * thr_id
+ ( *nonceptr & 0xff000000U );
*end_nonce_ptr = ( 0xffffffU / opt_n_threads ) * (thr_id+1)
+ ( *nonceptr & 0xff000000U ) - 0x20;
}
else
++(*nonceptr);
}
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
int thr_id )
{
@@ -2060,7 +1882,7 @@ static void *miner_thread( void *userdata )
struct thr_info *mythr = (struct thr_info *) userdata;
int thr_id = mythr->id;
uint32_t max_nonce;
struct timeval cpu_temp_time = {0};
uint32_t *nonceptr = work.data + algo_gate.nonce_index;
// end_nonce gets read before being set so it needs to be initialized
// what is an appropriate value that is completely neutral?
@@ -2170,7 +1992,8 @@ static void *miner_thread( void *userdata )
if ( have_stratum )
{
pthread_mutex_lock( &g_work_lock );
if ( *algo_gate.get_nonceptr( work.data ) >= end_nonce )
if ( *nonceptr >= end_nonce )
algo_gate.stratum_gen_work( &stratum, &g_work );
algo_gate.get_new_work( &work, &g_work, thr_id, &end_nonce );
pthread_mutex_unlock( &g_work_lock );
@@ -2181,7 +2004,7 @@ static void *miner_thread( void *userdata )
pthread_mutex_lock( &g_work_lock );
if ( time(NULL) - g_work_time >= min_scantime
|| *algo_gate.get_nonceptr( work.data ) >= end_nonce )
|| *nonceptr >= end_nonce )
{
if ( unlikely( !get_work( mythr, &g_work ) ) )
{
@@ -2242,7 +2065,7 @@ static void *miner_thread( void *userdata )
// Select nonce range for approx 1 min duration based
// on hashrate, initial value arbitrarilly set to 1000 just to get
// a sample hashrate for the next time.
uint32_t work_nonce = *( algo_gate.get_nonceptr( work.data ) );
uint32_t work_nonce = *nonceptr;
max64 = 60 * thr_hashrates[thr_id];
if ( max64 <= 0)
max64 = 1000;
@@ -2301,15 +2124,15 @@ static void *miner_thread( void *userdata )
if (!opt_quiet && mythr->id == 0 )
{
int temp = cpu_temp(0);
static struct timeval cpu_temp_time = {0};
timeval_subtract( &diff, &tv_end, &cpu_temp_time );
int wait = temp >= 80 ? 30 : temp >= 70 ? 90 : 180;
int wait = temp >= 80 ? 30 : temp >= 70 ? 60 : 120;
if ( ( diff.tv_sec > wait ) || ( temp > hi_temp ) )
{
char tempstr[32];
int lo_freq, hi_freq;
linux_cpu_hilo_freq( &lo_freq, &hi_freq );
memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) );
if ( temp > hi_temp ) hi_temp = temp;
if ( use_colors && ( temp >= 70 ) )
{
if ( temp >= 80 )
@@ -2321,6 +2144,7 @@ static void *miner_thread( void *userdata )
sprintf( tempstr, "%d C", temp );
applog( LOG_INFO,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz",
tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 );
if ( temp > hi_temp ) hi_temp = temp;
}
}
#endif
@@ -2398,25 +2222,6 @@ json_t *std_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
return val;
}
json_t *jr2_longpoll_rpc_call( CURL *curl, int *err )
{
json_t *val;
char req[128];
pthread_mutex_lock( &rpc2_login_lock );
if ( !strlen(rpc2_id) )
{
pthread_mutex_unlock( &rpc2_login_lock );
sleep(1);
return NULL;
}
snprintf( req, 128, "{\"method\": \"getjob\", \"params\": {\"id\": \"%s\"}, \"id\":1}\r\n", rpc2_id );
pthread_mutex_unlock( &rpc2_login_lock );
val = json_rpc2_call( curl, rpc_url, rpc_userpass, req, err,
JSON_RPC_LONGPOLL );
return val;
}
static void *longpoll_thread(void *userdata)
{
struct thr_info *mythr = (struct thr_info*) userdata;
@@ -2478,11 +2283,8 @@ start:
double start_diff = 0.0;
json_t *res, *soval;
res = json_object_get(val, "result");
if (!jsonrpc_2)
{
soval = json_object_get(res, "submitold");
submit_old = soval ? json_is_true(soval) : false;
}
soval = json_object_get(res, "submitold");
submit_old = soval ? json_is_true(soval) : false;
pthread_mutex_lock(&g_work_lock);
start_job_id = g_work.job_id ? strdup(g_work.job_id) : NULL;
if (have_gbt)
@@ -2547,48 +2349,12 @@ out:
return NULL;
}
bool std_stratum_handle_response( json_t *val )
{
bool valid = false;
json_t *err_val, *res_val, *id_val;
res_val = json_object_get( val, "result" );
err_val = json_object_get( val, "error" );
id_val = json_object_get( val, "id" );
if ( !res_val || json_integer_value(id_val) < 4 )
return false;
valid = json_is_true( res_val );
share_result( valid, NULL, err_val ?
json_string_value( json_array_get(err_val, 1) ) : NULL );
return true;
}
bool jr2_stratum_handle_response( json_t *val )
{
bool valid = false;
json_t *err_val, *res_val;
res_val = json_object_get( val, "result" );
err_val = json_object_get( val, "error" );
if ( !res_val && !err_val )
return false;
json_t *status = json_object_get( res_val, "status" );
if ( status )
{
const char *s = json_string_value( status );
valid = !strcmp( s, "OK" ) && json_is_null( err_val );
}
else
valid = json_is_null( err_val );
share_result( valid, NULL, err_val ? json_string_value(err_val) : NULL );
return true;
}
static bool stratum_handle_response( char *buf )
{
json_t *val, *id_val, *res_val;
json_t *val, *id_val, *res_val, *err_val;
json_error_t err;
bool ret = false;
bool share_accepted = false;
val = JSON_LOADS( buf, &err );
if (!val)
@@ -2602,8 +2368,15 @@ static bool stratum_handle_response( char *buf )
id_val = json_object_get( val, "id" );
if ( !id_val || json_is_null(id_val) )
goto out;
if ( !algo_gate.stratum_handle_response( val ) )
err_val = json_object_get( val, "error" );
if ( !res_val || json_integer_value( id_val ) < 4 )
goto out;
share_accepted = json_is_true( res_val );
share_result( share_accepted, NULL, err_val ?
json_string_value( json_array_get(err_val, 1) ) : NULL );
ret = true;
out:
if (val)
@@ -2726,7 +2499,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
if ( !opt_quiet )
{
applog2( LOG_INFO, "%s: %s", algo_names[opt_algo], short_url );
applog2( LOG_INFO, "Diff: Net %.3g, Stratum %.3g, Target %.3g",
applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
net_diff, stratum_diff, last_targetdiff );
if ( likely( hr > 0. ) )
@@ -2766,31 +2539,6 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
} // new diff/block
}
void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
{
pthread_mutex_lock( &sctx->work_lock );
work_free( g_work );
work_copy( g_work, &sctx->work );
pthread_mutex_unlock( &sctx->work_lock );
/*
if ( stratum_diff != sctx->job.diff )
applog( LOG_BLUE, "New stratum diff %g, block %d, job %s",
sctx->job.diff, sctx->block_height, g_work->job_id );
else if ( last_block_height != sctx->block_height )
applog( LOG_BLUE, "New block %d, job %s",
sctx->block_height, g_work->job_id );
else if ( g_work->job_id )
applog( LOG_BLUE,"New job %s", g_work->job_id );
*/
if ( last_block_height != stratum.block_height )
{
applog(LOG_BLUE, "Stratum detected new block");
last_block_height = stratum.block_height;
}
if ( stratum_diff != g_work->stratum_diff )
stratum_diff = g_work->stratum_diff;
}
static void *stratum_thread(void *userdata )
{
struct thr_info *mythr = (struct thr_info *) userdata;
@@ -2844,11 +2592,6 @@ static void *stratum_thread(void *userdata )
}
else
applog(LOG_BLUE,"Stratum connection established" );
if ( unlikely( jsonrpc_2 ) )
{
work_free(&g_work);
work_copy(&g_work, &stratum.work);
}
}
report_summary_log( ( stratum_diff != stratum.job.diff )
@@ -3010,11 +2753,11 @@ void parse_arg(int key, char *arg )
break;
case 'b':
p = strstr(arg, ":");
opt_api_enabled = true;
p = strstr(arg, ":");
if (p) {
/* ip:port */
if (p - arg > 0) {
free(opt_api_allow);
opt_api_allow = strdup(arg);
opt_api_allow[p - arg] = '\0';
}
@@ -3024,10 +2767,12 @@ void parse_arg(int key, char *arg )
/* ip only */
free(opt_api_allow);
opt_api_allow = strdup(arg);
}
opt_api_listen = default_api_listen;
}
else if (arg) {
/* port or 0 to disable */
opt_api_listen = atoi(arg);
opt_api_allow = default_api_allow;
opt_api_listen = atoi(arg);
}
break;
case 1030: /* --api-remote */
@@ -3635,7 +3380,6 @@ int main(int argc, char *argv[])
rpc_user = strdup("");
rpc_pass = strdup("");
// opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
parse_cmdline(argc, argv);
@@ -3735,8 +3479,6 @@ int main(int argc, char *argv[])
pthread_mutex_init( &stats_lock, NULL );
pthread_mutex_init( &g_work_lock, NULL );
pthread_mutex_init( &rpc2_job_lock, NULL );
pthread_mutex_init( &rpc2_login_lock, NULL );
pthread_mutex_init( &stratum.sock_lock, NULL );
pthread_mutex_init( &stratum.work_lock, NULL );
@@ -3911,7 +3653,7 @@ int main(int argc, char *argv[])
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
}
if (opt_api_listen)
if ( opt_api_enabled )
{
/* api thread */
api_thr_id = opt_n_threads + 3;

View File

@@ -1,170 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
#include <stdint.h>
#if defined(__cplusplus)
extern "C"
{
#endif
#define TABLE_ALIGN 32
#define WPOLY 0x011b
#define N_COLS 4
#define AES_BLOCK_SIZE 16
#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
#if defined(_MSC_VER)
#define ALIGN __declspec(align(TABLE_ALIGN))
#elif defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(16)))
#else
#define ALIGN
#endif
#define rf1(r,c) (r)
#define word_in(x,c) (*((uint32_t*)(x)+(c)))
#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
#define s(x,c) x[c]
#define si(y,x,c) (s(y,c) = word_in(x, c))
#define so(y,x,c) word_out(y, c, s(x,c))
#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
#define round(y,x,k) \
y[0] = (k)[0] ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \
y[1] = (k)[1] ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \
y[2] = (k)[2] ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \
y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]);
#define to_byte(x) ((x) & 0xff)
#define bval(x,n) to_byte((x) >> (8 * (n)))
#define fwd_var(x,r,c)\
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
#define sb_data(w) {\
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
#define rc_data(w) {\
w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
w(0x1b), w(0x36) }
#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
#define h0(x) (x)
#define w0(p) bytes2word(p, 0, 0, 0)
#define w1(p) bytes2word(0, p, 0, 0)
#define w2(p) bytes2word(0, 0, p, 0)
#define w3(p) bytes2word(0, 0, 0, p)
#define u0(p) bytes2word(f2(p), p, p, f3(p))
#define u1(p) bytes2word(f3(p), f2(p), p, p)
#define u2(p) bytes2word(p, f3(p), f2(p), p)
#define u3(p) bytes2word(p, p, f3(p), f2(p))
#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
#define f3(x) (f2(x) ^ x)
#define f9(x) (f8(x) ^ x)
#define fb(x) (f8(x) ^ f2(x) ^ x)
#define fd(x) (f8(x) ^ f4(x) ^ x)
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
#define t_dec(m,n) t_##m##n
#define t_set(m,n) t_##m##n
#define t_use(m,n) t_##m##n
#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
#define four_tables(x,tab,vf,rf,c) \
(tab[0][bval(vf(x,0,c),rf(0,c))] \
^ tab[1][bval(vf(x,1,c),rf(1,c))] \
^ tab[2][bval(vf(x,2,c),rf(2,c))] \
^ tab[3][bval(vf(x,3,c),rf(3,c))])
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
{
round(((uint32_t*) out), ((uint32_t*) in), ((uint32_t*) expandedKey));
}
void aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey)
{
uint32_t b1[4];
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey));
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 1 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 2 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 3 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 4 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 5 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 6 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 7 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 8 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 9 * N_COLS);
}
#if defined(__cplusplus)
}
#endif

View File

@@ -1,326 +0,0 @@
/*
* The blake256_* and blake224_* functions are largely copied from
* blake256_light.c and blake224_light.c from the BLAKE website:
*
* http://131002.net/blake/
*
* The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224.
* HMAC is specified by RFC 2104.
*/
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include "c_blake256.h"
#define U8TO32(p) \
(((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) ))
#define U32TO8(p, v) \
(p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
(p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) );
const uint8_t sigma[][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
{ 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
{ 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
{12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
{13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
{ 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
{10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
};
const uint32_t cst[16] = {
0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
};
static const uint8_t padding[] = {
0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
void blake256_compress(state *S, const uint8_t *block) {
uint32_t v[16], m[16], i;
#define ROT(x,n) (((x)<<(32-n))|((x)>>(n)))
#define G(a,b,c,d,e) \
v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
v[d] = ROT(v[d] ^ v[a],16); \
v[c] += v[d]; \
v[b] = ROT(v[b] ^ v[c],12); \
v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b]; \
v[d] = ROT(v[d] ^ v[a], 8); \
v[c] += v[d]; \
v[b] = ROT(v[b] ^ v[c], 7);
for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
for (i = 0; i < 8; ++i) v[i] = S->h[i];
v[ 8] = S->s[0] ^ 0x243F6A88;
v[ 9] = S->s[1] ^ 0x85A308D3;
v[10] = S->s[2] ^ 0x13198A2E;
v[11] = S->s[3] ^ 0x03707344;
v[12] = 0xA4093822;
v[13] = 0x299F31D0;
v[14] = 0x082EFA98;
v[15] = 0xEC4E6C89;
if (S->nullt == 0) {
v[12] ^= S->t[0];
v[13] ^= S->t[0];
v[14] ^= S->t[1];
v[15] ^= S->t[1];
}
for (i = 0; i < 14; ++i) {
G(0, 4, 8, 12, 0);
G(1, 5, 9, 13, 2);
G(2, 6, 10, 14, 4);
G(3, 7, 11, 15, 6);
G(3, 4, 9, 14, 14);
G(2, 7, 8, 13, 12);
G(0, 5, 10, 15, 8);
G(1, 6, 11, 12, 10);
}
for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4];
}
void blake256_init(state *S) {
S->h[0] = 0x6A09E667;
S->h[1] = 0xBB67AE85;
S->h[2] = 0x3C6EF372;
S->h[3] = 0xA54FF53A;
S->h[4] = 0x510E527F;
S->h[5] = 0x9B05688C;
S->h[6] = 0x1F83D9AB;
S->h[7] = 0x5BE0CD19;
S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
}
void blake224_init(state *S) {
S->h[0] = 0xC1059ED8;
S->h[1] = 0x367CD507;
S->h[2] = 0x3070DD17;
S->h[3] = 0xF70E5939;
S->h[4] = 0xFFC00B31;
S->h[5] = 0x68581511;
S->h[6] = 0x64F98FA7;
S->h[7] = 0xBEFA4FA4;
S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
}
// datalen = number of bits
void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
int left = S->buflen >> 3;
int fill = 64 - left;
if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
memcpy((void *) (S->buf + left), (void *) data, fill);
S->t[0] += 512;
if (S->t[0] == 0) S->t[1]++;
blake256_compress(S, S->buf);
data += fill;
datalen -= (fill << 3);
left = 0;
}
while (datalen >= 512) {
S->t[0] += 512;
if (S->t[0] == 0) S->t[1]++;
blake256_compress(S, data);
data += 64;
datalen -= 512;
}
if (datalen > 0) {
memcpy((void *) (S->buf + left), (void *) data, (size_t) (datalen >> 3));
S->buflen = (left << 3) + (int) datalen;
} else {
S->buflen = 0;
}
}
// datalen = number of bits
void blake224_update(state *S, const uint8_t *data, uint64_t datalen) {
blake256_update(S, data, datalen);
}
void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) {
uint8_t msglen[8];
uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
if (lo < (unsigned) S->buflen) hi++;
U32TO8(msglen + 0, hi);
U32TO8(msglen + 4, lo);
if (S->buflen == 440) { /* one padding byte */
S->t[0] -= 8;
blake256_update(S, &pa, 8);
} else {
if (S->buflen < 440) { /* enough space to fill the block */
if (S->buflen == 0) S->nullt = 1;
S->t[0] -= 440 - S->buflen;
blake256_update(S, padding, 440 - S->buflen);
} else { /* need 2 compressions */
S->t[0] -= 512 - S->buflen;
blake256_update(S, padding, 512 - S->buflen);
S->t[0] -= 440;
blake256_update(S, padding + 1, 440);
S->nullt = 1;
}
blake256_update(S, &pb, 8);
S->t[0] -= 8;
}
S->t[0] -= 64;
blake256_update(S, msglen, 64);
U32TO8(digest + 0, S->h[0]);
U32TO8(digest + 4, S->h[1]);
U32TO8(digest + 8, S->h[2]);
U32TO8(digest + 12, S->h[3]);
U32TO8(digest + 16, S->h[4]);
U32TO8(digest + 20, S->h[5]);
U32TO8(digest + 24, S->h[6]);
U32TO8(digest + 28, S->h[7]);
}
void blake256_final(state *S, uint8_t *digest) {
blake256_final_h(S, digest, 0x81, 0x01);
}
void blake224_final(state *S, uint8_t *digest) {
blake256_final_h(S, digest, 0x80, 0x00);
}
// inlen = number of bytes
void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
state S;
blake256_init(&S);
blake256_update(&S, in, inlen * 8);
blake256_final(&S, out);
}
// inlen = number of bytes
void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
state S;
blake224_init(&S);
blake224_update(&S, in, inlen * 8);
blake224_final(&S, out);
}
// keylen = number of bytes
void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
const uint8_t *key = _key;
uint8_t keyhash[32];
uint8_t pad[64];
uint64_t i;
if (keylen > 64) {
blake256_hash(keyhash, key, keylen);
key = keyhash;
keylen = 32;
}
blake256_init(&S->inner);
memset(pad, 0x36, 64);
for (i = 0; i < keylen; ++i) {
pad[i] ^= key[i];
}
blake256_update(&S->inner, pad, 512);
blake256_init(&S->outer);
memset(pad, 0x5c, 64);
for (i = 0; i < keylen; ++i) {
pad[i] ^= key[i];
}
blake256_update(&S->outer, pad, 512);
memset(keyhash, 0, 32);
}
// keylen = number of bytes
void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
const uint8_t *key = _key;
uint8_t keyhash[32];
uint8_t pad[64];
uint64_t i;
if (keylen > 64) {
blake256_hash(keyhash, key, keylen);
key = keyhash;
keylen = 28;
}
blake224_init(&S->inner);
memset(pad, 0x36, 64);
for (i = 0; i < keylen; ++i) {
pad[i] ^= key[i];
}
blake224_update(&S->inner, pad, 512);
blake224_init(&S->outer);
memset(pad, 0x5c, 64);
for (i = 0; i < keylen; ++i) {
pad[i] ^= key[i];
}
blake224_update(&S->outer, pad, 512);
memset(keyhash, 0, 32);
}
// datalen = number of bits
void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
// update the inner state
blake256_update(&S->inner, data, datalen);
}
// datalen = number of bits
void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
// update the inner state
blake224_update(&S->inner, data, datalen);
}
void hmac_blake256_final(hmac_state *S, uint8_t *digest) {
uint8_t ihash[32];
blake256_final(&S->inner, ihash);
blake256_update(&S->outer, ihash, 256);
blake256_final(&S->outer, digest);
memset(ihash, 0, 32);
}
void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
uint8_t ihash[32];
blake224_final(&S->inner, ihash);
blake224_update(&S->outer, ihash, 224);
blake224_final(&S->outer, digest);
memset(ihash, 0, 32);
}
// keylen = number of bytes; inlen = number of bytes
void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
hmac_state S;
hmac_blake256_init(&S, key, keylen);
hmac_blake256_update(&S, in, inlen * 8);
hmac_blake256_final(&S, out);
}
// keylen = number of bytes; inlen = number of bytes
void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
hmac_state S;
hmac_blake224_init(&S, key, keylen);
hmac_blake224_update(&S, in, inlen * 8);
hmac_blake224_final(&S, out);
}

View File

@@ -1,43 +0,0 @@
#ifndef _BLAKE256_H_
#define _BLAKE256_H_
#include <stdint.h>
typedef struct {
uint32_t h[8], s[4], t[2];
int buflen, nullt;
uint8_t buf[64];
} state;
typedef struct {
state inner;
state outer;
} hmac_state;
void blake256_init(state *);
void blake224_init(state *);
void blake256_update(state *, const uint8_t *, uint64_t);
void blake224_update(state *, const uint8_t *, uint64_t);
void blake256_final(state *, uint8_t *);
void blake224_final(state *, uint8_t *);
void blake256_hash(uint8_t *, const uint8_t *, uint64_t);
void blake224_hash(uint8_t *, const uint8_t *, uint64_t);
/* HMAC functions: */
void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake256_final(hmac_state *, uint8_t *);
void hmac_blake224_final(hmac_state *, uint8_t *);
void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
#endif /* _BLAKE256_H_ */

View File

@@ -1,360 +0,0 @@
/* hash.c April 2012
* Groestl ANSI C code optimised for 32-bit machines
* Author: Thomas Krinninger
*
* This work is based on the implementation of
* Soeren S. Thomsen and Krystian Matusiewicz
*
*
*/
#include "c_groestl.h"
#include "groestl_tables.h"
#define P_TYPE 0
#define Q_TYPE 1
const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}};
const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
v1 = temp_var;}
#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \
tu = T[2*(uint32_t)x[4*c0+0]]; \
tl = T[2*(uint32_t)x[4*c0+0]+1]; \
tv1 = T[2*(uint32_t)x[4*c1+1]]; \
tv2 = T[2*(uint32_t)x[4*c1+1]+1]; \
ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
tu ^= tv1; \
tl ^= tv2; \
tv1 = T[2*(uint32_t)x[4*c2+2]]; \
tv2 = T[2*(uint32_t)x[4*c2+2]+1]; \
ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
tu ^= tv1; \
tl ^= tv2; \
tv1 = T[2*(uint32_t)x[4*c3+3]]; \
tv2 = T[2*(uint32_t)x[4*c3+3]+1]; \
ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
tu ^= tv1; \
tl ^= tv2; \
tl ^= T[2*(uint32_t)x[4*c4+0]]; \
tu ^= T[2*(uint32_t)x[4*c4+0]+1]; \
tv1 = T[2*(uint32_t)x[4*c5+1]]; \
tv2 = T[2*(uint32_t)x[4*c5+1]+1]; \
ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
tl ^= tv1; \
tu ^= tv2; \
tv1 = T[2*(uint32_t)x[4*c6+2]]; \
tv2 = T[2*(uint32_t)x[4*c6+2]+1]; \
ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
tl ^= tv1; \
tu ^= tv2; \
tv1 = T[2*(uint32_t)x[4*c7+3]]; \
tv2 = T[2*(uint32_t)x[4*c7+3]+1]; \
ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
tl ^= tv1; \
tu ^= tv2; \
y[i] = tu; \
y[i+1] = tl;
/* compute one round of P (short variants) */
static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) {
uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
uint32_t* x32 = (uint32_t*)x;
x32[ 0] ^= 0x00000000^r;
x32[ 2] ^= 0x00000010^r;
x32[ 4] ^= 0x00000020^r;
x32[ 6] ^= 0x00000030^r;
x32[ 8] ^= 0x00000040^r;
x32[10] ^= 0x00000050^r;
x32[12] ^= 0x00000060^r;
x32[14] ^= 0x00000070^r;
COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
}
/* compute one round of Q (short variants) */
static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) {
uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
uint32_t* x32 = (uint32_t*)x;
x32[ 0] = ~x32[ 0];
x32[ 1] ^= 0xffffffff^r;
x32[ 2] = ~x32[ 2];
x32[ 3] ^= 0xefffffff^r;
x32[ 4] = ~x32[ 4];
x32[ 5] ^= 0xdfffffff^r;
x32[ 6] = ~x32[ 6];
x32[ 7] ^= 0xcfffffff^r;
x32[ 8] = ~x32[ 8];
x32[ 9] ^= 0xbfffffff^r;
x32[10] = ~x32[10];
x32[11] ^= 0xafffffff^r;
x32[12] = ~x32[12];
x32[13] ^= 0x9fffffff^r;
x32[14] = ~x32[14];
x32[15] ^= 0x8fffffff^r;
COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
}
/* compute compression function (short variants) */
static void F512(uint32_t *h, const uint32_t *m) {
int i;
uint32_t Ptmp[2*COLS512];
uint32_t Qtmp[2*COLS512];
uint32_t y[2*COLS512];
uint32_t z[2*COLS512];
for (i = 0; i < 2*COLS512; i++) {
z[i] = m[i];
Ptmp[i] = h[i]^m[i];
}
/* compute Q(m) */
RND512Q((uint8_t*)z, y, 0x00000000);
RND512Q((uint8_t*)y, z, 0x01000000);
RND512Q((uint8_t*)z, y, 0x02000000);
RND512Q((uint8_t*)y, z, 0x03000000);
RND512Q((uint8_t*)z, y, 0x04000000);
RND512Q((uint8_t*)y, z, 0x05000000);
RND512Q((uint8_t*)z, y, 0x06000000);
RND512Q((uint8_t*)y, z, 0x07000000);
RND512Q((uint8_t*)z, y, 0x08000000);
RND512Q((uint8_t*)y, Qtmp, 0x09000000);
/* compute P(h+m) */
RND512P((uint8_t*)Ptmp, y, 0x00000000);
RND512P((uint8_t*)y, z, 0x00000001);
RND512P((uint8_t*)z, y, 0x00000002);
RND512P((uint8_t*)y, z, 0x00000003);
RND512P((uint8_t*)z, y, 0x00000004);
RND512P((uint8_t*)y, z, 0x00000005);
RND512P((uint8_t*)z, y, 0x00000006);
RND512P((uint8_t*)y, z, 0x00000007);
RND512P((uint8_t*)z, y, 0x00000008);
RND512P((uint8_t*)y, Ptmp, 0x00000009);
/* compute P(h+m) + Q(m) + h */
for (i = 0; i < 2*COLS512; i++) {
h[i] ^= Ptmp[i]^Qtmp[i];
}
}
/* digest up to msglen bytes of input (full blocks only) */
static void Transform(groestlHashState *ctx,
const uint8_t *input,
int msglen) {
/* digest message, one block at a time */
for (; msglen >= SIZE512;
msglen -= SIZE512, input += SIZE512) {
F512(ctx->chaining,(uint32_t*)input);
/* increment block counter */
ctx->block_counter1++;
if (ctx->block_counter1 == 0) ctx->block_counter2++;
}
}
/* given state h, do h <- P(h)+h */
static void OutputTransformation(groestlHashState *ctx) {
int j;
uint32_t temp[2*COLS512];
uint32_t y[2*COLS512];
uint32_t z[2*COLS512];
for (j = 0; j < 2*COLS512; j++) {
temp[j] = ctx->chaining[j];
}
RND512P((uint8_t*)temp, y, 0x00000000);
RND512P((uint8_t*)y, z, 0x00000001);
RND512P((uint8_t*)z, y, 0x00000002);
RND512P((uint8_t*)y, z, 0x00000003);
RND512P((uint8_t*)z, y, 0x00000004);
RND512P((uint8_t*)y, z, 0x00000005);
RND512P((uint8_t*)z, y, 0x00000006);
RND512P((uint8_t*)y, z, 0x00000007);
RND512P((uint8_t*)z, y, 0x00000008);
RND512P((uint8_t*)y, temp, 0x00000009);
for (j = 0; j < 2*COLS512; j++) {
ctx->chaining[j] ^= temp[j];
}
}
/* initialise context */
static void Init(groestlHashState* ctx) {
int i = 0;
/* allocate memory for state and data buffer */
for(;i<(SIZE512/sizeof(uint32_t));i++)
{
ctx->chaining[i] = 0;
}
/* set initial value */
ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN);
/* set other variables */
ctx->buf_ptr = 0;
ctx->block_counter1 = 0;
ctx->block_counter2 = 0;
ctx->bits_in_last_byte = 0;
}
/* update state with databitlen bits of input */
static void Update(groestlHashState* ctx,
const BitSequence* input,
DataLength databitlen) {
int index = 0;
int msglen = (int)(databitlen/8);
int rem = (int)(databitlen%8);
/* if the buffer contains data that has not yet been digested, first
add data to buffer until full */
if (ctx->buf_ptr) {
while (ctx->buf_ptr < SIZE512 && index < msglen) {
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
}
if (ctx->buf_ptr < SIZE512) {
/* buffer still not full, return */
if (rem) {
ctx->bits_in_last_byte = rem;
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
}
return;
}
/* digest buffer */
ctx->buf_ptr = 0;
Transform(ctx, ctx->buffer, SIZE512);
}
/* digest bulk of message */
Transform(ctx, input+index, msglen-index);
index += ((msglen-index)/SIZE512)*SIZE512;
/* store remaining data in buffer */
while (index < msglen) {
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
}
/* if non-integral number of bytes have been supplied, store
remaining bits in last byte, together with information about
number of bits */
if (rem) {
ctx->bits_in_last_byte = rem;
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
}
}
#define BILB ctx->bits_in_last_byte
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
static void Final(groestlHashState* ctx,
BitSequence* output) {
int i, j = 0, hashbytelen = HASH_BIT_LEN/8;
uint8_t *s = (BitSequence*)ctx->chaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
/* padding requires two blocks */
while (ctx->buf_ptr < SIZE512) {
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* digest first padding block */
Transform(ctx, ctx->buffer, SIZE512);
ctx->buf_ptr = 0;
}
while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) {
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* length padding */
ctx->block_counter1++;
if (ctx->block_counter1 == 0) ctx->block_counter2++;
ctx->buf_ptr = SIZE512;
while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) {
ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
ctx->block_counter1 >>= 8;
}
while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
ctx->block_counter2 >>= 8;
}
/* digest final padding block */
Transform(ctx, ctx->buffer, SIZE512);
/* perform output transformation */
OutputTransformation(ctx);
/* store hash result in output */
for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) {
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
for (i = 0; i < COLS512; i++) {
ctx->chaining[i] = 0;
}
for (i = 0; i < SIZE512; i++) {
ctx->buffer[i] = 0;
}
}
/* hash bit sequence */
void groestl(const BitSequence* data,
DataLength databitlen,
BitSequence* hashval) {
groestlHashState context;
/* initialise */
Init(&context);
/* process message */
Update(&context, data, databitlen);
/* finalise */
Final(&context, hashval);
}
/*
static int crypto_hash(unsigned char *out,
const unsigned char *in,
unsigned long long len)
{
groestl(in, 8*len, out);
return 0;
}
*/

View File

@@ -1,60 +0,0 @@
#ifndef __hash_h
#define __hash_h
/*
#include "crypto_uint8.h"
#include "crypto_uint32.h"
#include "crypto_uint64.h"
#include "crypto_hash.h"
typedef crypto_uint8 uint8_t;
typedef crypto_uint32 uint32_t;
typedef crypto_uint64 uint64_t;
*/
#include <stdint.h>
#include "hash.h"
/* some sizes (number of bytes) */
#define ROWS 8
#define LENGTHFIELDLEN ROWS
#define COLS512 8
#define SIZE512 (ROWS*COLS512)
#define ROUNDS512 10
#define HASH_BIT_LEN 256
#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff))
#define li_32(h) 0x##h##u
#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n)))
#define u32BIG(a) \
((ROTL32(a,8) & li_32(00FF00FF)) | \
(ROTL32(a,24) & li_32(FF00FF00)))
/* NIST API begin */
typedef struct {
uint32_t chaining[SIZE512/sizeof(uint32_t)]; /* actual state */
uint32_t block_counter1,
block_counter2; /* message block counter(s) */
BitSequence buffer[SIZE512]; /* data buffer */
int buf_ptr; /* data buffer pointer */
int bits_in_last_byte; /* no. of message bits in last byte of
data buffer */
} groestlHashState;
/*void Init(hashState*);
void Update(hashState*, const BitSequence*, DataLength);
void Final(hashState*, BitSequence*); */
void groestl(const BitSequence*, DataLength, BitSequence*);
/* NIST API end */
/*
int crypto_hash(unsigned char *out,
const unsigned char *in,
unsigned long long len);
*/
#endif /* __hash_h */

View File

@@ -1,366 +0,0 @@
/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
--------------------------------
Performance
Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
Speed for long message:
1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2
2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3
--------------------------------
Last Modified: January 16, 2011
*/
#include "c_jh.h"
#include <stdint.h>
#include <string.h>
/*typedef unsigned long long uint64;*/
typedef uint64_t uint64;
/*define data alignment for different C compilers*/
#if defined(__GNUC__)
#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
#else
#define DATA_ALIGN16(x) __declspec(align(16)) x
#endif
typedef struct {
int hashbitlen; /*the message digest size*/
unsigned long long databitlen; /*the message size in bits*/
unsigned long long datasize_in_buffer; /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
DATA_ALIGN16(uint64 x[8][2]); /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
unsigned char buffer[64]; /*the 512-bit message block to be hashed;*/
} hashState;
/*The initial hash value H(0)*/
const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e};
const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69};
const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f};
const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b};
/*42 round constants, each round constant is 32-byte (256-bit)*/
const unsigned char E8_bitslice_roundconstant[42][32]={
{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40},
{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31},
{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc},
{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3},
{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23},
{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97},
{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14},
{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4},
{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36},
{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f},
{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b},
{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62},
{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5},
{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f},
{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a},
{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf},
{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0},
{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a},
{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6},
{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67},
{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18},
{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e},
{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1},
{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83},
{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef},
{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65},
{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c},
{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71},
{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0},
{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f},
{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad},
{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6},
{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63},
{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f},
{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a},
{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5},
{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48},
{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e},
{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7},
{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde},
{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a},
{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
static void E8(hashState *state); /*The bijective function E8, in bitslice form*/
static void F8(hashState *state); /*The compression function F8 */
/*The API functions*/
static HashReturn Init(hashState *state, int hashbitlen);
static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
static HashReturn Final(hashState *state, BitSequence *hashval);
HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval);
/*swapping bit 2i with bit 2i+1 of 64-bit x*/
#define SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/
#define SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2));
/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/
#define SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4));
/*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/
#define SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8));
/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/
#define SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16));
/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/
#define SWAP32(x) (x) = (((x) << 32) | ((x) >> 32));
/*The MDS transform*/
#define L(m0,m1,m2,m3,m4,m5,m6,m7) \
(m4) ^= (m1); \
(m5) ^= (m2); \
(m6) ^= (m0) ^ (m3); \
(m7) ^= (m0); \
(m0) ^= (m5); \
(m1) ^= (m6); \
(m2) ^= (m4) ^ (m7); \
(m3) ^= (m4);
/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/
/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/
#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \
m3 = ~(m3); \
m7 = ~(m7); \
m0 ^= ((~(m2)) & (cc0)); \
m4 ^= ((~(m6)) & (cc1)); \
temp0 = (cc0) ^ ((m0) & (m1));\
temp1 = (cc1) ^ ((m4) & (m5));\
m0 ^= ((m2) & (m3)); \
m4 ^= ((m6) & (m7)); \
m3 ^= ((~(m1)) & (m2)); \
m7 ^= ((~(m5)) & (m6)); \
m1 ^= ((m0) & (m2)); \
m5 ^= ((m4) & (m6)); \
m2 ^= ((m0) & (~(m3))); \
m6 ^= ((m4) & (~(m7))); \
m0 ^= ((m1) | (m3)); \
m4 ^= ((m5) | (m7)); \
m3 ^= ((m1) & (m2)); \
m7 ^= ((m5) & (m6)); \
m1 ^= (temp0 & (m0)); \
m5 ^= (temp1 & (m4)); \
m2 ^= temp0; \
m6 ^= temp1;
/*The bijective function E8, in bitslice form*/
static void E8(hashState *state)
{
uint64 i,roundnumber,temp0,temp1;
for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) {
/*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
for (i = 0; i < 2; i++) {
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] );
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]);
}
/*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
for (i = 0; i < 2; i++) {
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] );
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]);
}
/*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
for (i = 0; i < 2; i++) {
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] );
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]);
}
/*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
for (i = 0; i < 2; i++) {
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] );
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]);
}
/*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
for (i = 0; i < 2; i++) {
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] );
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]);
}
/*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
for (i = 0; i < 2; i++) {
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] );
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]);
}
/*round 7*roundnumber+6: Sbox and MDS layers*/
for (i = 0; i < 2; i++) {
SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] );
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
}
/*round 7*roundnumber+6: swapping layer*/
for (i = 1; i < 8; i = i+2) {
temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
}
}
}
/*The compression function F8 */
static void F8(hashState *state)
{
uint64 i;
/*xor the 512-bit message with the fist half of the 1024-bit hash state*/
for (i = 0; i < 8; i++) state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
/*the bijective function E8 */
E8(state);
/*xor the 512-bit message with the second half of the 1024-bit hash state*/
for (i = 0; i < 8; i++) state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
}
/*before hashing a message, initialize the hash state as H0 */
static HashReturn Init(hashState *state, int hashbitlen)
{
state->databitlen = 0;
state->datasize_in_buffer = 0;
/*initialize the initial hash value of JH*/
state->hashbitlen = hashbitlen;
/*load the intital hash value into state*/
switch (hashbitlen)
{
case 224: memcpy(state->x,JH224_H0,128); break;
case 256: memcpy(state->x,JH256_H0,128); break;
case 384: memcpy(state->x,JH384_H0,128); break;
case 512: memcpy(state->x,JH512_H0,128); break;
}
return(SUCCESS);
}
/*hash each 512-bit message block, except the last partial block*/
static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
{
DataLength index; /*the starting address of the data to be compressed*/
state->databitlen += databitlen;
index = 0;
/*if there is remaining data in the buffer, fill it to a full message block first*/
/*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/
/*There is data in the buffer, but the incoming data is insufficient for a full block*/
if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) ) {
if ( (databitlen & 7) == 0 )
memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64-(state->datasize_in_buffer >> 3)) );
else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64 - (state->datasize_in_buffer >> 3) + 1) );
state->datasize_in_buffer += databitlen;
databitlen = 0;
}
/*There is data in the buffer, and the incoming data is sufficient for a full block*/
if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) {
memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, (size_t) (64-(state->datasize_in_buffer >> 3)) );
index = 64-(state->datasize_in_buffer >> 3);
databitlen = databitlen - (512 - state->datasize_in_buffer);
F8(state);
state->datasize_in_buffer = 0;
}
/*hash the remaining full message blocks*/
for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
memcpy(state->buffer, data+index, 64);
F8(state);
}
/*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
if ( databitlen > 0) {
if ((databitlen & 7) == 0)
memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
else
memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
state->datasize_in_buffer = databitlen;
}
return(SUCCESS);
}
/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/
static HashReturn Final(hashState *state, BitSequence *hashval)
{
unsigned int i;
if ( (state->databitlen & 0x1ff) == 0 ) {
/*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
memset(state->buffer, 0, 64);
state->buffer[0] = 0x80;
state->buffer[63] = state->databitlen & 0xff;
state->buffer[62] = (state->databitlen >> 8) & 0xff;
state->buffer[61] = (state->databitlen >> 16) & 0xff;
state->buffer[60] = (state->databitlen >> 24) & 0xff;
state->buffer[59] = (state->databitlen >> 32) & 0xff;
state->buffer[58] = (state->databitlen >> 40) & 0xff;
state->buffer[57] = (state->databitlen >> 48) & 0xff;
state->buffer[56] = (state->databitlen >> 56) & 0xff;
F8(state);
}
else {
/*set the rest of the bytes in the buffer to 0*/
if ( (state->datasize_in_buffer & 7) == 0)
for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) state->buffer[i] = 0;
else
for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) state->buffer[i] = 0;
/*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
F8(state);
memset(state->buffer, 0, 64);
state->buffer[63] = state->databitlen & 0xff;
state->buffer[62] = (state->databitlen >> 8) & 0xff;
state->buffer[61] = (state->databitlen >> 16) & 0xff;
state->buffer[60] = (state->databitlen >> 24) & 0xff;
state->buffer[59] = (state->databitlen >> 32) & 0xff;
state->buffer[58] = (state->databitlen >> 40) & 0xff;
state->buffer[57] = (state->databitlen >> 48) & 0xff;
state->buffer[56] = (state->databitlen >> 56) & 0xff;
F8(state);
}
/*truncating the final hash value to generate the message digest*/
switch(state->hashbitlen) {
case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28); break;
case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32); break;
case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48); break;
case 512: memcpy(hashval,(unsigned char*)state->x+64,64); break;
}
return(SUCCESS);
}
/* hash a message,
three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen)
one output: message digest (hashval)
*/
HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval)
{
hashState state;
if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) {
Init(&state, hashbitlen);
Update(&state, data, databitlen);
Final(&state, hashval);
return SUCCESS;
}
else
return(BAD_HASHLEN);
}

View File

@@ -1,19 +0,0 @@
/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
--------------------------------
Performance
Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
Speed for long message:
1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2
2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3
--------------------------------
Last Modified: January 16, 2011
*/
#pragma once
#include "hash.h"
HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);

View File

@@ -1,123 +0,0 @@
// keccak.c
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
// A baseline Keccak (3rd round) implementation.
#include "hash-ops.h"
#include "c_keccak.h"
const uint64_t keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
const int keccakf_rotc[24] =
{
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
};
const int keccakf_piln[24] =
{
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
};
// update the state with given number of rounds
void keccakf(uint64_t st[25], int rounds)
{
int i, j, round;
uint64_t t, bc[5];
for (round = 0; round < rounds; ++round) {
// Theta
bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
for (i = 0; i < 5; ++i) {
t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
st[i ] ^= t;
st[i + 5] ^= t;
st[i + 10] ^= t;
st[i + 15] ^= t;
st[i + 20] ^= t;
}
// Rho Pi
t = st[1];
for (i = 0; i < 24; ++i) {
bc[0] = st[keccakf_piln[i]];
st[keccakf_piln[i]] = ROTL64(t, keccakf_rotc[i]);
t = bc[0];
}
// Chi
for (j = 0; j < 25; j += 5) {
bc[0] = st[j ];
bc[1] = st[j + 1];
bc[2] = st[j + 2];
bc[3] = st[j + 3];
bc[4] = st[j + 4];
st[j ] ^= (~bc[1]) & bc[2];
st[j + 1] ^= (~bc[2]) & bc[3];
st[j + 2] ^= (~bc[3]) & bc[4];
st[j + 3] ^= (~bc[4]) & bc[0];
st[j + 4] ^= (~bc[0]) & bc[1];
}
// Iota
st[0] ^= keccakf_rndc[round];
}
}
// compute a keccak hash (md) of given byte length from "in"
typedef uint64_t state_t[25];
int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
{
state_t st;
uint8_t temp[144];
int i, rsiz, rsizw;
rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
rsizw = rsiz / 8;
memset(st, 0, sizeof(st));
for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
for (i = 0; i < rsizw; i++)
st[i] ^= ((uint64_t *) in)[i];
keccakf(st, KECCAK_ROUNDS);
}
// last block and padding
memcpy(temp, in, inlen);
temp[inlen++] = 1;
memset(temp + inlen, 0, rsiz - inlen);
temp[rsiz - 1] |= 0x80;
for (i = 0; i < rsizw; i++)
st[i] ^= ((uint64_t *) temp)[i];
keccakf(st, KECCAK_ROUNDS);
memcpy(md, st, mdlen);
return 0;
}
void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
{
keccak(in, inlen, md, sizeof(state_t));
}

View File

@@ -1,26 +0,0 @@
// keccak.h
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
#ifndef KECCAK_H
#define KECCAK_H
#include <stdint.h>
#include <string.h>
#ifndef KECCAK_ROUNDS
#define KECCAK_ROUNDS 24
#endif
#ifndef ROTL64
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
#endif
// compute a keccak hash (md) of given byte length from "in"
int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
// update the state
void keccakf(uint64_t st[25], int norounds);
void keccak1600(const uint8_t *in, int inlen, uint8_t *md);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,47 +0,0 @@
#ifndef _SKEIN_H_
#define _SKEIN_H_ 1
/**************************************************************************
**
** Interface declarations and internal definitions for Skein hashing.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
***************************************************************************
**
** The following compile-time switches may be defined to control some
** tradeoffs between speed, code size, error checking, and security.
**
** The "default" note explains what happens when the switch is not defined.
**
** SKEIN_DEBUG -- make callouts from inside Skein code
** to examine/display intermediate values.
** [default: no callouts (no overhead)]
**
** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
** code. If not defined, most error checking
** is disabled (for performance). Otherwise,
** the switch value is interpreted as:
** 0: use assert() to flag errors
** 1: return SKEIN_FAIL to flag errors
**
***************************************************************************/
#include "skein_port.h" /* get platform-specific definitions */
typedef enum
{
SKEIN_SUCCESS = 0, /* return codes from Skein calls */
SKEIN_FAIL = 1,
SKEIN_BAD_HASHLEN = 2
}
SkeinHashReturn;
typedef size_t SkeinDataLength; /* bit count type */
typedef u08b_t SkeinBitSequence; /* bit stream type */
/* "all-in-one" call */
SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data,
SkeinDataLength databitlen, SkeinBitSequence *hashval);
#endif /* ifndef _SKEIN_H_ */

View File

@@ -1,38 +0,0 @@
#ifndef __tables_h
#define __tables_h
const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc
, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e};
#endif /* __tables_h */

View File

@@ -1,59 +0,0 @@
// Copyright (c) 2012-2013 The Cryptonote developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#pragma once
#if !defined(__cplusplus)
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "int-util.h"
#if 0
static inline void *padd(void *p, size_t i) {
return (char *) p + i;
}
static inline const void *cpadd(const void *p, size_t i) {
return (const char *) p + i;
}
static inline void place_length(uint8_t *buffer, size_t bufsize, size_t length) {
if (sizeof(size_t) == 4) {
*(uint32_t*) padd(buffer, bufsize - 4) = swap32be(length);
} else {
*(uint64_t*) padd(buffer, bufsize - 8) = swap64be(length);
}
}
#endif
#pragma pack(push, 1)
union hash_state {
uint8_t b[200];
uint64_t w[25];
};
#pragma pack(pop)
void hash_permutation(union hash_state *state);
void hash_process(union hash_state *state, const uint8_t *buf, int count);
#endif
enum {
HASH_SIZE = 32,
HASH_DATA_AREA = 136
};
void cn_fast_hash(const void *data, int len, char *hash);
void cn_slow_hash(const void *data, size_t length, char *hash);
void hash_extra_blake(const void *data, size_t length, char *hash);
void hash_extra_groestl(const void *data, size_t length, char *hash);
void hash_extra_jh(const void *data, size_t length, char *hash);
void hash_extra_skein(const void *data, size_t length, char *hash);
void tree_hash(const char (*hashes)[HASH_SIZE], size_t count, char *root_hash);

View File

@@ -1,24 +0,0 @@
// Copyright (c) 2012-2013 The Cryptonote developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "hash-ops.h"
#include "c_keccak.h"
void hash_permutation(union hash_state *state) {
keccakf((uint64_t*)state, 24);
}
void hash_process(union hash_state *state, const uint8_t *buf, int count) {
keccak1600(buf, count, (uint8_t*)state);
}
void cn_fast_hash(const void *data, int len, char *hash) {
union hash_state state;
hash_process(&state, data, len);
memcpy(hash, &state, HASH_SIZE);
}

View File

@@ -1,5 +0,0 @@
#pragma once
typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn;

View File

@@ -1,195 +0,0 @@
// Copyright (c) 2012-2013 The Cryptonote developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#pragma once
#ifndef INT_UTILS_H_
#define INT_UTILS_H_
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#ifndef _MSC_VER
#include <sys/param.h>
#else
#define inline __inline
#endif
#ifndef LITTLE_ENDIAN
#define LITTLE_ENDIAN 0x1234
#define BIG_ENDIAN 0x4321
#endif
#if !defined(BYTE_ORDER) && (defined(__LITTLE_ENDIAN__) || defined(__arm__) || defined(WIN32))
#define BYTE_ORDER LITTLE_ENDIAN
#endif
#if defined(WIN32)
#include <stdlib.h>
static inline uint32_t rol32(uint32_t x, int r) {
return _rotl(x, r);
}
static inline uint64_t rol64(uint64_t x, int r) {
return _rotl64(x, r);
}
#else
static inline uint32_t rol32(uint32_t x, int r) {
return (x << (r & 31)) | (x >> (-r & 31));
}
static inline uint64_t rol64(uint64_t x, int r) {
return (x << (r & 63)) | (x >> (-r & 63));
}
#endif
static inline uint64_t hi_dword(uint64_t val) {
return val >> 32;
}
static inline uint64_t lo_dword(uint64_t val) {
return val & 0xFFFFFFFF;
}
static inline uint64_t div_with_reminder(uint64_t dividend, uint32_t divisor, uint32_t* remainder) {
dividend |= ((uint64_t)*remainder) << 32;
*remainder = dividend % divisor;
return dividend / divisor;
}
// Long division with 2^32 base
static inline uint32_t div128_32(uint64_t dividend_hi, uint64_t dividend_lo, uint32_t divisor, uint64_t* quotient_hi, uint64_t* quotient_lo) {
uint64_t dividend_dwords[4];
uint32_t remainder = 0;
dividend_dwords[3] = hi_dword(dividend_hi);
dividend_dwords[2] = lo_dword(dividend_hi);
dividend_dwords[1] = hi_dword(dividend_lo);
dividend_dwords[0] = lo_dword(dividend_lo);
*quotient_hi = div_with_reminder(dividend_dwords[3], divisor, &remainder) << 32;
*quotient_hi |= div_with_reminder(dividend_dwords[2], divisor, &remainder);
*quotient_lo = div_with_reminder(dividend_dwords[1], divisor, &remainder) << 32;
*quotient_lo |= div_with_reminder(dividend_dwords[0], divisor, &remainder);
return remainder;
}
#define IDENT32(x) ((uint32_t) (x))
#define IDENT64(x) ((uint64_t) (x))
#define SWAP32(x) ((((uint32_t) (x) & 0x000000ff) << 24) | \
(((uint32_t) (x) & 0x0000ff00) << 8) | \
(((uint32_t) (x) & 0x00ff0000) >> 8) | \
(((uint32_t) (x) & 0xff000000) >> 24))
#define SWAP64(x) ((((uint64_t) (x) & 0x00000000000000ff) << 56) | \
(((uint64_t) (x) & 0x000000000000ff00) << 40) | \
(((uint64_t) (x) & 0x0000000000ff0000) << 24) | \
(((uint64_t) (x) & 0x00000000ff000000) << 8) | \
(((uint64_t) (x) & 0x000000ff00000000) >> 8) | \
(((uint64_t) (x) & 0x0000ff0000000000) >> 24) | \
(((uint64_t) (x) & 0x00ff000000000000) >> 40) | \
(((uint64_t) (x) & 0xff00000000000000) >> 56))
static inline uint32_t ident32(uint32_t x) { return x; }
static inline uint64_t ident64(uint64_t x) { return x; }
static inline uint32_t swap32(uint32_t x) {
x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8);
return (x << 16) | (x >> 16);
}
static inline uint64_t swap64(uint64_t x) {
x = ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8);
x = ((x & 0x0000ffff0000ffff) << 16) | ((x & 0xffff0000ffff0000) >> 16);
return (x << 32) | (x >> 32);
}
#if defined(__GNUC__)
#define UNUSED __attribute__((unused))
#else
#define UNUSED
#endif
static inline void mem_inplace_ident(void *mem UNUSED, size_t n UNUSED) { }
#undef UNUSED
static inline void mem_inplace_swap32(void *mem, size_t n) {
size_t i;
for (i = 0; i < n; i++) {
((uint32_t *) mem)[i] = swap32(((const uint32_t *) mem)[i]);
}
}
static inline void mem_inplace_swap64(void *mem, size_t n) {
size_t i;
for (i = 0; i < n; i++) {
((uint64_t *) mem)[i] = swap64(((const uint64_t *) mem)[i]);
}
}
static inline void memcpy_ident32(void *dst, const void *src, size_t n) {
memcpy(dst, src, 4 * n);
}
static inline void memcpy_ident64(void *dst, const void *src, size_t n) {
memcpy(dst, src, 8 * n);
}
static inline void memcpy_swap32(void *dst, const void *src, size_t n) {
size_t i;
for (i = 0; i < n; i++) {
((uint32_t *) dst)[i] = swap32(((const uint32_t *) src)[i]);
}
}
static inline void memcpy_swap64(void *dst, const void *src, size_t n) {
size_t i;
for (i = 0; i < n; i++) {
((uint64_t *) dst)[i] = swap64(((const uint64_t *) src)[i]);
}
}
#if !defined(BYTE_ORDER) || !defined(LITTLE_ENDIAN) || !defined(BIG_ENDIAN)
static_assert(false, "BYTE_ORDER is undefined. Perhaps, GNU extensions are not enabled");
#endif
#if BYTE_ORDER == LITTLE_ENDIAN
#define SWAP32LE IDENT32
#define SWAP32BE SWAP32
#define swap32le ident32
#define swap32be swap32
#define mem_inplace_swap32le mem_inplace_ident
#define mem_inplace_swap32be mem_inplace_swap32
#define memcpy_swap32le memcpy_ident32
#define memcpy_swap32be memcpy_swap32
#define SWAP64LE IDENT64
#define SWAP64BE SWAP64
#define swap64le ident64
#define swap64be swap64
#define mem_inplace_swap64le mem_inplace_ident
#define mem_inplace_swap64be mem_inplace_swap64
#define memcpy_swap64le memcpy_ident64
#define memcpy_swap64be memcpy_swap64
#endif
#if BYTE_ORDER == BIG_ENDIAN
#define SWAP32BE IDENT32
#define SWAP32LE SWAP32
#define swap32be ident32
#define swap32le swap32
#define mem_inplace_swap32be mem_inplace_ident
#define mem_inplace_swap32le mem_inplace_swap32
#define memcpy_swap32be memcpy_ident32
#define memcpy_swap32le memcpy_swap32
#define SWAP64BE IDENT64
#define SWAP64LE SWAP64
#define swap64be ident64
#define swap64le swap64
#define mem_inplace_swap64be mem_inplace_ident
#define mem_inplace_swap64le mem_inplace_swap64
#define memcpy_swap64be memcpy_ident64
#define memcpy_swap64le memcpy_swap64
#endif
#endif /* INT_UTILS_H_ */

View File

@@ -1,50 +0,0 @@
/*
* ---------------------------------------------------------------------------
* OpenAES License
* ---------------------------------------------------------------------------
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------
*/
#ifndef _OAES_CONFIG_H
#define _OAES_CONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
//#ifndef OAES_HAVE_ISAAC
//#define OAES_HAVE_ISAAC 1
//#endif // OAES_HAVE_ISAAC
//#ifndef OAES_DEBUG
//#define OAES_DEBUG 0
//#endif // OAES_DEBUG
#ifdef __cplusplus
}
#endif
#endif // _OAES_CONFIG_H

File diff suppressed because it is too large Load Diff

View File

@@ -1,214 +0,0 @@
/*
* ---------------------------------------------------------------------------
* OpenAES License
* ---------------------------------------------------------------------------
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------
*/
#ifndef _OAES_LIB_H
#define _OAES_LIB_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _WIN32
# ifdef OAES_SHARED
# ifdef oaes_lib_EXPORTS
# define OAES_API __declspec(dllexport)
# else
# define OAES_API __declspec(dllimport)
# endif
# else
# define OAES_API
# endif
#else
# define OAES_API
#endif // WIN32
#define OAES_VERSION "0.8.1"
#define OAES_BLOCK_SIZE 16
typedef void OAES_CTX;
typedef enum
{
OAES_RET_FIRST = 0,
OAES_RET_SUCCESS = 0,
OAES_RET_UNKNOWN,
OAES_RET_ARG1,
OAES_RET_ARG2,
OAES_RET_ARG3,
OAES_RET_ARG4,
OAES_RET_ARG5,
OAES_RET_NOKEY,
OAES_RET_MEM,
OAES_RET_BUF,
OAES_RET_HEADER,
OAES_RET_COUNT
} OAES_RET;
/*
* oaes_set_option() takes one of these values for its [option] parameter
* some options accept either an optional or a required [value] parameter
*/
// no option
#define OAES_OPTION_NONE 0
// enable ECB mode, disable CBC mode
#define OAES_OPTION_ECB 1
// enable CBC mode, disable ECB mode
// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
// the value of the initialization vector, iv
#define OAES_OPTION_CBC 2
#ifdef OAES_DEBUG
typedef int ( * oaes_step_cb ) (
const uint8_t state[OAES_BLOCK_SIZE],
const char * step_name,
int step_count,
void * user_data );
// enable state stepping mode
// value is required, must pass oaes_step_cb to receive the state at each step
#define OAES_OPTION_STEP_ON 4
// disable state stepping mode
#define OAES_OPTION_STEP_OFF 8
#endif // OAES_DEBUG
typedef uint16_t OAES_OPTION;
typedef struct _oaes_key
{
size_t data_len;
uint8_t *data;
size_t exp_data_len;
uint8_t *exp_data;
size_t num_keys;
size_t key_base;
} oaes_key;
typedef struct _oaes_ctx
{
#ifdef OAES_HAVE_ISAAC
randctx * rctx;
#endif // OAES_HAVE_ISAAC
#ifdef OAES_DEBUG
oaes_step_cb step_cb;
#endif // OAES_DEBUG
oaes_key * key;
OAES_OPTION options;
uint8_t iv[OAES_BLOCK_SIZE];
} oaes_ctx;
/*
* // usage:
*
* OAES_CTX * ctx = oaes_alloc();
* .
* .
* .
* {
* oaes_gen_key_xxx( ctx );
* {
* oaes_key_export( ctx, _buf, &_buf_len );
* // or
* oaes_key_export_data( ctx, _buf, &_buf_len );\
* }
* }
* // or
* {
* oaes_key_import( ctx, _buf, _buf_len );
* // or
* oaes_key_import_data( ctx, _buf, _buf_len );
* }
* .
* .
* .
* oaes_encrypt( ctx, m, m_len, c, &c_len );
* .
* .
* .
* oaes_decrypt( ctx, c, c_len, m, &m_len );
* .
* .
* .
* oaes_free( &ctx );
*/
OAES_API OAES_CTX * oaes_alloc(void);
OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
OAES_OPTION option, const void * value );
OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
// export key with header information
// set data == NULL to get the required data_len
OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
uint8_t * data, size_t * data_len );
// directly export the data from key
// set data == NULL to get the required data_len
OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
uint8_t * data, size_t * data_len );
// import key with header information
OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
const uint8_t * data, size_t data_len );
// directly import data into key
OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
const uint8_t * data, size_t data_len );
// set c == NULL to get the required c_len
OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
// set m == NULL to get the required m_len
OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
// set buf == NULL to get the required buf_len
OAES_API OAES_RET oaes_sprintf(
char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
#ifdef __cplusplus
}
#endif
#endif // _OAES_LIB_H

View File

@@ -1,190 +0,0 @@
#ifndef _SKEIN_PORT_H_
#define _SKEIN_PORT_H_
#include <limits.h>
#include <stdint.h>
#ifndef RETURN_VALUES
# define RETURN_VALUES
# if defined( DLL_EXPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllexport ) void __stdcall
# define INT_RETURN __declspec( dllexport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllexport__ ) void
# define INT_RETURN __declspec( __dllexport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( DLL_IMPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllimport ) void __stdcall
# define INT_RETURN __declspec( dllimport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllimport__ ) void
# define INT_RETURN __declspec( __dllimport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( __WATCOMC__ )
# define VOID_RETURN void __cdecl
# define INT_RETURN int __cdecl
# else
# define VOID_RETURN void
# define INT_RETURN int
# endif
#endif
/* These defines are used to declare buffers in a way that allows
faster operations on longer variables to be used. In all these
defines 'size' must be a power of 2 and >= 8
dec_unit_type(size,x) declares a variable 'x' of length
'size' bits
dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
bytes defined as an array of variables
each of 'size' bits (bsize must be a
multiple of size / 8)
ptr_cast(x,size) casts a pointer to a pointer to a
varaiable of length 'size' bits
*/
#define ui_type(size) uint##size##_t
#define dec_unit_type(size,x) typedef ui_type(size) x
#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
#define ptr_cast(x,size) ((ui_type(size)*)(x))
typedef unsigned int uint_t; /* native unsigned integer */
typedef uint8_t u08b_t; /* 8-bit unsigned integer */
typedef uint64_t u64b_t; /* 64-bit unsigned integer */
#ifndef RotL_64
#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N))))
#endif
/*
* Skein is "natively" little-endian (unlike SHA-xxx), for optimal
* performance on x86 CPUs. The Skein code requires the following
* definitions for dealing with endianness:
*
* SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
* Skein_Put64_LSB_First
* Skein_Get64_LSB_First
* Skein_Swap64
*
* If SKEIN_NEED_SWAP is defined at compile time, it is used here
* along with the portable versions of Put64/Get64/Swap64, which
* are slow in general.
*
* Otherwise, an "auto-detect" of endianness is attempted below.
* If the default handling doesn't work well, the user may insert
* platform-specific code instead (e.g., for big-endian CPUs).
*
*/
#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
#include "int-util.h"
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
#if BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#endif
/* special handler for IA64, which may be either endianness (?) */
/* here we assume little-endian, but this may need to be changed */
#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
# define PLATFORM_MUST_ALIGN (1)
#ifndef PLATFORM_BYTE_ORDER
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#endif
#ifndef PLATFORM_MUST_ALIGN
# define PLATFORM_MUST_ALIGN (0)
#endif
#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
/* here for big-endian CPUs */
#define SKEIN_NEED_SWAP (1)
#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
#define SKEIN_NEED_SWAP (0)
#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */
#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
#endif
#else
#error "Skein needs endianness setting!"
#endif
#endif /* ifndef SKEIN_NEED_SWAP */
/*
******************************************************************
* Provide any definitions still needed.
******************************************************************
*/
#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
#if SKEIN_NEED_SWAP
#define Skein_Swap64(w64) \
( (( ((u64b_t)(w64)) & 0xFF) << 56) | \
(((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \
(((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \
(((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \
(((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \
(((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \
(((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \
(((((u64b_t)(w64)) >>56) & 0xFF) ) )
#else
#define Skein_Swap64(w64) (w64)
#endif
#endif /* ifndef Skein_Swap64 */
#ifndef Skein_Put64_LSB_First
void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
{ /* this version is fully portable (big-endian or little-endian), but slow */
size_t n;
for (n=0;n<bCnt;n++)
dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7)));
}
#else
; /* output only the function prototype */
#endif
#endif /* ifndef Skein_Put64_LSB_First */
#ifndef Skein_Get64_LSB_First
void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
{ /* this version is fully portable (big-endian or little-endian), but slow */
size_t n;
for (n=0;n<8*wCnt;n+=8)
dst[n/8] = (((u64b_t) src[n ]) ) +
(((u64b_t) src[n+1]) << 8) +
(((u64b_t) src[n+2]) << 16) +
(((u64b_t) src[n+3]) << 24) +
(((u64b_t) src[n+4]) << 32) +
(((u64b_t) src[n+5]) << 40) +
(((u64b_t) src[n+6]) << 48) +
(((u64b_t) src[n+7]) << 56) ;
}
#else
; /* output only the function prototype */
#endif
#endif /* ifndef Skein_Get64_LSB_First */
#endif /* ifndef _SKEIN_PORT_H_ */

18
miner.h
View File

@@ -447,26 +447,14 @@ bool stratum_subscribe(struct stratum_ctx *sctx);
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
/* rpc 2.0 (xmr) */
extern bool lowdiff_debug;
extern bool jsonrpc_2;
extern bool aes_ni_supported;
extern char rpc2_id[64];
extern char *rpc2_blob;
extern size_t rpc2_bloblen;
extern uint32_t rpc2_target;
extern char *rpc2_job_id;
extern char *rpc_user;
extern char *short_url;
json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags);
bool rpc2_login(CURL *curl);
bool rpc2_login_decode(const json_t *val);
bool rpc2_workio_login(CURL *curl);
bool rpc2_stratum_job(struct stratum_ctx *sctx, json_t *params);
bool rpc2_job_decode(const json_t *job, struct work *work);
struct thread_q;
struct thread_q *tq_new(void);
@@ -763,8 +751,6 @@ extern bool opt_hash_meter;
extern uint32_t accepted_share_count;
extern uint32_t rejected_share_count;
extern uint32_t solved_block_count;
extern pthread_mutex_t rpc2_job_lock;
extern pthread_mutex_t rpc2_login_lock;
extern pthread_mutex_t applog_lock;
extern pthread_mutex_t stats_lock;
extern bool opt_sapling;

View File

@@ -121,12 +121,9 @@ do { \
// Horizontal vector testing
#define mm256_allbits0( a ) _mm256_testz_si256( a, a )
#define mm256_allbits1( a ) _mm256_testc_si256( a, m256_neg1 )
//broken
//#define mm256_allbitsne( a ) _mm256_testnzc_si256( a, m256_neg1 )
#define mm256_anybits0( a ) !mm256_allbits1( a )
#define mm256_allbits0( a ) _mm256_testc_si256( a, m256_neg1 )
#define mm256_allbits1( a ) _mm256_testz_si256( a, a )
#define mm256_anybits0( a ) !mm256_allbits1( a )
#define mm256_anybits1( a ) !mm256_allbits0( a )

230
util.c
View File

@@ -1423,9 +1423,6 @@ bool stratum_subscribe(struct stratum_ctx *sctx)
json_error_t err;
bool ret = false, retry = false;
if (jsonrpc_2)
return true;
start:
s = (char*) malloc(128 + (sctx->session_id ? strlen(sctx->session_id) : 0));
if (retry)
@@ -1514,16 +1511,9 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
json_error_t err;
bool ret = false;
if (jsonrpc_2) {
s = (char*) malloc(300 + strlen(user) + strlen(pass));
sprintf(s, "{\"method\": \"login\", \"params\": {"
"\"login\": \"%s\", \"pass\": \"%s\", \"agent\": \"%s\"}, \"id\": 1}",
user, pass, USER_AGENT);
} else {
s = (char*) malloc(80 + strlen(user) + strlen(pass));
sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}",
s = (char*) malloc(80 + strlen(user) + strlen(pass));
sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}",
user, pass);
}
if (!stratum_send_line(sctx, s))
goto out;
@@ -1553,15 +1543,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
goto out;
}
if (jsonrpc_2) {
rpc2_login_decode(val);
json_t *job_val = json_object_get(res_val, "job");
pthread_mutex_lock(&sctx->work_lock);
if(job_val) rpc2_job_decode(job_val, &sctx->work);
sctx->job.job_id = strdup(sctx->work.job_id);
pthread_mutex_unlock(&sctx->work_lock);
}
ret = true;
if (!opt_extranonce)
@@ -1575,8 +1556,6 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
if (!socket_full(sctx->sock, 3)) {
applog(LOG_WARNING, "stratum extranonce subscribe timed out");
// if (opt_debug)
// applog(LOG_DEBUG, "stratum extranonce subscribe timed out");
goto out;
}
@@ -1607,204 +1586,6 @@ out:
return ret;
}
// -------------------- RPC 2.0 (XMR/AEON) -------------------------
//extern pthread_mutex_t rpc2_login_lock;
//extern pthread_mutex_t rpc2_job_lock;
bool rpc2_login_decode(const json_t *val)
{
const char *id;
const char *s;
json_t *res = json_object_get(val, "result");
if(!res) {
applog(LOG_ERR, "JSON invalid result");
goto err_out;
}
json_t *tmp;
tmp = json_object_get(res, "id");
if(!tmp) {
applog(LOG_ERR, "JSON inval id");
goto err_out;
}
id = json_string_value(tmp);
if(!id) {
applog(LOG_ERR, "JSON id is not a string");
goto err_out;
}
memcpy(&rpc2_id, id, 64);
if(opt_debug)
applog(LOG_DEBUG, "Auth id: %s", id);
tmp = json_object_get(res, "status");
if(!tmp) {
applog(LOG_ERR, "JSON inval status");
goto err_out;
}
s = json_string_value(tmp);
if(!s) {
applog(LOG_ERR, "JSON status is not a string");
goto err_out;
}
if(strcmp(s, "OK")) {
applog(LOG_ERR, "JSON returned status \"%s\"", s);
return false;
}
return true;
err_out:
applog(LOG_WARNING,"%s: fail", __func__);
return false;
}
json_t* json_rpc2_call_recur(CURL *curl, const char *url, const char *userpass,
json_t *rpc_req, int *curl_err, int flags, int recur)
{
if(recur >= 5) {
if(opt_debug)
applog(LOG_DEBUG, "Failed to call rpc command after %i tries", recur);
return NULL;
}
if(!strcmp(rpc2_id, "")) {
if(opt_debug)
applog(LOG_DEBUG, "Tried to call rpc2 command before authentication");
return NULL;
}
json_t *params = json_object_get(rpc_req, "params");
if (params) {
json_t *auth_id = json_object_get(params, "id");
if (auth_id) {
json_string_set(auth_id, rpc2_id);
}
}
json_t *res = json_rpc_call(curl, url, userpass, json_dumps(rpc_req, 0),
curl_err, flags | JSON_RPC_IGNOREERR);
if(!res) goto end;
json_t *error = json_object_get(res, "error");
if(!error) goto end;
json_t *message;
if(json_is_string(error))
message = error;
else
message = json_object_get(error, "message");
if(!message || !json_is_string(message)) goto end;
const char *mes = json_string_value(message);
if(!strcmp(mes, "Unauthenticated")) {
pthread_mutex_lock(&rpc2_login_lock);
rpc2_login(curl);
sleep(1);
pthread_mutex_unlock(&rpc2_login_lock);
return json_rpc2_call_recur(curl, url, userpass, rpc_req,
curl_err, flags, recur + 1);
} else if(!strcmp(mes, "Low difficulty share") || !strcmp(mes, "Block expired") || !strcmp(mes, "Invalid job id") || !strcmp(mes, "Duplicate share")) {
json_t *result = json_object_get(res, "result");
if(!result) {
goto end;
}
json_object_set(result, "reject-reason", json_string(mes));
} else {
applog(LOG_ERR, "json_rpc2.0 error: %s", mes);
return NULL;
}
end:
return res;
}
json_t *json_rpc2_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, int *curl_err, int flags)
{
json_t* req_json = JSON_LOADS(rpc_req, NULL);
json_t* res = json_rpc2_call_recur(curl, url, userpass, req_json, curl_err, flags, 0);
json_decref(req_json);
return res;
}
bool rpc2_job_decode(const json_t *job, struct work *work)
{
if (!jsonrpc_2) {
applog(LOG_ERR, "Tried to decode job without JSON-RPC 2.0");
return false;
}
json_t *tmp;
tmp = json_object_get(job, "job_id");
if (!tmp) {
applog(LOG_ERR, "JSON invalid job id");
goto err_out;
}
const char *job_id = json_string_value(tmp);
tmp = json_object_get(job, "blob");
if (!tmp) {
applog(LOG_ERR, "JSON invalid blob");
goto err_out;
}
const char *hexblob = json_string_value(tmp);
size_t blobLen = strlen(hexblob);
if (blobLen % 2 != 0 || ((blobLen / 2) < 40 && blobLen != 0) || (blobLen / 2) > 128) {
applog(LOG_ERR, "JSON invalid blob length");
goto err_out;
}
if (blobLen != 0) {
uint32_t target = 0;
pthread_mutex_lock(&rpc2_job_lock);
uchar *blob = (uchar*) malloc(blobLen / 2);
if (!hex2bin(blob, hexblob, blobLen / 2)) {
applog(LOG_ERR, "JSON invalid blob");
pthread_mutex_unlock(&rpc2_job_lock);
goto err_out;
}
rpc2_bloblen = blobLen / 2;
if (rpc2_blob) free(rpc2_blob);
rpc2_blob = (char*) malloc(rpc2_bloblen);
if (!rpc2_blob) {
applog(LOG_ERR, "RPC2 OOM!");
goto err_out;
}
memcpy(rpc2_blob, blob, blobLen / 2);
free(blob);
jobj_binary(job, "target", &target, 4);
if(rpc2_target != target)
{
double hashrate = 0.0;
pthread_mutex_lock(&stats_lock);
for (int i = 0; i < opt_n_threads; i++)
hashrate += thr_hashrates[i];
pthread_mutex_unlock(&stats_lock);
double diff = trunc( ( ((double)0xffffffff) / target ) );
if ( !opt_quiet )
// xmr pool diff can change a lot...
applog(LOG_BLUE, "Stratum difficulty set to %g", diff);
work->stratum_diff = diff;
stratum_diff = diff;
rpc2_target = target;
}
if (rpc2_job_id) free(rpc2_job_id);
rpc2_job_id = strdup(job_id);
pthread_mutex_unlock(&rpc2_job_lock);
}
if(work) {
if (!rpc2_blob) {
applog(LOG_WARNING, "Work requested before it was received");
goto err_out;
}
memcpy(work->data, rpc2_blob, rpc2_bloblen);
memset(work->target, 0xff, sizeof(work->target));
work->target[7] = rpc2_target;
if (work->job_id) free(work->job_id);
work->job_id = strdup(rpc2_job_id);
}
return true;
err_out:
applog(LOG_WARNING, "%s", __func__);
return false;
}
/**
* Extract bloc height L H... here len=3, height=0x1333e8
* "...0000000000ffffffff2703e83313062f503253482f043d61105408"
@@ -2298,13 +2079,6 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
params = json_object_get(val, "params");
if (jsonrpc_2) {
if (!strcasecmp(method, "job")) {
ret = rpc2_stratum_job(sctx, params);
}
goto out;
}
id = json_object_get(val, "id");
if (!strcasecmp(method, "mining.notify")) {