mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.12.0
This commit is contained in:
@@ -153,7 +153,7 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
// gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
|
||||
@@ -1,371 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
#include "crypto/hash-ops.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 1
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#endif
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
struct cryptonight_ctx {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
};
|
||||
|
||||
static void cryptolight_hash_ctx(void* output, const void* input, int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
len = 76;
|
||||
hash_process(&ctx->state.hs, (const uint8_t*) input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
void cryptolight_hash(void* output, const void* input, int len) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
cryptolight_hash_ctx(output, input, len, ctx);
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
#if defined(__AES__)
|
||||
|
||||
static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
hash_process(&ctx->state.hs, (const uint8_t*)input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int scanhash_cryptolight( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
|
||||
int thr_id = mythr->id;
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
#if defined(__AES__)
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#else
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#endif
|
||||
free(ctx);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptolight_algo( algo_gate_t* gate )
|
||||
{
|
||||
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptolight;
|
||||
gate->hash = (void*)&cryptolight_hash;
|
||||
gate->hash_suw = (void*)&cryptolight_hash;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -1,357 +0,0 @@
|
||||
#if defined(__AES__)
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <memory.h>
|
||||
#include "cryptonight.h"
|
||||
#include "miner.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
{
|
||||
__m128i tmp4;
|
||||
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
|
||||
tmp4 = _mm_slli_si128(*tmp1, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
|
||||
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
|
||||
// with ExpandAESKey256() and its subroutines
|
||||
static inline void ExpandAESKey256(char *keybuf)
|
||||
{
|
||||
__m128i tmp1, tmp2, tmp3, *keys;
|
||||
|
||||
keys = (__m128i *)keybuf;
|
||||
|
||||
tmp1 = _mm_load_si128((__m128i *)keybuf);
|
||||
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[2] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[3] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[4] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[5] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[6] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[7] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[8] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[9] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[10] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[11] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[12] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[13] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[14] = tmp1;
|
||||
}
|
||||
|
||||
// align to 64 byte cache line
|
||||
typedef struct
|
||||
{
|
||||
uint8_t long_state[MEMORY] __attribute((aligned(64)));
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(64)));
|
||||
uint64_t a[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
|
||||
uint64_t b[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
|
||||
uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(64)));
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
{
|
||||
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
size_t i, j;
|
||||
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
longoutput = (__m128i*)ctx.long_state;
|
||||
xmminput = (__m128i*)ctx.text;
|
||||
expkey = (__m128i*)ExpandedKey;
|
||||
|
||||
// prefetch expkey, xmminput and enough longoutput for 4 iterations
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
__builtin_prefetch( longoutput + i, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 4, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 8, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 12, 1, 0 );
|
||||
}
|
||||
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// prefetch 4 iterations ahead.
|
||||
__builtin_prefetch( longoutput + i + 64, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 68, 1, 0 );
|
||||
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
|
||||
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
||||
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
||||
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
||||
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
||||
|
||||
uint64_t a[2] __attribute((aligned(16))),
|
||||
b[2] __attribute((aligned(16))),
|
||||
c[2] __attribute((aligned(16)));
|
||||
a[0] = ctx.a[0];
|
||||
a[1] = ctx.a[1];
|
||||
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
|
||||
__m128i a_x = _mm_load_si128( (__m128i*)a );
|
||||
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
__m128i c_x = _mm_load_si128( lsa );
|
||||
uint64_t *nextblock;
|
||||
uint64_t hi, lo;
|
||||
|
||||
// n-1 iterations
|
||||
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
|
||||
{
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
b_x = c_x;
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
a_x = _mm_load_si128( (__m128i*)a );
|
||||
c_x = _mm_load_si128( lsa );
|
||||
}
|
||||
// abreviated nth iteration
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
|
||||
}
|
||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// stay 4 iterations ahead.
|
||||
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
|
||||
|
||||
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||
|
||||
for( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||
|
||||
for( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
|
||||
}
|
||||
#endif
|
||||
@@ -1,133 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#else
|
||||
#include "crypto/c_groestl.h"
|
||||
#endif
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
/*
|
||||
#if defined __unix__ && (!defined __APPLE__)
|
||||
#include <sys/mman.h>
|
||||
#elif defined _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
#if defined(__AES__)
|
||||
hashState_groestl256 ctx;
|
||||
init_groestl256( &ctx, 32 );
|
||||
update_and_final_groestl256( &ctx, output, input, len * 8 );
|
||||
#else
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#endif
|
||||
}
|
||||
|
||||
void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void (* const extra_hashes[4])( const void *, size_t, char *) =
|
||||
{ do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash };
|
||||
|
||||
void cryptonight_hash( void *restrict output, const void *input, int len )
|
||||
{
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, len );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
void cryptonight_hash_suw( void *restrict output, const void *input )
|
||||
{
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, 76 );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool cryptonightV7 = false;
|
||||
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[32 / 4] __attribute__((aligned(32)));
|
||||
|
||||
// if ( ( cryptonightV7 && ( *(uint8_t*)pdata < 7 ) )
|
||||
// || ( !cryptonightV7 && ( *(uint8_t*)pdata == 7 ) ) )
|
||||
// applog(LOG_WARNING,"Cryptonight variant mismatch, shares may be rejected.");
|
||||
|
||||
do
|
||||
{
|
||||
*nonceptr = ++n;
|
||||
cryptonight_hash( hash, pdata, 76 );
|
||||
if (unlikely( hash[7] < Htarg ))
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
// work_set_target_ratio( work, hash );
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
{
|
||||
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
cryptonightV7 = false;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_cryptonightv7_algo( algo_gate_t* gate )
|
||||
{
|
||||
applog(LOG_WARNING,"Cryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
cryptonightV7 = true;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -1,310 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "miner.h"
|
||||
#include <memory.h>
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
//#include "crypto/hash-ops.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 0
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#endif
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
/*
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
*/
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
/*
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
*/
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst( const uint8_t* a, uint8_t* c, uint8_t* dst,
|
||||
const uint64_t tweak )
|
||||
{
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = cryptonightV7 ? lo ^ tweak : lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
{
|
||||
// hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
__builtin_prefetch( ctx.long_state, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
|
||||
|
||||
size_t i, j;
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx.aes_ctx, ctx.state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
|
||||
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
|
||||
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 0], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 1], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 2], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 3], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 4], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 5], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 6], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 7], ctx.aes_ctx->key->exp_data);
|
||||
memcpy(&ctx.long_state[i], ctx.text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a);
|
||||
xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i)
|
||||
{
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)], tweak );
|
||||
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)], tweak );
|
||||
|
||||
}
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
__builtin_prefetch( ctx.long_state, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
|
||||
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx.aes_ctx, &ctx.state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
|
||||
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
|
||||
|
||||
xor_blocks(&ctx.text[0 * AES_BLOCK_SIZE], &ctx.long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[0 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[1 * AES_BLOCK_SIZE], &ctx.long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[1 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[2 * AES_BLOCK_SIZE], &ctx.long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[2 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[3 * AES_BLOCK_SIZE], &ctx.long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[3 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[4 * AES_BLOCK_SIZE], &ctx.long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[4 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[5 * AES_BLOCK_SIZE], &ctx.long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[5 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[6 * AES_BLOCK_SIZE], &ctx.long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[6 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[7 * AES_BLOCK_SIZE], &ctx.long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
// hash_permutation(&ctx.state.hs);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx.aes_ctx);
|
||||
}
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
#ifndef __CRYPTONIGHT_H_INCLUDED
|
||||
#define __CRYPTONIGHT_H_INCLUDED
|
||||
|
||||
#include <stddef.h>
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "miner.h"
|
||||
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define MEMORY_M128I (MEMORY >> 4) // 2 MiB / 16 = 128 ki * __m128i
|
||||
#define ITER (1 << 20)
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
|
||||
#define INIT_SIZE_M128I (INIT_SIZE_BYTE >> 4) // 8
|
||||
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union hash_state {
|
||||
uint8_t b[200];
|
||||
uint64_t w[25];
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output);
|
||||
void do_groestl_hash(const void* input, size_t len, char* output);
|
||||
void do_jh_hash(const void* input, size_t len, char* output);
|
||||
void do_skein_hash(const void* input, size_t len, char* output);
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len);
|
||||
void keccakf(uint64_t st[25], int rounds);
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
|
||||
|
||||
extern bool cryptonightV7;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -129,9 +129,10 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
if( FinalPoW[7] <= ptarget[7] )
|
||||
{
|
||||
pdata[20] = swab32( BlockHdr[20] );
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
return(1);
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
submit_solution( work, FinalPoW, mythr );
|
||||
return(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -198,7 +199,8 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
pdata[20] = swab32( BlockHdr[20] );
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
return(1);
|
||||
submit_solution( work, FinalPoW, mythr );
|
||||
return(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,13 +228,14 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
|
||||
bool register_phi2_algo( algo_gate_t* gate )
|
||||
{
|
||||
// init_phi2_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
|
||||
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
|
||||
gate->build_extraheader = (void*)&phi2_build_extraheader;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined(PHI2_4WAY)
|
||||
#if defined(PHI2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_8way;
|
||||
#elif defined(PHI2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_4way;
|
||||
#else
|
||||
init_phi2_ctx();
|
||||
|
||||
@@ -184,19 +184,26 @@ bool init_allium_ctx();
|
||||
|
||||
/////////////////////////////////////////
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
// #define PHI2_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define PHI2_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define PHI2_4WAY 1
|
||||
#endif
|
||||
|
||||
extern bool phi2_has_roots;
|
||||
|
||||
bool register_phi2_algo( algo_gate_t* gate );
|
||||
#if defined(PHI2_4WAY)
|
||||
#if defined(PHI2_8WAY)
|
||||
|
||||
void phi2_8way_hash( void *state, const void *input );
|
||||
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(PHI2_4WAY)
|
||||
|
||||
void phi2_hash_4way( void *state, const void *input );
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
//void init_phi2_ctx();
|
||||
|
||||
#else
|
||||
|
||||
|
||||
@@ -1,233 +1,501 @@
|
||||
/**
|
||||
* Phi-2 algo Implementation
|
||||
*/
|
||||
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if defined(PHI2_4WAY)
|
||||
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "lyra2.h"
|
||||
#if defined(__VAES__)
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#elif defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
#if defined(PHI2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
jh512_8way_context jh;
|
||||
#if defined(__VAES__)
|
||||
echo_4way_context echo;
|
||||
#else
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
sph_gost512_context gost;
|
||||
skein512_8way_context skein;
|
||||
} phi2_8way_ctx_holder;
|
||||
|
||||
void phi2_8way_hash( void *state, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(128) hash[64*8];
|
||||
unsigned char _ALIGN(128) hashA[64*2];
|
||||
unsigned char _ALIGN(64) hash0[64];
|
||||
unsigned char _ALIGN(64) hash1[64];
|
||||
unsigned char _ALIGN(64) hash2[64];
|
||||
unsigned char _ALIGN(64) hash3[64];
|
||||
unsigned char _ALIGN(64) hash4[64];
|
||||
unsigned char _ALIGN(64) hash5[64];
|
||||
unsigned char _ALIGN(64) hash6[64];
|
||||
unsigned char _ALIGN(64) hash7[64];
|
||||
const int size = phi2_has_roots ? 144 : 80 ;
|
||||
phi2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*)hash0, 512,
|
||||
(const byte*)input, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1, 512,
|
||||
(const byte*)input + 144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2, 512,
|
||||
(const byte*)input + 2*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3, 512,
|
||||
(const byte*)input + 3*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash4, 512,
|
||||
(const byte*)input + 4*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash5, 512,
|
||||
(const byte*)input + 5*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash6, 512,
|
||||
(const byte*)input + 6*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash7, 512,
|
||||
(const byte*)input + 7*144, size );
|
||||
|
||||
intrlv_2x256( hashA, hash0, hash1, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, hash, 512 );
|
||||
intrlv_2x256( hashA, hash2, hash3, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, hash, 512 );
|
||||
intrlv_2x256( hashA, hash4, hash5, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, hash, 512 );
|
||||
intrlv_2x256( hashA, hash6, hash7, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, hash, 512 );
|
||||
|
||||
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, (const void*)hash, 64 );
|
||||
jh512_8way_close( &ctx.jh, (void*)hash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, hash );
|
||||
|
||||
#if defined (__VAES__)
|
||||
|
||||
unsigned char _ALIGN(64) hashA0[64];
|
||||
unsigned char _ALIGN(64) hashA1[64];
|
||||
unsigned char _ALIGN(64) hashA2[64];
|
||||
unsigned char _ALIGN(64) hashA3[64];
|
||||
unsigned char _ALIGN(64) hashA4[64];
|
||||
unsigned char _ALIGN(64) hashA5[64];
|
||||
unsigned char _ALIGN(64) hashA6[64];
|
||||
unsigned char _ALIGN(64) hashA7[64];
|
||||
|
||||
intrlv_4x128_512( hash, hash0, hash1, hash2, hash3 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
dintrlv_4x128_512( hashA0, hashA1, hashA2, hashA3, hash );
|
||||
|
||||
intrlv_4x128_512( hash, hash4, hash5, hash6, hash7 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
dintrlv_4x128_512( hashA4, hashA5, hashA6, hashA7, hash );
|
||||
|
||||
#endif
|
||||
|
||||
if ( hash0[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash0 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash0, hashA0, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash1[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash1 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash1, hashA1, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash2[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash2 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash2, hashA2, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash3[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash3 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash3, hashA3, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash4[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash4, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash4 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash4, hashA4, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash5[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash5, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash5 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash5, hashA5, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash6[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash6, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash6 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash6, hashA6, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash7[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash7, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash7 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash7, hashA7, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
}
|
||||
#endif
|
||||
|
||||
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, (const void*)hash, 64 );
|
||||
skein512_8way_close( &ctx.skein, (void*)hash );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
casti_m512i( state, i ) = _mm512_xor_si512( casti_m512i( hash, i ),
|
||||
casti_m512i( hash, i+4 ) );
|
||||
}
|
||||
|
||||
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16*8];
|
||||
uint32_t _ALIGN(128) edata[36*8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *hash7 = &(hash[49]);
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x00ff;
|
||||
|
||||
phi2_has_roots = false;
|
||||
|
||||
for ( int i = 0; i < 36; i++ )
|
||||
{
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
edata[ i + 36 ] = edata[ i + 2*36 ] = edata[ i + 3*36 ] =
|
||||
edata[ i + 4*36 ] = edata[ i + 5*36 ] = edata[ i + 6*36 ] =
|
||||
edata[ i + 7*36 ] = edata[ i ];
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
edata[ 19 ] = n;
|
||||
edata[ 36 + 19 ] = n+1;
|
||||
edata[ 2*36 + 19 ] = n+2;
|
||||
edata[ 3*36 + 19 ] = n+3;
|
||||
edata[ 4*36 + 19 ] = n+4;
|
||||
edata[ 5*36 + 19 ] = n+5;
|
||||
edata[ 6*36 + 19 ] = n+6;
|
||||
edata[ 7*36 + 19 ] = n+7;
|
||||
|
||||
do {
|
||||
phi2_8way_hash( hash, edata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
|
||||
{
|
||||
uint64_t _ALIGN(64) lane_hash[8];
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
be32enc( pdata + 19, n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
edata[ 19 ] += 8;
|
||||
edata[ 36 + 19 ] += 8;
|
||||
edata[ 2*36 + 19 ] += 8;
|
||||
edata[ 3*36 + 19 ] += 8;
|
||||
edata[ 4*36 + 19 ] += 8;
|
||||
edata[ 5*36 + 19 ] += 8;
|
||||
edata[ 6*36 + 19 ] += 8;
|
||||
edata[ 7*36 + 19 ] += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
#elif defined(PHI2_4WAY)
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
jh512_4way_context jh;
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
// hashState_echo echo2;
|
||||
#else
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_gost512_context gost;
|
||||
skein512_4way_context skein;
|
||||
} phi2_ctx_holder;
|
||||
/*
|
||||
phi2_ctx_holder phi2_ctx;
|
||||
} phi2_4way_ctx_holder;
|
||||
|
||||
void init_phi2_ctx()
|
||||
phi2_4way_ctx_holder phi2_4way_ctx;
|
||||
|
||||
void phi2_4way_hash(void *state, const void *input)
|
||||
{
|
||||
cubehashInit( &phi2_ctx.cube, 512, 16, 32 );
|
||||
sph_jh512_init(&phi2_ctx.jh);
|
||||
init_echo( &phi2_ctx.echo1, 512 );
|
||||
init_echo( &phi2_ctx.echo2, 512 );
|
||||
sph_gost512_init(&phi2_ctx.gost);
|
||||
sph_skein512_init(&phi2_ctx.skein);
|
||||
};
|
||||
*/
|
||||
void phi2_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t hashA[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t hashB[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[4*16] __attribute__ ((aligned (64)));
|
||||
unsigned char _ALIGN(128) hash[64*4];
|
||||
unsigned char _ALIGN(64) hash0[64];
|
||||
unsigned char _ALIGN(64) hash1[64];
|
||||
unsigned char _ALIGN(64) hash2[64];
|
||||
unsigned char _ALIGN(64) hash3[64];
|
||||
unsigned char _ALIGN(64) hash0A[64];
|
||||
unsigned char _ALIGN(64) hash1A[64];
|
||||
unsigned char _ALIGN(64) hash2A[64];
|
||||
unsigned char _ALIGN(64) hash3A[64];
|
||||
const int size = phi2_has_roots ? 144 : 80 ;
|
||||
phi2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
// unsigned char _ALIGN(128) hash[64];
|
||||
// unsigned char _ALIGN(128) hashA[64];
|
||||
// unsigned char _ALIGN(128) hashB[64];
|
||||
cubehash_full( &ctx.cube, (byte*)hash0A, 512,
|
||||
(const byte*)input, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1A, 512,
|
||||
(const byte*)input + 144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2A, 512,
|
||||
(const byte*)input + 2*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3A, 512,
|
||||
(const byte*)input + 3*144, size );
|
||||
|
||||
LYRA2RE( &hash0[ 0], 32, hash0A, 32, hash0A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash0[32], 32, hash0A+32, 32, hash0A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash1[ 0], 32, hash1A, 32, hash1A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash1[32], 32, hash1A+32, 32, hash1A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash2[ 0], 32, hash2A, 32, hash2A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash2[32], 32, hash2A+32, 32, hash2A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash3[ 0], 32, hash3A, 32, hash3A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash3[32], 32, hash3A+32, 32, hash3A+32, 32, 1, 8, 8 );
|
||||
|
||||
phi2_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
// memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[0], (const byte*)input,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[1], (const byte*)input+144,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[2], (const byte*)input+288,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[3], (const byte*)input+432,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
|
||||
LYRA2RE( &hashA[0][0], 32, &hashB[0][0], 32, &hashB[0][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[0][8], 32, &hashB[0][8], 32, &hashB[0][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[1][0], 32, &hashB[1][0], 32, &hashB[1][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[1][8], 32, &hashB[1][8], 32, &hashB[1][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[2][0], 32, &hashB[2][0], 32, &hashB[2][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[2][8], 32, &hashB[2][8], 32, &hashB[2][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
|
||||
|
||||
intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
|
||||
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4way_update( &ctx.jh, (const void*)hash, 64 );
|
||||
jh512_4way_close( &ctx.jh, (void*)hash );
|
||||
|
||||
dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, hash );
|
||||
|
||||
if ( hash[0][0] & 1 )
|
||||
if ( hash0[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[0], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[0] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||
(const BitSequence *)hash[0], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||
(const BitSequence *)hash[0], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
}
|
||||
|
||||
if ( hash[1][0] & 1 )
|
||||
if ( hash1[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[1], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[1] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||
(const BitSequence *)hash[1], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||
(const BitSequence *)hash[1], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
}
|
||||
|
||||
if ( hash[2][0] & 1 )
|
||||
if ( hash2[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[2], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[2] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||
(const BitSequence *)hash[2], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||
(const BitSequence *)hash[2], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
}
|
||||
|
||||
if ( hash[3][0] & 1 )
|
||||
if ( hash3[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[3], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[3] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||
(const BitSequence *)hash[3], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||
(const BitSequence *)hash[3], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
}
|
||||
|
||||
intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
|
||||
|
||||
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_update( &ctx.skein, (const void*)hash, 64 );
|
||||
skein512_4way_close( &ctx.skein, (void*)hash );
|
||||
|
||||
for (int i=0; i<4; i++)
|
||||
{
|
||||
( (uint64_t*)vhash )[i] ^= ( (uint64_t*)vhash )[i+4];
|
||||
( (uint64_t*)vhash+ 8 )[i] ^= ( (uint64_t*)vhash+ 8 )[i+4];
|
||||
( (uint64_t*)vhash+16 )[i] ^= ( (uint64_t*)vhash+16 )[i+4];
|
||||
( (uint64_t*)vhash+24 )[i] ^= ( (uint64_t*)vhash+24 )[i+4];
|
||||
}
|
||||
// for ( int i = 0; i < 4; i++ )
|
||||
// casti_m256i( vhash, i ) = _mm256_xor_si256( casti_m256i( vhash, i ),
|
||||
// casti_m256i( vhash, i+4 ) );
|
||||
|
||||
memcpy( state, vhash, 128 );
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
casti_m256i( state, i ) = _mm256_xor_si256( casti_m256i( hash, i ),
|
||||
casti_m256i( hash, i+4 ) );
|
||||
}
|
||||
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) edata[36];
|
||||
uint32_t vdata[4][36] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t _ALIGN(128) hash[16*4];
|
||||
uint32_t _ALIGN(128) edata[36*4];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
}
|
||||
|
||||
// Data is not interleaved, but hash is.
|
||||
// any non-zero data at index 20 or above sets roots true.
|
||||
// Split up the operations, bswap first, then set roots.
|
||||
|
||||
phi2_has_roots = false;
|
||||
for ( int i=0; i < 36; i++ )
|
||||
{
|
||||
be32enc(&edata[i], pdata[i]);
|
||||
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||
}
|
||||
/*
|
||||
casti_m256i( vdata[0], 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( vdata[0], 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m256i( vdata[0], 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
|
||||
casti_m256i( vdata[0], 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
|
||||
casti_m128i( vdata[0], 8 ) = mm128_bswap_32( casti_m128i( pdata, 8 ) );
|
||||
phi2_has_roots = mm128_anybits1( casti_m128i( vdata[0], 5 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 6 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 7 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 8 ) );
|
||||
*/
|
||||
|
||||
memcpy( vdata[0], edata, 144 );
|
||||
memcpy( vdata[1], edata, 144 );
|
||||
memcpy( vdata[2], edata, 144 );
|
||||
memcpy( vdata[3], edata, 144 );
|
||||
|
||||
do {
|
||||
be32enc( &vdata[0][19], n );
|
||||
be32enc( &vdata[1][19], n+1 );
|
||||
be32enc( &vdata[2][19], n+2 );
|
||||
be32enc( &vdata[3][19], n+3 );
|
||||
|
||||
phi2_hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x00ff;
|
||||
|
||||
#endif // PHI2_4WAY
|
||||
phi2_has_roots = false;
|
||||
|
||||
for ( int i = 0; i < 36; i++ )
|
||||
{
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
edata[ i+36 ] = edata[ i+72 ] = edata[ i+108 ] = edata[i];
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
edata[ 19 ] = n;
|
||||
edata[ 36 + 19 ] = n+1;
|
||||
edata[ 2*36 + 19 ] = n+2;
|
||||
edata[ 3*36 + 19 ] = n+3;
|
||||
|
||||
do {
|
||||
phi2_4way_hash( hash, edata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
|
||||
{
|
||||
uint64_t _ALIGN(64) lane_hash[8];
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
be32enc( pdata + 19, n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
edata[ 19 ] += 4;
|
||||
edata[ 36 + 19 ] += 4;
|
||||
edata[ 2*36 + 19 ] += 4;
|
||||
edata[ 3*36 + 19 ] += 4;
|
||||
n +=4;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -99,7 +99,6 @@ int scanhash_phi2( struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(128) edata[36];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
75
algo/m7m/magimath.cpp
Normal file
75
algo/m7m/magimath.cpp
Normal file
@@ -0,0 +1,75 @@
|
||||
// Copyright (c) 2014 The Magi developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include <iostream>
|
||||
#include <cfloat>
|
||||
#include <limits>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "magimath.h"
|
||||
|
||||
#define EPS1 (std::numeric_limits<double>::epsilon())
|
||||
#define EPS2 3.0e-11
|
||||
|
||||
static void gauleg(double x1, double x2, double x[], double w[], const int n)
|
||||
{
|
||||
int m,j,i;
|
||||
double z1, z, xm, xl, pp, p3, p2, p1;
|
||||
m=(n+1)/2;
|
||||
xm=0.5*(x2+x1);
|
||||
xl=0.5*(x2-x1);
|
||||
for (i=1;i<=m;i++) {
|
||||
z=cos(3.141592654*(i-0.25)/(n+0.5));
|
||||
do {
|
||||
p1=1.0;
|
||||
p2=0.0;
|
||||
for (j=1;j<=n;j++) {
|
||||
p3=p2;
|
||||
p2=p1;
|
||||
p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j;
|
||||
}
|
||||
pp=n*(z*p1-p2)/(z*z-1.0);
|
||||
z1=z;
|
||||
z=z1-p1/pp;
|
||||
} while (fabs(z-z1) > EPS2);
|
||||
x[i]=xm-xl*z;
|
||||
x[n+1-i]=xm+xl*z;
|
||||
w[i]=2.0*xl/((1.0-z*z)*pp*pp);
|
||||
w[n+1-i]=w[i];
|
||||
}
|
||||
}
|
||||
|
||||
static double GaussianQuad_N(double func(const double), const double a2, const double b2, const int NptGQ)
|
||||
{
|
||||
double s=0.0;
|
||||
#ifdef _MSC_VER
|
||||
#define SW_DIVS 23
|
||||
double x[SW_DIVS+1], w[SW_DIVS+1];
|
||||
#else
|
||||
double x[NptGQ+1], w[NptGQ+1];
|
||||
#endif
|
||||
|
||||
gauleg(a2, b2, x, w, NptGQ);
|
||||
|
||||
for (int j=1; j<=NptGQ; j++) {
|
||||
s += w[j]*func(x[j]);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static double swit_(double wvnmb)
|
||||
{
|
||||
return pow( (5.55243*(exp_n(-0.3*wvnmb/15.762) - exp_n(-0.6*wvnmb/15.762)))*wvnmb, 0.5)
|
||||
/ 1034.66 * pow(sin(wvnmb/65.), 2.);
|
||||
}
|
||||
|
||||
uint32_t sw_(int nnounce, int divs)
|
||||
{
|
||||
double wmax = ((sqrt((double)(nnounce))*(1.+EPS1))/450+100);
|
||||
return ((uint32_t)(GaussianQuad_N(swit_, 0., wmax, divs)*(1.+EPS1)*1.e6));
|
||||
}
|
||||
54
algo/m7m/magimath.h
Normal file
54
algo/m7m/magimath.h
Normal file
@@ -0,0 +1,54 @@
|
||||
// Copyright (c) 2014 The Magi developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
#ifndef MAGI_MATH_H
|
||||
#define MAGI_MATH_H
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint32_t sw_(int nnounce, int divs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
inline double exp_n(double xt)
|
||||
{
|
||||
double p1 = -700.0, p3 = -0.8e-8, p4 = 0.8e-8, p6 = 700.0;
|
||||
if(xt < p1)
|
||||
return 0;
|
||||
else if(xt > p6)
|
||||
return 1e200;
|
||||
else if(xt > p3 && xt < p4)
|
||||
return (1.0 + xt);
|
||||
else
|
||||
return exp(xt);
|
||||
}
|
||||
|
||||
// 1 / (1 + exp(x1-x2))
|
||||
inline double exp_n2(double x1, double x2)
|
||||
{
|
||||
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, p5 = 37., p6 = 700.;
|
||||
double xt = x1 - x2;
|
||||
if (xt < p1+1.e-200)
|
||||
return 1.;
|
||||
else if (xt > p1 && xt < p2 + 1.e-200)
|
||||
return ( 1. - exp(xt) );
|
||||
else if (xt > p2 && xt < p3 + 1.e-200)
|
||||
return ( 1. / (1. + exp(xt)) );
|
||||
else if (xt > p3 && xt < p4)
|
||||
return ( 1. / (2. + xt) );
|
||||
else if (xt > p4 - 1.e-200 && xt < p5)
|
||||
return ( exp(-xt) / (1. + exp(-xt)) );
|
||||
else if (xt > p5 - 1.e-200 && xt < p6)
|
||||
return ( exp(-xt) );
|
||||
else //if (xt > p6 - 1.e-200)
|
||||
return 0.;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -158,7 +158,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
{
|
||||
// ignore POK in first word
|
||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
|
||||
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|
||||
|| ( *nonceptr >= *end_nonce_ptr ) )
|
||||
{
|
||||
|
||||
@@ -134,7 +134,8 @@ void x16r_hash_generic( void* output, const void* input )
|
||||
break;
|
||||
case ECHO:
|
||||
#if defined(__AES__)
|
||||
echo_full( &ctx.echo, hash, 512, in, size );
|
||||
echo_full( &ctx.echo, (BitSequence*)hash, 512,
|
||||
(const BitSequence*)in, size );
|
||||
#else
|
||||
sph_echo512_init( &ctx.echo );
|
||||
sph_echo512( &ctx.echo, in, size );
|
||||
|
||||
Reference in New Issue
Block a user