diff --git a/RELEASE_ANNOUNCEMENT b/RELEASE_ANNOUNCEMENT index 78c006a..2a7b722 100644 --- a/RELEASE_ANNOUNCEMENT +++ b/RELEASE_ANNOUNCEMENT @@ -1,4 +1,4 @@ -cpuminer-opt now supports 38 algorithms on CPUs with at least SSE2 +cpuminer-opt now supports over 40 algorithms on CPUs with at least SSE2 capabilities including Intel Core2, Nehalem and AMD equivalent. See the performance chart below for details. @@ -7,10 +7,10 @@ CPUs with AES_NI for even greater performance, including the Intel Westbridge and newer and AMD equivalent. See the performance comparison below. -New in 3.4.11 +New in 3.4.12 -- groestl algo AES optimized +200% -- myr-gr algo AES optimized +100% +- lyra2z (zcoin) modified for blocks after 8192 +- fixed scryptjane to support various N factors Users with non-SSE2 CPUs or who want to mine algos not supported by cpuminer-opt may find cpuminer-multi by TPruvot useful. diff --git a/algo-gate-api.c b/algo-gate-api.c index 7603f4e..4bf34a3 100644 --- a/algo-gate-api.c +++ b/algo-gate-api.c @@ -259,6 +259,7 @@ const char* const algo_alias_map[][2] = { "blake256r8", "blakecoin" }, { "blake256r8vnl", "vanilla" }, { "blake256r14", "blake" }, + { "cryptonote", "cryptonight" }, { "cryptonight-light", "cryptolight" }, { "dmd-gr", "groestl" }, { "droplp", "drop" }, diff --git a/algo/lyra2/zcoin.c b/algo/lyra2/zcoin.c index 62c8cb9..ef2b9a1 100644 --- a/algo/lyra2/zcoin.c +++ b/algo/lyra2/zcoin.c @@ -8,7 +8,8 @@ void zcoin_hash(void *state, const void *input, uint32_t height) uint32_t _ALIGN(256) hash[16]; - LYRA2Z(hash, 32, input, 80, input, 80, 2, height, 256); +// LYRA2Z(hash, 32, input, 80, input, 80, 2, height, 256); + LYRA2Z(hash, 32, input, 80, input, 80, 2, 8192, 256); memcpy(state, hash, 32); } diff --git a/algo/neoscrypt.c b/algo/neoscrypt.c index 63a26bd..228a202 100644 --- a/algo/neoscrypt.c +++ b/algo/neoscrypt.c @@ -1056,7 +1056,6 @@ int scanhash_neoscrypt( int thr_id, struct work *work, while (pdata[19] < max_nonce && !work_restart[thr_id].restart) { neoscrypt((uint8_t *) hash, (uint8_t *) pdata ); - // , 0x80000020 | (opt_nfactor << 8) ); /* Quick hash check */ if (hash[7] <= Htarg && fulltest_le(hash, ptarget)) { @@ -1071,15 +1070,7 @@ int scanhash_neoscrypt( int thr_id, struct work *work, return 0; } -int64_t get_neoscrypt_max64() -{ - int64_t max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n; - if ( opt_nfactor > 3) - max64 >>= ( opt_nfactor - 3); - else if ( opt_nfactor > 16) - max64 = 0xF; - return max64; -} +int64_t get_neoscrypt_max64() { return 0x3ffff; } void neoscrypt_wait_for_diff( struct stratum_ctx *stratum ) { diff --git a/algo/neoscrypt.c.broke b/algo/neoscrypt.c.broke deleted file mode 100644 index 252be3a..0000000 --- a/algo/neoscrypt.c.broke +++ /dev/null @@ -1,1239 +0,0 @@ -/* - * Copyright (c) 2009 Colin Percival, 2011 ArtForz - * Copyright (c) 2012 Andrew Moon (floodyberry) - * Copyright (c) 2012 Samuel Neves - * Copyright (c) 2014 John Doering - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include -#include -#include -#include -#include - -#include "miner.h" -#include "algo-gate-api.h" - -#define USE_CUSTOM_BLAKE2S -// TODO: try blake2sp -//#include "crypto/blake2s.h" - -#define STACK_ALIGN 0x40 - -#ifdef _MSC_VER // todo: msvc -#define ASM 0 -#elif defined(__arm__) -#define ASM 0 -#endif - -#ifdef __GNUC__ -#if defined(NOASM) || defined(__arm__) -#define ASM 0 -#else -#define ASM 1 -#endif -#endif - -#if (WINDOWS) -/* sizeof(unsigned long) = 4 for MinGW64 */ -typedef unsigned long long ulong; -#else -typedef unsigned long ulong; -#endif -typedef unsigned int uint; - - -#define MIN(a, b) ((a) < (b) ? a : b) -#define MAX(a, b) ((a) > (b) ? a : b) - -#define SCRYPT_BLOCK_SIZE 64U -#define SCRYPT_HASH_BLOCK_SIZE 64U -#define SCRYPT_HASH_DIGEST_SIZE 32U - -#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b))) -#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b))) - -#define U8TO32_BE(p) \ - (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ - ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]))) - -#define U32TO8_BE(p, v) \ - (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ - (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); - -#define U64TO8_BE(p, v) \ - U32TO8_BE((p), (uint32_t)((v) >> 32)); \ - U32TO8_BE((p) + 4, (uint32_t)((v) )); - - -typedef uint8_t hash_digest[SCRYPT_HASH_DIGEST_SIZE]; - - -/* SHA-256 */ - -static const uint32_t sha256_constants[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -#define Ch(x,y,z) (z ^ (x & (y ^ z))) -#define Maj(x,y,z) (((x | y) & z) | (x & y)) -#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22)) -#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25)) -#define G0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3)) -#define G1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10)) -#define W0(in,i) (U8TO32_BE(&in[i * 4])) -#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16]) -#define STEP(i) \ - t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \ - t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha256_constants[i] + w[i]; \ - r[7] = r[6]; \ - r[6] = r[5]; \ - r[5] = r[4]; \ - r[4] = r[3] + t0; \ - r[3] = r[2]; \ - r[2] = r[1]; \ - r[1] = r[0]; \ - r[0] = t0 + t1; - - -typedef struct sha256_hash_state_t { - uint32_t H[8]; - uint64_t T; - uint32_t leftover; - uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; -} sha256_hash_state; - - -static void sha256_blocks(sha256_hash_state *S, const uint8_t *in, size_t blocks) { - uint32_t r[8], w[64], t0, t1; - size_t i; - - for(i = 0; i < 8; i++) - r[i] = S->H[i]; - - while(blocks--) { - - for(i = 0; i < 16; i++) { - w[i] = W0(in, i); - } - for(i = 16; i < 64; i++) { - w[i] = W1(i); - } - - for(i = 0; i < 64; i++) { - STEP(i); - } - - for(i = 0; i < 8; i++) { - r[i] += S->H[i]; - S->H[i] = r[i]; - } - - S->T += SCRYPT_HASH_BLOCK_SIZE * 8; - in += SCRYPT_HASH_BLOCK_SIZE; - } -} - -static void neoscrypt_hash_init_sha256(sha256_hash_state *S) { - S->H[0] = 0x6a09e667; - S->H[1] = 0xbb67ae85; - S->H[2] = 0x3c6ef372; - S->H[3] = 0xa54ff53a; - S->H[4] = 0x510e527f; - S->H[5] = 0x9b05688c; - S->H[6] = 0x1f83d9ab; - S->H[7] = 0x5be0cd19; - S->T = 0; - S->leftover = 0; -} - -static void neoscrypt_hash_update_sha256(sha256_hash_state *S, const uint8_t *in, size_t inlen) { - size_t blocks, want; - - /* handle the previous data */ - if(S->leftover) { - want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); - want = (want < inlen) ? want : inlen; - memcpy(S->buffer + S->leftover, in, want); - S->leftover += (uint32_t)want; - if(S->leftover < SCRYPT_HASH_BLOCK_SIZE) - return; - in += want; - inlen -= want; - sha256_blocks(S, S->buffer, 1); - } - - /* handle the current data */ - blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); - S->leftover = (uint32_t)(inlen - blocks); - if(blocks) { - sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE); - in += blocks; - } - - /* handle leftover data */ - if(S->leftover) - memcpy(S->buffer, in, S->leftover); -} - -static void neoscrypt_hash_finish_sha256(sha256_hash_state *S, uint8_t *hash) { - uint64_t t = S->T + (S->leftover * 8); - - S->buffer[S->leftover] = 0x80; - if(S->leftover <= 55) { - memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover); - } else { - memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover); - sha256_blocks(S, S->buffer, 1); - memset(S->buffer, 0, 56); - } - - U64TO8_BE(S->buffer + 56, t); - sha256_blocks(S, S->buffer, 1); - - U32TO8_BE(&hash[ 0], S->H[0]); - U32TO8_BE(&hash[ 4], S->H[1]); - U32TO8_BE(&hash[ 8], S->H[2]); - U32TO8_BE(&hash[12], S->H[3]); - U32TO8_BE(&hash[16], S->H[4]); - U32TO8_BE(&hash[20], S->H[5]); - U32TO8_BE(&hash[24], S->H[6]); - U32TO8_BE(&hash[28], S->H[7]); -} - -static void neoscrypt_hash_sha256(hash_digest hash, const uint8_t *m, size_t mlen) { - sha256_hash_state st; - neoscrypt_hash_init_sha256(&st); - neoscrypt_hash_update_sha256(&st, m, mlen); - neoscrypt_hash_finish_sha256(&st, hash); -} - - -/* HMAC for SHA-256 */ - -typedef struct sha256_hmac_state_t { - sha256_hash_state inner, outer; -} sha256_hmac_state; - -static void neoscrypt_hmac_init_sha256(sha256_hmac_state *st, const uint8_t *key, size_t keylen) { - uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0}; - size_t i; - - neoscrypt_hash_init_sha256(&st->inner); - neoscrypt_hash_init_sha256(&st->outer); - - if(keylen <= SCRYPT_HASH_BLOCK_SIZE) { - /* use the key directly if it's <= blocksize bytes */ - memcpy(pad, key, keylen); - } else { - /* if it's > blocksize bytes, hash it */ - neoscrypt_hash_sha256(pad, key, keylen); - } - - /* inner = (key ^ 0x36) */ - /* h(inner || ...) */ - for(i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) - pad[i] ^= 0x36; - - neoscrypt_hash_update_sha256(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE); - - /* outer = (key ^ 0x5c) */ - /* h(outer || ...) */ - for(i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) - pad[i] ^= (0x5c ^ 0x36); - - neoscrypt_hash_update_sha256(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE); -} - -static void neoscrypt_hmac_update_sha256(sha256_hmac_state *st, const uint8_t *m, size_t mlen) { - /* h(inner || m...) */ - neoscrypt_hash_update_sha256(&st->inner, m, mlen); -} - -static void neoscrypt_hmac_finish_sha256(sha256_hmac_state *st, hash_digest mac) { - /* h(inner || m) */ - hash_digest innerhash; - neoscrypt_hash_finish_sha256(&st->inner, innerhash); - - /* h(outer || h(inner || m)) */ - neoscrypt_hash_update_sha256(&st->outer, innerhash, sizeof(innerhash)); - neoscrypt_hash_finish_sha256(&st->outer, mac); -} - - -/* PBKDF2 for SHA-256 */ - -static void neoscrypt_pbkdf2_sha256(const uint8_t *password, size_t password_len, - const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *output, size_t output_len) { - sha256_hmac_state hmac_pw, hmac_pw_salt, work; - hash_digest ti, u; - uint8_t be[4]; - uint32_t i, j, k, blocks; - - /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */ - - /* hmac(password, ...) */ - neoscrypt_hmac_init_sha256(&hmac_pw, password, password_len); - - /* hmac(password, salt...) */ - hmac_pw_salt = hmac_pw; - neoscrypt_hmac_update_sha256(&hmac_pw_salt, salt, salt_len); - - blocks = ((uint32_t)output_len + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE; - for(i = 1; i <= blocks; i++) { - /* U1 = hmac(password, salt || be(i)) */ - U32TO8_BE(be, i); - work = hmac_pw_salt; - neoscrypt_hmac_update_sha256(&work, be, 4); - neoscrypt_hmac_finish_sha256(&work, ti); - memcpy(u, ti, sizeof(u)); - - /* T[i] = U1 ^ U2 ^ U3... */ - for(j = 0; j < N - 1; j++) { - /* UX = hmac(password, U{X-1}) */ - work = hmac_pw; - neoscrypt_hmac_update_sha256(&work, u, SCRYPT_HASH_DIGEST_SIZE); - neoscrypt_hmac_finish_sha256(&work, u); - - /* T[i] ^= UX */ - for(k = 0; k < sizeof(u); k++) - ti[k] ^= u[k]; - } - - memcpy(output, ti, (output_len > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : output_len); - output += SCRYPT_HASH_DIGEST_SIZE; - output_len -= SCRYPT_HASH_DIGEST_SIZE; - } -} - - -/* NeoScrypt */ - -//#if (ASM) - -extern void neoscrypt_salsa(uint *X, uint rounds); -extern void neoscrypt_salsa_tangle(uint *X, uint count); -extern void neoscrypt_chacha(uint *X, uint rounds); - -extern void neoscrypt_blkcpy_sse2(void *dstp, const void *srcp, uint len); -extern void neoscrypt_blkswp_sse2(void *blkAp, void *blkBp, uint len); -extern void neoscrypt_blkxor_sse2(void *dstp, const void *srcp, uint len); - -//#else - -/* Salsa20, rounds must be a multiple of 2 */ -/* -static void neoscrypt_salsa(uint *X, uint rounds) { - uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t; - - x0 = X[0]; x1 = X[1]; x2 = X[2]; x3 = X[3]; - x4 = X[4]; x5 = X[5]; x6 = X[6]; x7 = X[7]; - x8 = X[8]; x9 = X[9]; x10 = X[10]; x11 = X[11]; - x12 = X[12]; x13 = X[13]; x14 = X[14]; x15 = X[15]; - -#define quarter(a, b, c, d) \ - t = a + d; t = ROTL32(t, 7); b ^= t; \ - t = b + a; t = ROTL32(t, 9); c ^= t; \ - t = c + b; t = ROTL32(t, 13); d ^= t; \ - t = d + c; t = ROTL32(t, 18); a ^= t; - - for(; rounds; rounds -= 2) { - quarter( x0, x4, x8, x12); - quarter( x5, x9, x13, x1); - quarter(x10, x14, x2, x6); - quarter(x15, x3, x7, x11); - quarter( x0, x1, x2, x3); - quarter( x5, x6, x7, x4); - quarter(x10, x11, x8, x9); - quarter(x15, x12, x13, x14); - } - - X[0] += x0; X[1] += x1; X[2] += x2; X[3] += x3; - X[4] += x4; X[5] += x5; X[6] += x6; X[7] += x7; - X[8] += x8; X[9] += x9; X[10] += x10; X[11] += x11; - X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15; - -#undef quarter -} -*/ -/* ChaCha20, rounds must be a multiple of 2 */ -/* -static void neoscrypt_chacha(uint *X, uint rounds) { - uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t; - - x0 = X[0]; x1 = X[1]; x2 = X[2]; x3 = X[3]; - x4 = X[4]; x5 = X[5]; x6 = X[6]; x7 = X[7]; - x8 = X[8]; x9 = X[9]; x10 = X[10]; x11 = X[11]; - x12 = X[12]; x13 = X[13]; x14 = X[14]; x15 = X[15]; - -#define quarter(a,b,c,d) \ - a += b; t = d ^ a; d = ROTL32(t, 16); \ - c += d; t = b ^ c; b = ROTL32(t, 12); \ - a += b; t = d ^ a; d = ROTL32(t, 8); \ - c += d; t = b ^ c; b = ROTL32(t, 7); - - for(; rounds; rounds -= 2) { - quarter( x0, x4, x8, x12); - quarter( x1, x5, x9, x13); - quarter( x2, x6, x10, x14); - quarter( x3, x7, x11, x15); - quarter( x0, x5, x10, x15); - quarter( x1, x6, x11, x12); - quarter( x2, x7, x8, x13); - quarter( x3, x4, x9, x14); - } - - X[0] += x0; X[1] += x1; X[2] += x2; X[3] += x3; - X[4] += x4; X[5] += x5; X[6] += x6; X[7] += x7; - X[8] += x8; X[9] += x9; X[10] += x10; X[11] += x11; - X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15; - -#undef quarter -} -*/ - -/* Fast 32-bit / 64-bit memcpy(); - * len must be a multiple of 32 bytes */ -static void neoscrypt_blkcpy(uint64_t *dstp, const uint64_t *srcp, uint len) -{ - -// len is 64 uint32_t = -#ifdef __AVX2__ - - __m256i s0, s1, s2, s3, s4, s5, s6, s7; - - s0 = _mm256_loadu_si256( (__m256i*)(&srcp[ 0]) ); - s1 = _mm256_loadu_si256( (__m256i*)(&srcp[ 4]) ); - s2 = _mm256_loadu_si256( (__m256i*)(&srcp[ 8]) ); - s3 = _mm256_loadu_si256( (__m256i*)(&srcp[12]) ); - s4 = _mm256_loadu_si256( (__m256i*)(&srcp[16]) ); - s5 = _mm256_loadu_si256( (__m256i*)(&srcp[20]) ); - s6 = _mm256_loadu_si256( (__m256i*)(&srcp[24]) ); - s7 = _mm256_loadu_si256( (__m256i*)(&srcp[28]) ); - - _mm256_storeu_si256( (__m256i*)(&dstp[ 0]), s0 ); - _mm256_storeu_si256( (__m256i*)(&dstp[ 4]), s1 ); - _mm256_storeu_si256( (__m256i*)(&dstp[ 8]), s2 ); - _mm256_storeu_si256( (__m256i*)(&dstp[12]), s3 ); - _mm256_storeu_si256( (__m256i*)(&dstp[16]), s4 ); - _mm256_storeu_si256( (__m256i*)(&dstp[20]), s5 ); - _mm256_storeu_si256( (__m256i*)(&dstp[24]), s6 ); - _mm256_storeu_si256( (__m256i*)(&dstp[28]), s7 ); - - -#else - - neoscrypt_blkcpy_sse2(void *dstp, const void *srcp, uint len); - -#endif - -/* ulong *dst = (ulong *) dstp; - ulong *src = (ulong *) srcp; - uint i; - - for(i = 0; i < (len / sizeof(ulong)); i += 4) { - dst[i] = src[i]; - dst[i + 1] = src[i + 1]; - dst[i + 2] = src[i + 2]; - dst[i + 3] = src[i + 3]; - } -*/ -} - -/* Fast 32-bit / 64-bit block swapper; - * len must be a multiple of 32 bytes */ - -static void neoscrypt_blkswp( uint64_t *blkAp, uint64_t *blkBp, uint len ) -{ - -#ifdef __AVX2__ - - __m256i a0, a1, a2, a3, b0, b1, b2, b3; - - a0 = _mm256_loadu_si256( (__m256i*)(&blkAp[ 0]) ); - a1 = _mm256_loadu_si256( (__m256i*)(&blkAp[ 4]) ); - a2 = _mm256_loadu_si256( (__m256i*)(&blkAp[ 8]) ); - a3 = _mm256_loadu_si256( (__m256i*)(&blkAp[12]) ); - b0 = _mm256_loadu_si256( (__m256i*)(&blkBp[ 0]) ); - b1 = _mm256_loadu_si256( (__m256i*)(&blkBp[ 4]) ); - b2 = _mm256_loadu_si256( (__m256i*)(&blkBp[ 8]) ); - b3 = _mm256_loadu_si256( (__m256i*)(&blkBp[12]) ); - - _mm256_storeu_si256( (__m256i*)(&blkBp[ 0]), a0 ); - _mm256_storeu_si256( (__m256i*)(&blkBp[ 4]), a1 ); - _mm256_storeu_si256( (__m256i*)(&blkBp[ 8]), a2 ); - _mm256_storeu_si256( (__m256i*)(&blkBp[12]), a3 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[ 0]), b0 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[ 4]), b1 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[ 8]), b2 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[12]), b3 ); - - a0 = _mm256_loadu_si256( (__m256i*)(&blkAp[16]) ); - a1 = _mm256_loadu_si256( (__m256i*)(&blkAp[20]) ); - a2 = _mm256_loadu_si256( (__m256i*)(&blkAp[24]) ); - a3 = _mm256_loadu_si256( (__m256i*)(&blkAp[28]) ); - b0 = _mm256_loadu_si256( (__m256i*)(&blkBp[16]) ); - b1 = _mm256_loadu_si256( (__m256i*)(&blkBp[20]) ); - b2 = _mm256_loadu_si256( (__m256i*)(&blkBp[24]) ); - b3 = _mm256_loadu_si256( (__m256i*)(&blkBp[28]) ); - - _mm256_storeu_si256( (__m256i*)(&blkBp[16]), a0 ); - _mm256_storeu_si256( (__m256i*)(&blkBp[20]), a1 ); - _mm256_storeu_si256( (__m256i*)(&blkBp[24]), a2 ); - _mm256_storeu_si256( (__m256i*)(&blkBp[28]), a3 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[16]), b0 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[20]), b1 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[24]), b2 ); - _mm256_storeu_si256( (__m256i*)(&blkAp[28]), b3 ); - - -#else - - neoscrypt_blkswp_sse2(void *blkAp, void *blkBp, uint len); - -#endif - -/* ulong *blkA = (ulong *) blkAp; - ulong *blkB = (ulong *) blkBp; - register ulong t0, t1, t2, t3; - uint i; - - for(i = 0; i < (len / sizeof(ulong)); i += 4) { - t0 = blkA[i]; - t1 = blkA[i + 1]; - t2 = blkA[i + 2]; - t3 = blkA[i + 3]; - blkA[i] = blkB[i]; - blkA[i + 1] = blkB[i + 1]; - blkA[i + 2] = blkB[i + 2]; - blkA[i + 3] = blkB[i + 3]; - blkB[i] = t0; - blkB[i + 1] = t1; - blkB[i + 2] = t2; - blkB[i + 3] = t3; - } -*/ -} - -/* Fast 32-bit / 64-bit block XOR engine; - * len must be a multiple of 32 bytes */ - -static void neoscrypt_blkxor( uint64_t *dstp, const uint64_t *srcp, uint len) -{ - -#ifdef __AVX2__ - - __m256i s0, s1, s2, s3, d0, d1, d2, d3; - - s0 = _mm256_loadu_si256( (__m256i*)(&srcp[ 0]) ); - s1 = _mm256_loadu_si256( (__m256i*)(&srcp[ 4]) ); - s2 = _mm256_loadu_si256( (__m256i*)(&srcp[ 8]) ); - s3 = _mm256_loadu_si256( (__m256i*)(&srcp[12]) ); - d0 = _mm256_loadu_si256( (__m256i*)(&dstp[ 0]) ); - d1 = _mm256_loadu_si256( (__m256i*)(&dstp[ 4]) ); - d2 = _mm256_loadu_si256( (__m256i*)(&dstp[ 8]) ); - d3 = _mm256_loadu_si256( (__m256i*)(&dstp[12]) ); - - _mm256_storeu_si256( (__m256i*)(&dstp[ 0]), _mm256_xor_si256( d0, s0 ) ); - _mm256_storeu_si256( (__m256i*)(&dstp[ 4]), _mm256_xor_si256( d1, s1 ) ); - _mm256_storeu_si256( (__m256i*)(&dstp[ 8]), _mm256_xor_si256( d2, s2 ) ); - _mm256_storeu_si256( (__m256i*)(&dstp[12]), _mm256_xor_si256( d3, s3 ) ); - - s0 = _mm256_loadu_si256( (__m256i*)(&srcp[16]) ); - s1 = _mm256_loadu_si256( (__m256i*)(&srcp[20]) ); - s2 = _mm256_loadu_si256( (__m256i*)(&srcp[24]) ); - s3 = _mm256_loadu_si256( (__m256i*)(&srcp[28]) ); - d0 = _mm256_loadu_si256( (__m256i*)(&dstp[16]) ); - d1 = _mm256_loadu_si256( (__m256i*)(&dstp[20]) ); - d2 = _mm256_loadu_si256( (__m256i*)(&dstp[24]) ); - d3 = _mm256_loadu_si256( (__m256i*)(&dstp[28]) ); - - _mm256_storeu_si256( (__m256i*)(&dstp[16]), _mm256_xor_si256( d0, s0 ) ); - _mm256_storeu_si256( (__m256i*)(&dstp[20]), _mm256_xor_si256( d1, s1 ) ); - _mm256_storeu_si256( (__m256i*)(&dstp[24]), _mm256_xor_si256( d2, s2 ) ); - _mm256_storeu_si256( (__m256i*)(&dstp[28]), _mm256_xor_si256( d3, s3 ) ); - - - -#else - -neoscrypt_blkxor_sse2(void *dstp, const void *srcp, uint len); - -#endif - -/* ulong *dst = (ulong *) dstp; - ulong *src = (ulong *) srcp; - uint i; - - for(i = 0; i < (len / sizeof(ulong)); i += 4) { - dst[i] ^= src[i]; - dst[i + 1] ^= src[i + 1]; - dst[i + 2] ^= src[i + 2]; - dst[i + 3] ^= src[i + 3]; - } -*/ -} - -//#endif - - -/* 32-bit / 64-bit optimised memcpy() */ -static void neoscrypt_copy(void *dstp, const void *srcp, uint len) { - ulong *dst = (ulong *) dstp; - ulong *src = (ulong *) srcp; - uint i, tail; - - for(i = 0; i < (len / sizeof(ulong)); i++) - dst[i] = src[i]; - - tail = len & (sizeof(ulong) - 1); - if(tail) { - uchar *dstb = (uchar *) dstp; - uchar *srcb = (uchar *) srcp; - - for(i = len - tail; i < len; i++) - dstb[i] = srcb[i]; - } -} - -/* 32-bit / 64-bit optimised memory erase aka memset() to zero */ -static void neoscrypt_erase(void *dstp, uint len) { - const ulong null = 0; - ulong *dst = (ulong *) dstp; - uint i, tail; - - for(i = 0; i < (len / sizeof(ulong)); i++) - dst[i] = null; - - tail = len & (sizeof(ulong) - 1); - if(tail) { - uchar *dstb = (uchar *) dstp; - - for(i = len - tail; i < len; i++) - dstb[i] = (uchar)null; - } -} - -/* 32-bit / 64-bit optimised XOR engine */ -static void neoscrypt_xor(void *dstp, const void *srcp, uint len) { - ulong *dst = (ulong *) dstp; - ulong *src = (ulong *) srcp; - uint i, tail; - - for(i = 0; i < (len / sizeof(ulong)); i++) - dst[i] ^= src[i]; - - tail = len & (sizeof(ulong) - 1); - if(tail) { - uchar *dstb = (uchar *) dstp; - uchar *srcb = (uchar *) srcp; - - for(i = len - tail; i < len; i++) - dstb[i] ^= srcb[i]; - } -} - -/* BLAKE2s */ - -#define BLAKE2S_BLOCK_SIZE 64U -#define BLAKE2S_OUT_SIZE 32U -#define BLAKE2S_KEY_SIZE 32U - -static const uint blake2s_IV[8] = { - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, - 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 -}; - -#ifdef USE_CUSTOM_BLAKE2S - -static const uint8_t blake2s_sigma[10][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , -}; - -/* Parameter block of 32 bytes */ -typedef struct blake2s_param_t { - uchar digest_length; - uchar key_length; - uchar fanout; - uchar depth; - uint leaf_length; - uchar node_offset[6]; - uchar node_depth; - uchar inner_length; - uchar salt[8]; - uchar personal[8]; -} blake2s_param; - -/* State block of 180 bytes */ -typedef struct blake2s_state_t { - uint h[8]; - uint t[2]; - uint f[2]; - uchar buf[2 * BLAKE2S_BLOCK_SIZE]; - uint buflen; -} blake2s_state; - -static void blake2s_compress(blake2s_state *S, const void *buf) { - uint i; - uint m[16]; - uint v[16]; - - neoscrypt_copy(m, buf, 64); - neoscrypt_copy(v, S, 32); - - v[ 8] = blake2s_IV[0]; - v[ 9] = blake2s_IV[1]; - v[10] = blake2s_IV[2]; - v[11] = blake2s_IV[3]; - v[12] = S->t[0] ^ blake2s_IV[4]; - v[13] = S->t[1] ^ blake2s_IV[5]; - v[14] = S->f[0] ^ blake2s_IV[6]; - v[15] = S->f[1] ^ blake2s_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2s_sigma[r][2*i+0]]; \ - d = ROTR32(d ^ a, 16); \ - c = c + d; \ - b = ROTR32(b ^ c, 12); \ - a = a + b + m[blake2s_sigma[r][2*i+1]]; \ - d = ROTR32(d ^ a, 8); \ - c = c + d; \ - b = ROTR32(b ^ c, 7); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r, 0, v[ 0], v[ 4], v[ 8], v[12]); \ - G(r, 1, v[ 1], v[ 5], v[ 9], v[13]); \ - G(r, 2, v[ 2], v[ 6], v[10], v[14]); \ - G(r, 3, v[ 3], v[ 7], v[11], v[15]); \ - G(r, 4, v[ 0], v[ 5], v[10], v[15]); \ - G(r, 5, v[ 1], v[ 6], v[11], v[12]); \ - G(r, 6, v[ 2], v[ 7], v[ 8], v[13]); \ - G(r, 7, v[ 3], v[ 4], v[ 9], v[14]); \ - } while(0) - ROUND(0); - ROUND(1); - ROUND(2); - ROUND(3); - ROUND(4); - ROUND(5); - ROUND(6); - ROUND(7); - ROUND(8); - ROUND(9); - - for(i = 0; i < 8; i++) - S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - -#undef G -#undef ROUND -} - -static void blake2s_update(blake2s_state *S, const uchar *input, uint input_size) { - uint left, fill; - - while(input_size > 0) { - left = S->buflen; - fill = 2 * BLAKE2S_BLOCK_SIZE - left; - if(input_size > fill) { - /* Buffer fill */ - neoscrypt_copy(S->buf + left, input, fill); - S->buflen += fill; - /* Counter increment */ - S->t[0] += BLAKE2S_BLOCK_SIZE; - /* Compress */ - blake2s_compress(S, (void *) S->buf); - /* Shift buffer left */ - neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, BLAKE2S_BLOCK_SIZE); - S->buflen -= BLAKE2S_BLOCK_SIZE; - input += fill; - input_size -= fill; - } else { - neoscrypt_copy(S->buf + left, input, input_size); - S->buflen += input_size; - /* Do not compress */ - input += input_size; - input_size = 0; - } - } -} -#endif - -static void neoscrypt_blake2s(const void *input, const uint input_size, const void *key, const uchar key_size, - void *output, const uchar output_size) { - uchar block[BLAKE2S_BLOCK_SIZE]; - blake2s_param P[1]; - blake2s_state S[1]; - - /* Initialise */ - neoscrypt_erase(P, 32); - P->digest_length = output_size; - P->key_length = key_size; - P->fanout = 1; - P->depth = 1; - - neoscrypt_erase(S, 180); - neoscrypt_copy(S, blake2s_IV, 32); - neoscrypt_xor(S, P, 32); - - neoscrypt_erase(block, BLAKE2S_BLOCK_SIZE); - neoscrypt_copy(block, key, key_size); - blake2s_update(S, (uchar *) block, BLAKE2S_BLOCK_SIZE); - - /* Update */ - blake2s_update(S, (uchar *) input, input_size); - - /* Finish */ - if(S->buflen > BLAKE2S_BLOCK_SIZE) { - S->t[0] += BLAKE2S_BLOCK_SIZE; - blake2s_compress(S, (void *) S->buf); - S->buflen -= BLAKE2S_BLOCK_SIZE; - neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, S->buflen); - } - S->t[0] += S->buflen; - S->f[0] = ~0U; - neoscrypt_erase(S->buf + S->buflen, 2 * BLAKE2S_BLOCK_SIZE - S->buflen); - blake2s_compress(S, (void *) S->buf); - - /* Write back */ - neoscrypt_copy(output, S, output_size); -} - - -#define FASTKDF_BUFFER_SIZE 256U - -/* FastKDF, a fast buffered key derivation function: - * FASTKDF_BUFFER_SIZE must be a power of 2; - * password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE; - * prf_output_size must be <= prf_key_size; */ -static void neoscrypt_fastkdf(const uchar *password, uint password_len, const uchar *salt, uint salt_len, - uint N, uchar *output, uint output_len) { - -#define kdf_buf_size FASTKDF_BUFFER_SIZE -#define prf_input_size BLAKE2S_BLOCK_SIZE -#define prf_key_size BLAKE2S_KEY_SIZE -#define prf_output_size BLAKE2S_OUT_SIZE - - uint bufptr, a, b, i, j; - uchar *A, *B, *prf_input, *prf_key, *prf_output; - - /* Align and set up the buffers in stack */ - uchar stack[2 * kdf_buf_size + prf_input_size + prf_key_size + prf_output_size + STACK_ALIGN]; - A = &stack[STACK_ALIGN & ~(STACK_ALIGN - 1)]; - B = &A[kdf_buf_size + prf_input_size]; - prf_output = &A[2 * kdf_buf_size + prf_input_size + prf_key_size]; - - /* Initialise the password buffer */ - if(password_len > kdf_buf_size) - password_len = kdf_buf_size; - - a = kdf_buf_size / password_len; - for(i = 0; i < a; i++) - neoscrypt_copy(&A[i * password_len], &password[0], password_len); - b = kdf_buf_size - a * password_len; - if(b) - neoscrypt_copy(&A[a * password_len], &password[0], b); - neoscrypt_copy(&A[kdf_buf_size], &password[0], prf_input_size); - - /* Initialise the salt buffer */ - if(salt_len > kdf_buf_size) - salt_len = kdf_buf_size; - - a = kdf_buf_size / salt_len; - for(i = 0; i < a; i++) - neoscrypt_copy(&B[i * salt_len], &salt[0], salt_len); - b = kdf_buf_size - a * salt_len; - if(b) - neoscrypt_copy(&B[a * salt_len], &salt[0], b); - neoscrypt_copy(&B[kdf_buf_size], &salt[0], prf_key_size); - - /* The primary iteration */ - for(i = 0, bufptr = 0; i < N; i++) { - - /* Map the PRF input buffer */ - prf_input = &A[bufptr]; - - /* Map the PRF key buffer */ - prf_key = &B[bufptr]; - - /* PRF */ - neoscrypt_blake2s(prf_input, prf_input_size, prf_key, prf_key_size, prf_output, prf_output_size); - - /* Calculate the next buffer pointer */ - for(j = 0, bufptr = 0; j < prf_output_size; j++) - bufptr += prf_output[j]; - bufptr &= (kdf_buf_size - 1); - - /* Modify the salt buffer */ - neoscrypt_xor(&B[bufptr], &prf_output[0], prf_output_size); - - /* Head modified, tail updated */ - if(bufptr < prf_key_size) - neoscrypt_copy(&B[kdf_buf_size + bufptr], &B[bufptr], MIN(prf_output_size, prf_key_size - bufptr)); - - /* Tail modified, head updated */ - if((kdf_buf_size - bufptr) < prf_output_size) - neoscrypt_copy(&B[0], &B[kdf_buf_size], prf_output_size - (kdf_buf_size - bufptr)); - - } - - /* Modify and copy into the output buffer */ - if(output_len > kdf_buf_size) - output_len = kdf_buf_size; - - a = kdf_buf_size - bufptr; - if(a >= output_len) { - neoscrypt_xor(&B[bufptr], &A[0], output_len); - neoscrypt_copy(&output[0], &B[bufptr], output_len); - } else { - neoscrypt_xor(&B[bufptr], &A[0], a); - neoscrypt_xor(&B[0], &A[a], output_len - a); - neoscrypt_copy(&output[0], &B[bufptr], a); - neoscrypt_copy(&output[a], &B[0], output_len - a); - } - -} - - -/* Configurable optimised block mixer */ -static void neoscrypt_blkmix(uint *X, uint *Y, uint r, uint mixmode) { - uint i, mixer, rounds; - - mixer = mixmode >> 8; - rounds = mixmode & 0xFF; - - /* NeoScrypt flow: Scrypt flow: - Xa ^= Xd; M(Xa'); Ya = Xa"; Xa ^= Xb; M(Xa'); Ya = Xa"; - Xb ^= Xa"; M(Xb'); Yb = Xb"; Xb ^= Xa"; M(Xb'); Yb = Xb"; - Xc ^= Xb"; M(Xc'); Yc = Xc"; Xa" = Ya; - Xd ^= Xc"; M(Xd'); Yd = Xd"; Xb" = Yb; - Xa" = Ya; Xb" = Yc; - Xc" = Yb; Xd" = Yd; */ - - if(r == 1) { - neoscrypt_blkxor(&X[0], &X[16], SCRYPT_BLOCK_SIZE); - if(mixer) - neoscrypt_chacha(&X[0], rounds); - else - neoscrypt_salsa(&X[0], rounds); - neoscrypt_blkxor(&X[16], &X[0], SCRYPT_BLOCK_SIZE); - if(mixer) - neoscrypt_chacha(&X[16], rounds); - else - neoscrypt_salsa(&X[16], rounds); - return; - } - - if(r == 2) { - neoscrypt_blkxor(&X[0], &X[48], SCRYPT_BLOCK_SIZE); - if(mixer) - neoscrypt_chacha(&X[0], rounds); - else - neoscrypt_salsa(&X[0], rounds); - neoscrypt_blkxor(&X[16], &X[0], SCRYPT_BLOCK_SIZE); - if(mixer) - neoscrypt_chacha(&X[16], rounds); - else - neoscrypt_salsa(&X[16], rounds); - neoscrypt_blkxor(&X[32], &X[16], SCRYPT_BLOCK_SIZE); - if(mixer) - neoscrypt_chacha(&X[32], rounds); - else - neoscrypt_salsa(&X[32], rounds); - neoscrypt_blkxor(&X[48], &X[32], SCRYPT_BLOCK_SIZE); - if(mixer) - neoscrypt_chacha(&X[48], rounds); - else - neoscrypt_salsa(&X[48], rounds); - neoscrypt_blkswp(&X[16], &X[32], SCRYPT_BLOCK_SIZE); - return; - } - - /* Reference code for any reasonable r */ - for(i = 0; i < 2 * r; i++) { - if(i) neoscrypt_blkxor(&X[16 * i], &X[16 * (i - 1)], SCRYPT_BLOCK_SIZE); - else neoscrypt_blkxor(&X[0], &X[16 * (2 * r - 1)], SCRYPT_BLOCK_SIZE); - if(mixer) - neoscrypt_chacha(&X[16 * i], rounds); - else - neoscrypt_salsa(&X[16 * i], rounds); - neoscrypt_blkcpy(&Y[16 * i], &X[16 * i], SCRYPT_BLOCK_SIZE); - } - for(i = 0; i < r; i++) - neoscrypt_blkcpy(&X[16 * i], &Y[16 * 2 * i], SCRYPT_BLOCK_SIZE); - for(i = 0; i < r; i++) - neoscrypt_blkcpy(&X[16 * (i + r)], &Y[16 * (2 * i + 1)], SCRYPT_BLOCK_SIZE); -} - -/* NeoScrypt core engine: - * p = 1, salt = password; - * Basic customisation (required): - * profile bit 0: - * 0 = NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20; - * 1 = Scrypt(1024, 1, 1) with Salsa20/8; - * profile bits 4 to 1: - * 0000 = FastKDF-BLAKE2s; - * 0001 = PBKDF2-HMAC-SHA256; - * Extended customisation (optional): - * profile bit 31: - * 0 = extended customisation absent; - * 1 = extended customisation present; - * profile bits 7 to 5 (rfactor): - * 000 = r of 1; - * 001 = r of 2; - * 010 = r of 4; - * ... - * 111 = r of 128; - * profile bits 12 to 8 (Nfactor): - * 00000 = N of 2; - * 00001 = N of 4; - * 00010 = N of 8; - * ..... - * 00110 = N of 128; - * ..... - * 01001 = N of 1024; - * ..... - * 11110 = N of 2147483648; - * profile bits 30 to 13 are reserved */ -void neoscrypt(uchar *output, const uchar *password) -{ - uint N = 128, r = 2, dblmix = 1, mixmode = 0x14; - uint kdf, i, j; - uint *X, *Y, *Z, *V; - - // default, option not yet required - uint32_t profile = 0x80000020| (6 << 8) ; - - if(profile & 0x1) { - N = 1024; /* N = (1 << (Nfactor + 1)); */ - r = 1; /* r = (1 << rfactor); */ - dblmix = 0; /* Salsa only */ - mixmode = 0x08; /* 8 rounds */ - } - - if(profile >> 31) { - N = (1 << (((profile >> 8) & 0x1F) + 1)); - r = (1 << ((profile >> 5) & 0x7)); - } - - uchar *stack = (uchar*) malloc((N + 3) * r * 2 * SCRYPT_BLOCK_SIZE + STACK_ALIGN); - /* X = r * 2 * SCRYPT_BLOCK_SIZE */ - X = (uint *) &stack[STACK_ALIGN & ~(STACK_ALIGN - 1)]; - /* Z is a copy of X for ChaCha */ - Z = &X[32 * r]; - /* Y is an X sized temporal space */ - Y = &X[64 * r]; - /* V = N * r * 2 * SCRYPT_BLOCK_SIZE */ - V = &X[96 * r]; - - /* X = KDF(password, salt) */ - kdf = (profile >> 1) & 0xF; - - switch(kdf) { - - default: - case(0x0): - neoscrypt_fastkdf(password, 80, password, 80, 32, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE); - break; - - case(0x1): - neoscrypt_pbkdf2_sha256(password, 80, password, 80, 1, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE); - break; - - } - - /* Process ChaCha 1st, Salsa 2nd and XOR them into FastKDF; otherwise Salsa only */ - - if(dblmix) { - /* blkcpy(Z, X) */ - neoscrypt_blkcpy(&Z[0], &X[0], r * 2 * SCRYPT_BLOCK_SIZE); - - /* Z = SMix(Z) */ - for(i = 0; i < N; i++) { - /* blkcpy(V, Z) */ - neoscrypt_blkcpy(&V[i * (32 * r)], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE); - /* blkmix(Z, Y) */ - neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100)); - } - for(i = 0; i < N; i++) { - /* integerify(Z) mod N */ - j = (32 * r) * (Z[16 * (2 * r - 1)] & (N - 1)); - /* blkxor(Z, V) */ - neoscrypt_blkxor(&Z[0], &V[j], r * 2 * SCRYPT_BLOCK_SIZE); - /* blkmix(Z, Y) */ - neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100)); - } - } - -#if (ASM) - /* Must be called before and after SSE2 Salsa */ - neoscrypt_salsa_tangle(&X[0], r * 2); -#endif - - /* X = SMix(X) */ - for(i = 0; i < N; i++) { - /* blkcpy(V, X) */ - neoscrypt_blkcpy(&V[i * (32 * r)], &X[0], r * 2 * SCRYPT_BLOCK_SIZE); - /* blkmix(X, Y) */ - neoscrypt_blkmix(&X[0], &Y[0], r, mixmode); - } - for(i = 0; i < N; i++) { - /* integerify(X) mod N */ - j = (32 * r) * (X[16 * (2 * r - 1)] & (N - 1)); - /* blkxor(X, V) */ - neoscrypt_blkxor(&X[0], &V[j], r * 2 * SCRYPT_BLOCK_SIZE); - /* blkmix(X, Y) */ - neoscrypt_blkmix(&X[0], &Y[0], r, mixmode); - } - -#if (ASM) - neoscrypt_salsa_tangle(&X[0], r * 2); -#endif - - if(dblmix) - /* blkxor(X, Z) */ - neoscrypt_blkxor(&X[0], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE); - - /* output = KDF(password, X) */ - switch(kdf) { - default: - case(0x0): - neoscrypt_fastkdf(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 32, output, 32); - break; - - case(0x1): - neoscrypt_pbkdf2_sha256(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 1, output, 32); - break; - } - - free(stack); -} - -static bool fulltest_le(const uint *hash, const uint *target) -{ - bool rc = false; - - for (int i = 7; i >= 0; i--) { - if (hash[i] > target[i]) { - rc = false; - break; - } - if(hash[i] < target[i]) { - rc = true; - break; - } - } - - if (opt_debug) { - uchar hash_str[65], target_str[65]; - - bin2hex(hash_str, (uint8_t *) hash, 32); - bin2hex(target_str, (uint8_t *) target, 32); - - applog(LOG_DEBUG, "DEBUG (little endian): %s\nHash: %sx0\nTarget: %sx0", - rc ? "hash <= target" : "hash > target (false positive)", - hash_str, target_str); - } - - return(rc); -} - -int scanhash_neoscrypt( int thr_id, struct work *work, - uint32_t max_nonce, uint64_t *hashes_done ) - -{ - uint32_t *pdata = work->data; - uint32_t *ptarget = work->target; - uint32_t _ALIGN(64) hash[8]; - const uint32_t Htarg = ptarget[7]; - const uint32_t first_nonce = pdata[19]; - - while (pdata[19] < max_nonce && !work_restart[thr_id].restart) - { - neoscrypt((uint8_t *) hash, (uint8_t *) pdata ); - // , 0x80000020 | (opt_nfactor << 8) ); - - /* Quick hash check */ - if (hash[7] <= Htarg && fulltest_le(hash, ptarget)) { - *hashes_done = pdata[19] - first_nonce + 1; - return 1; - } - - pdata[19]++; - } - - *hashes_done = pdata[19] - first_nonce; - return 0; -} - -int64_t get_neoscrypt_max64() -{ - int64_t max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n; - if ( opt_nfactor > 3) - max64 >>= ( opt_nfactor - 3); - else if ( opt_nfactor > 16) - max64 = 0xF; - return max64; -} - -void neoscrypt_wait_for_diff( struct stratum_ctx *stratum ) -{ - while ( !stratum->job.diff ) - { -// applog(LOG_DEBUG, "Waiting for Stratum to set the job difficulty"); - sleep(1); - } -} - -bool register_neoscrypt_algo( algo_gate_t* gate ) -{ - gate->scanhash = (void*)&scanhash_neoscrypt; - gate->hash = (void*)&neoscrypt; - gate->hash_alt = (void*)&neoscrypt; - gate->get_max64 = (void*)&get_neoscrypt_max64; - gate->set_target = (void*)&scrypt_set_target; - gate->wait_for_diff = (void*)&neoscrypt_wait_for_diff; - gate->build_stratum_request = (void*)&std_be_build_stratum_request; - gate->set_work_data_endian = (void*)&swab_work_data; - gate->work_data_size = 80; - return true; -}; - - diff --git a/algo/scrypt.c b/algo/scrypt.c index fedc9a7..40613df 100644 --- a/algo/scrypt.c +++ b/algo/scrypt.c @@ -48,6 +48,7 @@ static const uint32_t finalblk[16] = { }; static __thread char *scratchbuf; +int scratchbuf_size = 0; static inline void HMAC_SHA256_80_init(const uint32_t *key, uint32_t *tstate, uint32_t *ostate) @@ -726,29 +727,29 @@ extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce, #if defined(HAVE_SHA256_4WAY) if (throughput == 4) scrypt_1024_1_1_256_4way(data, hash, midstate, - scratchbuf, opt_scrypt_n); + scratchbuf, scratchbuf_size ); else #endif #if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY) if (throughput == 12) scrypt_1024_1_1_256_12way(data, hash, midstate, - scratchbuf, opt_scrypt_n); + scratchbuf, scratchbuf_size ); else #endif #if defined(HAVE_SCRYPT_6WAY) if (throughput == 24) scrypt_1024_1_1_256_24way(data, hash, midstate, - scratchbuf, opt_scrypt_n); + scratchbuf, scratchbuf_size ); else #endif #if defined(HAVE_SCRYPT_3WAY) if (throughput == 3) scrypt_1024_1_1_256_3way(data, hash, midstate, - scratchbuf, opt_scrypt_n); + scratchbuf, scratchbuf_size ); else #endif scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, - opt_scrypt_n); + scratchbuf_size ); for (i = 0; i < throughput; i++) { if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) { @@ -764,19 +765,11 @@ extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce, return 0; } -int64_t scrypt_get_max64() -{ - int64_t max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n; - if ( opt_nfactor > 3) - max64 >>= ( opt_nfactor - 3); - else if ( opt_nfactor > 16) - max64 = 0xF; - return max64; -} +int64_t scrypt_get_max64() { return 0xfff; } bool scrypt_miner_thread_init( int thr_id ) { - scratchbuf = scrypt_buffer_alloc( opt_scrypt_n ); + scratchbuf = scrypt_buffer_alloc( scratchbuf_size ); if ( scratchbuf ) return true; applog( LOG_ERR, "Thread %u: Scrypt buffer allocation failed", thr_id ); @@ -791,8 +784,10 @@ bool register_scrypt_algo( algo_gate_t* gate ) gate->hash_alt = (void*)&scrypt_1024_1_1_256_24way; gate->set_target = (void*)&scrypt_set_target; gate->get_max64 = (void*)&scrypt_get_max64; - if ( opt_nfactor == 0 ) - opt_nfactor = 6; - return true; + + if ( !opt_scrypt_n ) + scratchbuf_size = 1024; + else + scratchbuf_size = opt_scrypt_n; }; diff --git a/algo/scryptjane/scrypt-jane.c b/algo/scryptjane/scrypt-jane.c index bcab680..80187a7 100644 --- a/algo/scryptjane/scrypt-jane.c +++ b/algo/scryptjane/scrypt-jane.c @@ -45,6 +45,8 @@ #define scrypt_maxr scrypt_r_32kb /* 32kb */ #define scrypt_maxp 25 /* (1 << 25) = ~33 million */ +uint64_t sj_N; + typedef struct scrypt_aligned_alloc_t { uint8_t *mem, *ptr; } scrypt_aligned_alloc; @@ -140,7 +142,8 @@ int scanhash_scryptjane( int thr_id, struct work *work, uint32_t max_nonce, { scrypt_aligned_alloc YX, V; uint8_t *X, *Y; - uint32_t N, chunk_bytes; +// uint32_t N, chunk_bytes; + uint32_t chunk_bytes; const uint32_t r = SCRYPT_R; const uint32_t p = SCRYPT_P; @@ -162,10 +165,14 @@ int scanhash_scryptjane( int thr_id, struct work *work, uint32_t max_nonce, // //scrypt_fatal_error("scrypt: N out of range"); //} - N = (1 << ( opt_scrypt_n + 1)); +// opt_scrypt_n default is 1024 which makes no sense in this context +// and results in N = 2, but it seems to work on Nicehash scryptjanenf16 +// (leocoin). Need to test with proper NF 16 for functionality and performance. +// Also test yacoin (NF 18). +// N = (1 << ( opt_scrypt_n + 1)); chunk_bytes = SCRYPT_BLOCK_BYTES * r * 2; - if (!scrypt_alloc((uint64_t)N * chunk_bytes, &V)) return 1; + if (!scrypt_alloc( sj_N * chunk_bytes, &V ) ) return 1; if (!scrypt_alloc((p + 1) * chunk_bytes, &YX)) { scrypt_free(&V); return 1; @@ -181,7 +188,7 @@ int scanhash_scryptjane( int thr_id, struct work *work, uint32_t max_nonce, scrypt_N_1_1((unsigned char *)endiandata, 80, (unsigned char *)endiandata, 80, - N, (unsigned char *)hash, 32, X, Y, V.ptr); + sj_N, (unsigned char *)hash, 32, X, Y, V.ptr); if (hash[7] <= Htarg && fulltest(hash, ptarget)) { pdata[19] = nonce; @@ -208,14 +215,13 @@ void scryptjanehash(void *output, const void *input ) scrypt_aligned_alloc YX, V; uint8_t *X, *Y; uint32_t chunk_bytes; - uint32_t N = (1 << ( opt_scrypt_n + 1)); const uint32_t r = SCRYPT_R; const uint32_t p = SCRYPT_P; memset(output, 0, 32); chunk_bytes = SCRYPT_BLOCK_BYTES * r * 2; - if (!scrypt_alloc((uint64_t)N * chunk_bytes, &V)) return; + if (!scrypt_alloc( sj_N * chunk_bytes, &V ) ) return; if (!scrypt_alloc((p + 1) * chunk_bytes, &YX)) { scrypt_free(&V); return; @@ -225,7 +231,7 @@ void scryptjanehash(void *output, const void *input ) X = Y + chunk_bytes; scrypt_N_1_1((unsigned char*)input, 80, (unsigned char*)input, 80, - N, (unsigned char*)output, 32, X, Y, V.ptr); + sj_N, (unsigned char*)output, 32, X, Y, V.ptr); scrypt_free(&V); scrypt_free(&YX); @@ -238,8 +244,23 @@ bool register_scryptjane_algo( algo_gate_t* gate ) gate->hash_alt = (void*)&scryptjanehash; gate->set_target = (void*)&scrypt_set_target; gate->get_max64 = (void*)&get_max64_0x40LL; - if ( opt_nfactor == 0 ) - opt_nfactor = 16; + + // figure out if arg in N or Nfactor + if ( !opt_scrypt_n ) + { + applog( LOG_ERR, "The N factor must be specified in the form algo:nf"); + return false; + } + else if ( opt_scrypt_n < 32 ) + { + // arg is Nfactor, calculate N + sj_N = 1 << ( opt_scrypt_n + 1 ); + } + else + { + // arg is N + sj_N = opt_scrypt_n; + } return true; } diff --git a/configure.ac b/configure.ac index dceb9b2..557d1f9 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.4.11]) +AC_INIT([cpuminer-opt], [3.4.12]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index c2d7f91..4cbdd89 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -100,9 +100,8 @@ int opt_timeout = 300; static int opt_scantime = 5; static const bool opt_time = true; enum algos opt_algo = ALGO_NULL; -int opt_scrypt_n = 1024; +int opt_scrypt_n = 0; int opt_pluck_n = 128; -unsigned int opt_nfactor = 0; int opt_n_threads = 0; int64_t opt_affinity = -1L; int opt_priority = 0; @@ -2321,7 +2320,7 @@ void parse_arg(int key, char *arg ) { char *ep; v = strtol(arg+v+1, &ep, 10); - if (*ep || v & (v-1) || v < 2) + if (*ep || v < 2) continue; opt_algo = (enum algos) i; opt_scrypt_n = v; @@ -2360,8 +2359,6 @@ void parse_arg(int key, char *arg ) case 1030: /* --api-remote */ opt_api_remote = 1; break; - case 'n': - break; case 'B': opt_background = true; use_colors = false; @@ -2386,8 +2383,6 @@ void parse_arg(int key, char *arg ) } break; } - case 'C': - break; case 'q': opt_quiet = true; break; diff --git a/miner.h b/miner.h index 8adb2cd..5f91e77 100644 --- a/miner.h +++ b/miner.h @@ -620,7 +620,6 @@ extern double net_hashrate; extern int opt_pluck_n; extern int opt_scrypt_n; extern double opt_diff_factor; -extern unsigned int opt_nfactor; extern bool opt_randomize; extern bool allow_mininginfo; extern time_t g_work_time; @@ -645,16 +644,16 @@ Options:\n\ blakecoin blake256r8\n\ blake2s Blake-2 S\n\ bmw BMW 256\n\ - c11 flax\n\ + c11 Flax\n\ cryptolight Cryptonight-light\n\ - cryptonight Monero (XMR)\n\ + cryptonight cryptonote, Monero (XMR)\n\ decred\n\ drop Dropcoin\n\ fresh Fresh\n\ groestl groestl\n\ heavy Heavy\n\ hmq1725 Espers\n\ - hodl hodlcoin\n\ + hodl Hodlcoin\n\ keccak Keccak\n\ lbry LBC, LBRY Credits\n\ luffa Luffa\n\ @@ -671,13 +670,12 @@ Options:\n\ quark Quark\n\ qubit Qubit\n\ scrypt scrypt(1024, 1, 1) (default)\n\ - scryptjane\n\ scrypt:N scrypt(N, 1, 1)\n\ + scryptjane:nf\n\ sha256d SHA-256d\n\ shavite3 Shavite3\n\ skein Skein+Sha (Skeincoin)\n\ skein2 Double Skein (Woodcoin)\n\ - s3 S3\n\ vanilla blake256r8vnl (VCash)\n\ veltor\n\ whirlpool\n\ @@ -710,7 +708,6 @@ Options:\n\ -f, --diff-factor Divide req. difficulty by this factor (std is 1.0)\n\ -m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\ --hide-diff Do not display changes in difficulty\n\ - -n, --nfactor neoscrypt N-Factor\n\ --coinbase-addr=ADDR payout address for solo mining\n\ --coinbase-sig=TEXT data to insert in the coinbase when possible\n\ --no-longpoll disable long polling support\n\ @@ -775,7 +772,6 @@ static struct option const options[] = { { "diff-multiplier", 1, NULL, 'm' }, { "hide-diff", 0, NULL, 1013 }, { "help", 0, NULL, 'h' }, - { "nfactor", 1, NULL, 'n' }, { "no-gbt", 0, NULL, 1011 }, { "no-getwork", 0, NULL, 1010 }, { "no-longpoll", 0, NULL, 1003 },