mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
dd5e552357 | ||
![]() |
f449c6725f | ||
![]() |
3363d61524 | ||
![]() |
20fe05054c | ||
![]() |
157508bd07 | ||
![]() |
c24a4bdbc2 |
24
Makefile.am
24
Makefile.am
@@ -31,12 +31,19 @@ cpuminer_SOURCES = \
|
||||
crypto/hash.c \
|
||||
crypto/aesb.c \
|
||||
crypto/magimath.cpp \
|
||||
algo/argon2/argon2a.c \
|
||||
algo/argon2/ar2/argon2.c \
|
||||
algo/argon2/ar2/opt.c \
|
||||
algo/argon2/ar2/cores.c \
|
||||
algo/argon2/ar2/ar2-scrypt-jane.c \
|
||||
algo/argon2/ar2/blake2b.c \
|
||||
algo/argon2/argon2a/argon2a.c \
|
||||
algo/argon2/argon2a/ar2/argon2.c \
|
||||
algo/argon2/argon2a/ar2/opt.c \
|
||||
algo/argon2/argon2a/ar2/cores.c \
|
||||
algo/argon2/argon2a/ar2/ar2-scrypt-jane.c \
|
||||
algo/argon2/argon2a/ar2/blake2b.c \
|
||||
algo/argon2/argon2d/argon2d-gate.c \
|
||||
algo/argon2/argon2d/blake2/blake2b.c \
|
||||
algo/argon2/argon2d/argon2d/argon2.c \
|
||||
algo/argon2/argon2d/argon2d/core.c \
|
||||
algo/argon2/argon2d/argon2d/opt.c \
|
||||
algo/argon2/argon2d/argon2d/thread.c \
|
||||
algo/argon2/argon2d/argon2d/encoding.c \
|
||||
algo/blake/sph_blake.c \
|
||||
algo/blake/blake-hash-4way.c \
|
||||
algo/blake/blake-gate.c \
|
||||
@@ -68,6 +75,7 @@ cpuminer_SOURCES = \
|
||||
algo/cryptonight/cryptonight.c\
|
||||
algo/cubehash/sph_cubehash.c \
|
||||
algo/cubehash/sse2/cubehash_sse2.c\
|
||||
algo/cubehash/cube-hash-2way.c \
|
||||
algo/echo/sph_echo.c \
|
||||
algo/echo/aes_ni/hash.c\
|
||||
algo/gost/sph_gost.c \
|
||||
@@ -152,6 +160,8 @@ cpuminer_SOURCES = \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha2-hash-4way.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t-gate.c \
|
||||
algo/sha/sha256t-4way.c \
|
||||
algo/sha/sha256t.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/shabal/shabal-hash-4way.c \
|
||||
@@ -242,7 +252,7 @@ cpuminer_SOURCES = \
|
||||
algo/x17/hmq1725.c \
|
||||
algo/yescrypt/yescrypt.c \
|
||||
algo/yescrypt/sha256_Y.c \
|
||||
algo/yescrypt/yescrypt-simd.c
|
||||
algo/yescrypt/yescrypt-best.c
|
||||
|
||||
disable_flags =
|
||||
|
||||
|
29
README.md
29
README.md
@@ -28,11 +28,12 @@ performance.
|
||||
ARM CPUs are not supported.
|
||||
|
||||
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
|
||||
Centos are known to work and have all dependencies in their repositories.
|
||||
Others may work but may require more effort.
|
||||
Centos, are known to work and have all dependencies in their repositories.
|
||||
Others may work but may require more effort. Older versions such as Centos 6
|
||||
don't work due to missing features.
|
||||
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
|
||||
|
||||
MacOS, OSx is not supported.
|
||||
MacOS, OSx and Android are not supported.
|
||||
|
||||
3. Stratum pool. Some algos may work wallet mining using getwork or GBT. YMMV.
|
||||
|
||||
@@ -41,7 +42,9 @@ Supported Algorithms
|
||||
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2
|
||||
argon2 Argon2 coin (AR2)
|
||||
argon2d-crds Credits (CRDS)
|
||||
argon2d-dyn Dynamic (DYN)
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
@@ -104,17 +107,21 @@ Supported Algorithms
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r Ravencoin
|
||||
x16r Ravencoin (RVN)
|
||||
x16s pigeoncoin (PGN)
|
||||
x17
|
||||
xevan Bitsend
|
||||
xevan Bitsend (BSD)
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr16 Yenten (YTN)
|
||||
yescryptr32 WAVI
|
||||
zr5 Ziftr
|
||||
|
||||
Errata
|
||||
------
|
||||
|
||||
Neoscrypt crashes on Windows, use legacy version.
|
||||
|
||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
@@ -142,11 +149,11 @@ Donations
|
||||
|
||||
cpuminer-opt has no fees of any kind but donations are accepted.
|
||||
|
||||
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
|
||||
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
|
||||
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
|
||||
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
|
||||
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
|
||||
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
|
||||
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
|
||||
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
|
||||
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
|
||||
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
|
||||
|
||||
Happy mining!
|
||||
|
||||
|
@@ -19,7 +19,7 @@ Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Exe name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-march=core2" Core2, Nehalem
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
|
||||
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2" Haswell...
|
||||
|
@@ -1,4 +1,4 @@
|
||||
cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
|
||||
puminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
|
||||
This feature requires recent SW including GCC version 5 or higher and
|
||||
openssl version 1.1 or higher. It may also require using "-march=znver1"
|
||||
compile flag.
|
||||
@@ -90,7 +90,8 @@ Additional optional compile flags, add the following to CFLAGS to activate:
|
||||
|
||||
SPH may give slightly better performance on algos that use sha256 when using
|
||||
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
|
||||
better than SPH.
|
||||
better than SPH. This option is ignored when 4-way is used, even for CPUs
|
||||
with SHA.
|
||||
|
||||
Start mining.
|
||||
|
||||
@@ -159,6 +160,46 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.6
|
||||
|
||||
Fixed argon2 regression in v3.8.5.
|
||||
Added x16s algo for Pigeoncoin.
|
||||
Some code cleanup.
|
||||
|
||||
v3.8.5
|
||||
|
||||
Added argon2d-crds and argon2d-dyn algos.
|
||||
sha256t 8 way AVX2 & 4 way SSE4.2 optimized.
|
||||
CPUs with SSE4.2 get optimizations previously reserved for AVX.
|
||||
|
||||
v3.8.4.1
|
||||
|
||||
Fixed sha256t low difficulty rejects.
|
||||
Fixed compile error on CPUs with AVX512.
|
||||
|
||||
v3.8.4
|
||||
|
||||
Added yescryptr32 algo for WAVI coin.
|
||||
Added URL to API data.
|
||||
Improved detection of __int128 support (linux only)
|
||||
Compile support for CPUs without SSSE3 (no binary support)
|
||||
|
||||
v3.8.3.3
|
||||
|
||||
Integrated getblocktemplate with algo_gate.
|
||||
Added support for hodl gbt (untested).
|
||||
Reworked some recent quick fixes.
|
||||
|
||||
v3.8.3.2
|
||||
|
||||
Reverted gbt changes from v3.8.0 that broke getwork.
|
||||
Reverted scaled hash rate for API, added HS term in addition to KHS.
|
||||
Added blocks solved to console display and API.
|
||||
|
||||
v3.8.3.1
|
||||
|
||||
Fixed regression in v3.8.3 that broke several algos.
|
||||
|
||||
v3.8.3
|
||||
|
||||
More restoration of lost lyra2 hash.
|
||||
|
@@ -119,9 +119,11 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
|
||||
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
|
||||
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
|
||||
gate->set_target = (void*)&std_set_target;
|
||||
gate->work_decode = (void*)&std_le_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
|
||||
gate->build_block_header = (void*)&std_build_block_header;
|
||||
gate->build_extraheader = (void*)&std_build_extraheader;
|
||||
gate->set_work_data_endian = (void*)&do_nothing;
|
||||
gate->calc_network_diff = (void*)&std_calc_network_diff;
|
||||
@@ -158,6 +160,8 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2DCRDS: register_argon2d_crds_algo( gate ); break;
|
||||
case ALGO_ARGON2DDYN: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
@@ -220,11 +224,13 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
|
@@ -87,10 +87,10 @@ typedef uint32_t set_t;
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define AVX_OPT 4
|
||||
#define AVX2_OPT 8
|
||||
#define SHA_OPT 0x10
|
||||
//#define FOUR_WAY_OPT 0x20
|
||||
#define SSE42_OPT 4
|
||||
#define AVX_OPT 8
|
||||
#define AVX2_OPT 0x10
|
||||
#define SHA_OPT 0x20
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
|
||||
@@ -127,7 +127,10 @@ void ( *set_target) ( struct work*, double );
|
||||
bool ( *submit_getwork_result ) ( CURL*, struct work* );
|
||||
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
|
||||
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
|
||||
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
|
||||
uint32_t*, uint32_t, uint32_t );
|
||||
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
|
||||
char* ( *malloc_txs_request ) ( struct work* );
|
||||
void ( *set_work_data_endian ) ( struct work* );
|
||||
double ( *calc_network_diff ) ( struct work* );
|
||||
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
|
||||
@@ -228,11 +231,17 @@ void std_le_build_stratum_request( char *req, struct work *work );
|
||||
void std_be_build_stratum_request( char *req, struct work *work );
|
||||
void jr2_build_stratum_request ( char *req, struct work *work );
|
||||
|
||||
char* std_malloc_txs_request( struct work *work );
|
||||
|
||||
// Default is do_nothing (assumed LE)
|
||||
void set_work_data_big_endian( struct work *work );
|
||||
|
||||
double std_calc_network_diff( struct work *work );
|
||||
|
||||
void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits );
|
||||
|
||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||
|
||||
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
|
||||
|
@@ -99,18 +99,18 @@ static const char *Argon2_ErrorMessage[] = {
|
||||
{ARGON2_MISSING_ARGS, */ "Missing arguments", /*},*/
|
||||
};
|
||||
|
||||
int argon2d(argon2_context *context) { return argon2_core(context, Argon2_d); }
|
||||
int argon2d(argon2_context *context) { return ar2_argon2_core(context, Argon2_d); }
|
||||
|
||||
int argon2i(argon2_context *context) { return argon2_core(context, Argon2_i); }
|
||||
int argon2i(argon2_context *context) { return ar2_argon2_core(context, Argon2_i); }
|
||||
|
||||
int verify_d(argon2_context *context, const char *hash)
|
||||
int ar2_verify_d(argon2_context *context, const char *hash)
|
||||
{
|
||||
int result;
|
||||
/*if (0 == context->outlen || NULL == hash) {
|
||||
return ARGON2_OUT_PTR_MISMATCH;
|
||||
}*/
|
||||
|
||||
result = argon2_core(context, Argon2_d);
|
||||
result = ar2_argon2_core(context, Argon2_d);
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
@@ -223,7 +223,7 @@ static size_t to_base64(char *dst, size_t dst_len, const void *src)
|
||||
* The output length is always exactly 32 bytes.
|
||||
*/
|
||||
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx)
|
||||
int ar2_encode_string(char *dst, size_t dst_len, argon2_context *ctx)
|
||||
{
|
||||
#define SS(str) \
|
||||
do { \
|
@@ -255,7 +255,7 @@ int argon2id(argon2_context *context);
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
int verify_d(argon2_context *context, const char *hash);
|
||||
int ar2_verify_d(argon2_context *context, const char *hash);
|
||||
|
||||
/*
|
||||
* Get the associated error message for given error code
|
||||
@@ -283,7 +283,7 @@ const char *error_message(int error_code);
|
||||
* The output length is always exactly 32 bytes.
|
||||
*/
|
||||
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx);
|
||||
int ar2_encode_string(char *dst, size_t dst_len, argon2_context *ctx);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
@@ -52,22 +52,22 @@ enum {
|
||||
};
|
||||
|
||||
/* Streaming API */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
int ar2_blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int ar2_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen);
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int ar2_blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int ar2_blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
int ar2_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, const void *in, const void *key, size_t keylen);
|
||||
int ar2_blake2b(void *out, const void *in, const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *out, const void *in);
|
||||
int ar2_blake2b_long(void *out, const void *in);
|
||||
/* Argon2 Team - End Code */
|
||||
/* Miouyouyou */
|
||||
void blake2b_too(void *out, const void *in);
|
||||
void ar2_blake2b_too(void *out, const void *in);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
@@ -107,7 +107,7 @@ static const blake2b_state miou = {
|
||||
};
|
||||
|
||||
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P)
|
||||
int ar2_blake2b_init_param(blake2b_state *S, const blake2b_param *P)
|
||||
{
|
||||
const unsigned char *p = (const unsigned char *)P;
|
||||
unsigned int i;
|
||||
@@ -133,7 +133,7 @@ void compare_buffs(uint64_t *h, size_t outlen)
|
||||
}
|
||||
|
||||
/* Sequential blake2b initialization */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen)
|
||||
int ar2_blake2b_init(blake2b_state *S, size_t outlen)
|
||||
{
|
||||
memcpy(S, &miou, sizeof(*S));
|
||||
S->h[0] += outlen;
|
||||
@@ -147,7 +147,7 @@ void print64(const char *name, const uint64_t *array, uint16_t size)
|
||||
printf("};\n");
|
||||
}
|
||||
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen)
|
||||
int ar2_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -207,7 +207,7 @@ static void blake2b_compress(blake2b_state *S, const uint8_t *block)
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
int ar2_blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
{
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
/* Complete current block */
|
||||
@@ -235,7 +235,7 @@ void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
S->buflen += (unsigned int)inlen;
|
||||
}
|
||||
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen)
|
||||
int ar2_blake2b_final(blake2b_state *S, void *out, size_t outlen)
|
||||
{
|
||||
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
|
||||
unsigned int i;
|
||||
@@ -257,48 +257,48 @@ int blake2b_final(blake2b_state *S, void *out, size_t outlen)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b(void *out, const void *in, const void *key, size_t keylen)
|
||||
int ar2_blake2b(void *out, const void *in, const void *key, size_t keylen)
|
||||
{
|
||||
blake2b_state S;
|
||||
|
||||
blake2b_init(&S, 64);
|
||||
ar2_blake2b_init(&S, 64);
|
||||
my_blake2b_update(&S, in, 64);
|
||||
blake2b_final(&S, out, 64);
|
||||
ar2_blake2b_final(&S, out, 64);
|
||||
burn(&S, sizeof(S));
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blake2b_too(void *pout, const void *in)
|
||||
void ar2_blake2b_too(void *pout, const void *in)
|
||||
{
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
uint8_t out_buffer[64];
|
||||
uint8_t in_buffer[64];
|
||||
|
||||
blake2b_state blake_state;
|
||||
blake2b_init(&blake_state, 64);
|
||||
ar2_blake2b_init(&blake_state, 64);
|
||||
blake_state.buflen = blake_state.buf[1] = 4;
|
||||
my_blake2b_update(&blake_state, in, 72);
|
||||
blake2b_final(&blake_state, out_buffer, 64);
|
||||
ar2_blake2b_final(&blake_state, out_buffer, 64);
|
||||
memcpy(out, out_buffer, 32);
|
||||
out += 32;
|
||||
|
||||
register uint8_t i = 29;
|
||||
while (i--) {
|
||||
memcpy(in_buffer, out_buffer, 64);
|
||||
blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
ar2_blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
memcpy(out, out_buffer, 32);
|
||||
out += 32;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, 64);
|
||||
blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
ar2_blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
memcpy(out, out_buffer, 64);
|
||||
|
||||
burn(&blake_state, sizeof(blake_state));
|
||||
}
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *pout, const void *in)
|
||||
int ar2_blake2b_long(void *pout, const void *in)
|
||||
{
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
blake2b_state blake_state;
|
||||
@@ -306,10 +306,10 @@ int blake2b_long(void *pout, const void *in)
|
||||
|
||||
store32(outlen_bytes, 32);
|
||||
|
||||
blake2b_init(&blake_state, 32);
|
||||
ar2_blake2b_init(&blake_state, 32);
|
||||
my_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes));
|
||||
blake2b_update(&blake_state, in, 1024);
|
||||
blake2b_final(&blake_state, out, 32);
|
||||
ar2_blake2b_update(&blake_state, in, 1024);
|
||||
ar2_blake2b_final(&blake_state, out, 32);
|
||||
burn(&blake_state, sizeof(blake_state));
|
||||
return 0;
|
||||
}
|
@@ -51,15 +51,15 @@
|
||||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
void ar2_init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
//inline void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
|
||||
void copy_block(block *dst, const block *src) {
|
||||
void ar2_copy_block(block *dst, const block *src) {
|
||||
//inline void copy_block(block *dst, const block *src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_WORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
void xor_block(block *dst, const block *src) {
|
||||
void ar2_xor_block(block *dst, const block *src) {
|
||||
//inline void xor_block(block *dst, const block *src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
@@ -67,7 +67,7 @@ void xor_block(block *dst, const block *src) {
|
||||
}
|
||||
}
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
static void ar2_load_block(block *dst, const void *input) {
|
||||
//static inline void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
@@ -75,7 +75,7 @@ static void load_block(block *dst, const void *input) {
|
||||
}
|
||||
}
|
||||
|
||||
static void store_block(void *output, const block *src) {
|
||||
static void ar2_store_block(void *output, const block *src) {
|
||||
//static inline void store_block(void *output, const block *src) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
@@ -84,7 +84,7 @@ static void store_block(void *output, const block *src) {
|
||||
}
|
||||
|
||||
/***************Memory allocators*****************/
|
||||
int allocate_memory(block **memory, uint32_t m_cost) {
|
||||
int ar2_allocate_memory(block **memory, uint32_t m_cost) {
|
||||
if (memory != NULL) {
|
||||
size_t memory_size = sizeof(block) * m_cost;
|
||||
if (m_cost != 0 &&
|
||||
@@ -105,34 +105,34 @@ int allocate_memory(block **memory, uint32_t m_cost) {
|
||||
}
|
||||
}
|
||||
|
||||
void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
void ar2_secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
//inline void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
|
||||
/*********Memory functions*/
|
||||
|
||||
void clear_memory(argon2_instance_t *instance, int clear) {
|
||||
void ar2_clear_memory(argon2_instance_t *instance, int clear) {
|
||||
//inline void clear_memory(argon2_instance_t *instance, int clear) {
|
||||
if (instance->memory != NULL && clear) {
|
||||
secure_wipe_memory(instance->memory,
|
||||
ar2_secure_wipe_memory(instance->memory,
|
||||
sizeof(block) * /*instance->memory_blocks*/16);
|
||||
}
|
||||
}
|
||||
|
||||
void free_memory(block *memory) { free(memory); }
|
||||
void ar2_free_memory(block *memory) { free(memory); }
|
||||
//inline void free_memory(block *memory) { free(memory); }
|
||||
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
void ar2_finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
if (context != NULL && instance != NULL) {
|
||||
block blockhash;
|
||||
copy_block(&blockhash, instance->memory + 15);
|
||||
ar2_copy_block(&blockhash, instance->memory + 15);
|
||||
|
||||
/* Hash the result */
|
||||
{
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store_block(blockhash_bytes, &blockhash);
|
||||
blake2b_long(context->out, blockhash_bytes);
|
||||
secure_wipe_memory(blockhash.v, ARGON2_BLOCK_SIZE);
|
||||
secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); /* clear blockhash_bytes */
|
||||
ar2_store_block(blockhash_bytes, &blockhash);
|
||||
ar2_blake2b_long(context->out, blockhash_bytes);
|
||||
ar2_secure_wipe_memory(blockhash.v, ARGON2_BLOCK_SIZE);
|
||||
ar2_secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); /* clear blockhash_bytes */
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
@@ -142,11 +142,11 @@ void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
/* Clear memory */
|
||||
// clear_memory(instance, 1);
|
||||
|
||||
free_memory(instance->memory);
|
||||
ar2_free_memory(instance->memory);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
uint32_t ar2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane) {
|
||||
/*
|
||||
@@ -207,7 +207,7 @@ uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
return absolute_position;
|
||||
}
|
||||
|
||||
void fill_memory_blocks(argon2_instance_t *instance) {
|
||||
void ar2_fill_memory_blocks(argon2_instance_t *instance) {
|
||||
uint32_t r, s;
|
||||
|
||||
for (r = 0; r < 2; ++r) {
|
||||
@@ -218,7 +218,7 @@ void fill_memory_blocks(argon2_instance_t *instance) {
|
||||
position.lane = 0;
|
||||
position.slice = (uint8_t)s;
|
||||
position.index = 0;
|
||||
fill_segment(instance, position);
|
||||
ar2_fill_segment(instance, position);
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
@@ -227,19 +227,19 @@ void fill_memory_blocks(argon2_instance_t *instance) {
|
||||
}
|
||||
}
|
||||
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
void ar2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
/* Make the first and second block in each lane as G(H0||i||0) or
|
||||
G(H0||i||1) */
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, 0);
|
||||
blake2b_too(blockhash_bytes, blockhash);
|
||||
load_block(&instance->memory[0], blockhash_bytes);
|
||||
ar2_blake2b_too(blockhash_bytes, blockhash);
|
||||
ar2_load_block(&instance->memory[0], blockhash_bytes);
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
|
||||
blake2b_too(blockhash_bytes, blockhash);
|
||||
load_block(&instance->memory[1], blockhash_bytes);
|
||||
secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
ar2_blake2b_too(blockhash_bytes, blockhash);
|
||||
ar2_load_block(&instance->memory[1], blockhash_bytes);
|
||||
ar2_secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
|
||||
@@ -268,7 +268,7 @@ static const blake2b_state base_hash = {
|
||||
#define SALTLEN 32
|
||||
#define SECRETLEN 0
|
||||
#define ADLEN 0
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
void ar2_initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type) {
|
||||
|
||||
uint8_t value[sizeof(uint32_t)];
|
||||
@@ -280,7 +280,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
PWDLEN);
|
||||
|
||||
|
||||
secure_wipe_memory(context->pwd, PWDLEN);
|
||||
ar2_secure_wipe_memory(context->pwd, PWDLEN);
|
||||
context->pwdlen = 0;
|
||||
|
||||
store32(&value, SALTLEN);
|
||||
@@ -295,22 +295,22 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
store32(&value, ADLEN);
|
||||
my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
ar2_blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
int ar2_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
/* 1. Memory allocation */
|
||||
|
||||
|
||||
allocate_memory(&(instance->memory), 16);
|
||||
ar2_allocate_memory(&(instance->memory), 16);
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
/* Hashing all inputs */
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
initial_hash(blockhash, context, instance->type);
|
||||
ar2_initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ar2_secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
@@ -320,14 +320,14 @@ int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
fill_first_blocks(blockhash, instance);
|
||||
ar2_fill_first_blocks(blockhash, instance);
|
||||
/* Clearing the hash */
|
||||
secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
ar2_secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2_core(argon2_context *context, argon2_type type) {
|
||||
int ar2_argon2_core(argon2_context *context, argon2_type type) {
|
||||
argon2_instance_t instance;
|
||||
instance.memory = NULL;
|
||||
instance.type = type;
|
||||
@@ -336,14 +336,14 @@ int argon2_core(argon2_context *context, argon2_type type) {
|
||||
* blocks
|
||||
*/
|
||||
|
||||
int result = initialize(&instance, context);
|
||||
int result = ar2_initialize(&instance, context);
|
||||
if (ARGON2_OK != result) return result;
|
||||
|
||||
/* 4. Filling memory */
|
||||
fill_memory_blocks(&instance);
|
||||
ar2_fill_memory_blocks(&instance);
|
||||
|
||||
/* 5. Finalization */
|
||||
finalize(context, &instance);
|
||||
ar2_finalize(context, &instance);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
@@ -62,13 +62,13 @@ typedef struct _block { uint64_t v[ARGON2_WORDS_IN_BLOCK]; } ALIGN(16) block;
|
||||
/*****************Functions that work with the block******************/
|
||||
|
||||
/* Initialize each byte of the block with @in */
|
||||
void init_block_value(block *b, uint8_t in);
|
||||
void ar2_init_block_value(block *b, uint8_t in);
|
||||
|
||||
/* Copy block @src to block @dst */
|
||||
void copy_block(block *dst, const block *src);
|
||||
void ar2_copy_block(block *dst, const block *src);
|
||||
|
||||
/* XOR @src onto @dst bytewise */
|
||||
void xor_block(block *dst, const block *src);
|
||||
void ar2_xor_block(block *dst, const block *src);
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
@@ -101,24 +101,24 @@ typedef struct Argon2_position_t {
|
||||
* @param m_cost number of blocks to allocate in the memory
|
||||
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
|
||||
*/
|
||||
int allocate_memory(block **memory, uint32_t m_cost);
|
||||
int ar2_allocate_memory(block **memory, uint32_t m_cost);
|
||||
|
||||
/* Function that securely cleans the memory
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void secure_wipe_memory(void *v, size_t n);
|
||||
void ar2_secure_wipe_memory(void *v, size_t n);
|
||||
|
||||
/* Clears memory
|
||||
* @param instance pointer to the current instance
|
||||
* @param clear_memory indicates if we clear the memory with zeros.
|
||||
*/
|
||||
void clear_memory(argon2_instance_t *instance, int clear);
|
||||
void ar2_clear_memory(argon2_instance_t *instance, int clear);
|
||||
|
||||
/* Deallocates memory
|
||||
* @param memory pointer to the blocks
|
||||
*/
|
||||
void free_memory(block *memory);
|
||||
void ar2_free_memory(block *memory);
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
@@ -130,7 +130,7 @@ void free_memory(block *memory);
|
||||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
uint32_t ar2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
@@ -141,7 +141,7 @@ uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>
|
||||
*/
|
||||
int validate_inputs(const argon2_context *context);
|
||||
int ar2_validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
|
||||
@@ -153,7 +153,7 @@ int validate_inputs(const argon2_context *context);
|
||||
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
|
||||
* allocated
|
||||
*/
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
void ar2_initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
@@ -162,7 +162,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
* @param blockhash Pointer to the pre-hashing digest
|
||||
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
|
||||
*/
|
||||
void fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
void ar2_fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
@@ -174,7 +174,7 @@ void fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
int ar2_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
@@ -187,7 +187,7 @@ int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
* @pre if context->free_cbk is not NULL, it should point to a function that
|
||||
* deallocates memory
|
||||
*/
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
void ar2_finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
@@ -196,7 +196,7 @@ void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
void ar2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
/*
|
||||
@@ -204,13 +204,13 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
* blocks in each lane
|
||||
* @param instance Pointer to the current instance
|
||||
*/
|
||||
void fill_memory_blocks(argon2_instance_t *instance);
|
||||
void ar2_fill_memory_blocks(argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that performs memory-hard hashing with certain degree of parallelism
|
||||
* @param context Pointer to the Argon2 internal structure
|
||||
* @return Error code if smth is wrong, ARGON2_OK otherwise
|
||||
*/
|
||||
int argon2_core(argon2_context *context, argon2_type type);
|
||||
int ar2_argon2_core(argon2_context *context, argon2_type type);
|
||||
|
||||
#endif
|
@@ -26,7 +26,7 @@
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blamka-round-opt.h"
|
||||
|
||||
void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block)
|
||||
void ar2_fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block)
|
||||
{
|
||||
__m128i ALIGN(16) block_XY[ARGON2_QWORDS_IN_BLOCK];
|
||||
uint32_t i;
|
||||
@@ -95,7 +95,7 @@ static const uint64_t bad_rands[32] = {
|
||||
UINT64_C(8548260058287621283), UINT64_C(8641748798041936364)
|
||||
};
|
||||
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
void ar2_generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands)
|
||||
{
|
||||
@@ -113,7 +113,7 @@ void generate_addresses(const argon2_instance_t *instance,
|
||||
#define LANE_LENGTH 16
|
||||
#define POS_LANE 0
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
void ar2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position)
|
||||
{
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
@@ -129,7 +129,7 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
pseudo_rands = (uint64_t *)malloc(/*sizeof(uint64_t) * 4*/32);
|
||||
|
||||
if (data_independent_addressing) {
|
||||
generate_addresses(instance, &position, pseudo_rands);
|
||||
ar2_generate_addresses(instance, &position, pseudo_rands);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
@@ -173,12 +173,12 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,1);
|
||||
ref_index = ar2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,1);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block = instance->memory + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
fill_block(state, (__m128i const *)ref_block->v, (__m128i *)curr_block->v);
|
||||
ar2_fill_block(state, (__m128i const *)ref_block->v, (__m128i *)curr_block->v);
|
||||
}
|
||||
|
||||
free(pseudo_rands);
|
@@ -21,7 +21,7 @@
|
||||
* @param next_block Pointer to the block to be constructed
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block);
|
||||
void ar2_fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block);
|
||||
|
||||
/*
|
||||
* Generate pseudo-random values to reference blocks in the segment and puts
|
||||
@@ -31,7 +31,7 @@ void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block);
|
||||
* @param pseudo_rands Pointer to the array of 64-bit values
|
||||
* @pre pseudo_rands must point to @a instance->segment_length allocated values
|
||||
*/
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
void ar2_generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands);
|
||||
|
||||
@@ -43,7 +43,7 @@ void generate_addresses(const argon2_instance_t *instance,
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
void ar2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
#endif /* ARGON2_OPT_H */
|
@@ -24,7 +24,7 @@ inline void argon_call(void *out, void *in, void *salt, int type)
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
|
||||
argon2_core(&context, type);
|
||||
ar2_argon2_core(&context, type);
|
||||
}
|
||||
|
||||
void argon2hash(void *output, const void *input)
|
143
algo/argon2/argon2d/argon2d-gate.c
Normal file
143
algo/argon2/argon2d/argon2d-gate.c
Normal file
@@ -0,0 +1,143 @@
|
||||
#include "argon2d-gate.h"
|
||||
#include "argon2d/argon2.h"
|
||||
|
||||
static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Input Length = Salt Length (salt = input)
|
||||
static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash
|
||||
static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS
|
||||
|
||||
// Credits
|
||||
|
||||
void argon2d_crds_hash( void *output, const void *input )
|
||||
{
|
||||
argon2_context context;
|
||||
context.out = (uint8_t *)output;
|
||||
context.outlen = (uint32_t)OUTPUT_BYTES;
|
||||
context.pwd = (uint8_t *)input;
|
||||
context.pwdlen = (uint32_t)INPUT_BYTES;
|
||||
context.salt = (uint8_t *)input; //salt = input
|
||||
context.saltlen = (uint32_t)INPUT_BYTES;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
|
||||
// main configurable Argon2 hash parameters
|
||||
context.m_cost = 250; // Memory in KiB (~256KB)
|
||||
context.lanes = 4; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 1; // Iterations
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_crds_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d_crds;
|
||||
gate->hash = (void*)&argon2d_crds_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
}
|
||||
|
||||
// Dynamic
|
||||
|
||||
void argon2d_dyn_hash( void *output, const void *input )
|
||||
{
|
||||
argon2_context context;
|
||||
context.out = (uint8_t *)output;
|
||||
context.outlen = (uint32_t)OUTPUT_BYTES;
|
||||
context.pwd = (uint8_t *)input;
|
||||
context.pwdlen = (uint32_t)INPUT_BYTES;
|
||||
context.salt = (uint8_t *)input; //salt = input
|
||||
context.saltlen = (uint32_t)INPUT_BYTES;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
|
||||
// main configurable Argon2 hash parameters
|
||||
context.m_cost = 500; // Memory in KiB (512KB)
|
||||
context.lanes = 8; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 2; // Iterations
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_dyn_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d_dyn;
|
||||
gate->hash = (void*)&argon2d_dyn_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
}
|
||||
|
25
algo/argon2/argon2d/argon2d-gate.h
Normal file
25
algo/argon2/argon2d/argon2d-gate.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef ARGON2D_GATE_H__
|
||||
#define ARGON2D_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
// Credits
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_crds_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
// Dynamic
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_dyn_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
|
||||
#endif
|
||||
|
680
algo/argon2/argon2d/argon2d/argon2.c
Normal file
680
algo/argon2/argon2d/argon2d/argon2.c
Normal file
@@ -0,0 +1,680 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include "argon2.h"
|
||||
#include "encoding.h"
|
||||
#include "core.h"
|
||||
|
||||
const char *argon2_type2string(argon2_type type, int uppercase) {
|
||||
switch (type) {
|
||||
case Argon2_d:
|
||||
return uppercase ? "Argon2d" : "argon2d";
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int argon2_ctx(argon2_context *context, argon2_type type) {
|
||||
/* 1. Validate all inputs */
|
||||
int result = validate_inputs(context);
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (Argon2_d != type) {
|
||||
return ARGON2_INCORRECT_TYPE;
|
||||
}
|
||||
|
||||
/* 2. Align memory size */
|
||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||
memory_blocks = context->m_cost;
|
||||
|
||||
if (memory_blocks < 2 * ARGON2_SYNC_POINTS * context->lanes) {
|
||||
memory_blocks = 2 * ARGON2_SYNC_POINTS * context->lanes;
|
||||
}
|
||||
|
||||
segment_length = memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
|
||||
/* Ensure that all segments have equal length */
|
||||
memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.memory = NULL;
|
||||
instance.passes = context->t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context->lanes;
|
||||
instance.limit = 1;
|
||||
instance.threads = context->threads;
|
||||
instance.type = type;
|
||||
|
||||
if (instance.threads > instance.limit) {
|
||||
instance.threads = instance.limit;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
result = initialize(&instance, context);
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/* 4. Filling memory */
|
||||
result = fill_memory_blocks(&instance);
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
}
|
||||
/* 5. Finalization */
|
||||
finalize(context, &instance);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt, const size_t saltlen,
|
||||
void *hash, const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type){
|
||||
|
||||
argon2_context context;
|
||||
int result;
|
||||
uint8_t *out;
|
||||
|
||||
if (pwdlen > ARGON2_MAX_PWD_LENGTH) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
if (saltlen > ARGON2_MAX_SALT_LENGTH) {
|
||||
return ARGON2_SALT_TOO_LONG;
|
||||
}
|
||||
|
||||
if (hashlen > ARGON2_MAX_OUTLEN) {
|
||||
return ARGON2_OUTPUT_TOO_LONG;
|
||||
}
|
||||
|
||||
if (hashlen < ARGON2_MIN_OUTLEN) {
|
||||
return ARGON2_OUTPUT_TOO_SHORT;
|
||||
}
|
||||
|
||||
out = malloc(hashlen);
|
||||
if (!out) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
context.out = (uint8_t *)out;
|
||||
context.outlen = (uint32_t)hashlen;
|
||||
context.pwd = CONST_CAST(uint8_t *)pwd;
|
||||
context.pwdlen = (uint32_t)pwdlen;
|
||||
context.salt = CONST_CAST(uint8_t *)salt;
|
||||
context.saltlen = (uint32_t)saltlen;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = t_cost;
|
||||
context.m_cost = m_cost;
|
||||
context.lanes = parallelism;
|
||||
context.threads = parallelism;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
|
||||
result = argon2_ctx(&context, type);
|
||||
|
||||
if (result != ARGON2_OK) {
|
||||
clear_internal_memory(out, hashlen);
|
||||
free(out);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* if raw hash requested, write it */
|
||||
if (hash) {
|
||||
memcpy(hash, out, hashlen);
|
||||
}
|
||||
|
||||
/* if encoding requested, write it */
|
||||
if (encoded && encodedlen) {
|
||||
if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) {
|
||||
clear_internal_memory(out, hashlen); /* wipe buffers if error */
|
||||
clear_internal_memory(encoded, encodedlen);
|
||||
free(out);
|
||||
return ARGON2_ENCODING_FAIL;
|
||||
}
|
||||
}
|
||||
clear_internal_memory(out, hashlen);
|
||||
free(out);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_d);
|
||||
}
|
||||
|
||||
static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) {
|
||||
size_t i;
|
||||
uint8_t d = 0U;
|
||||
|
||||
for (i = 0U; i < len; i++) {
|
||||
d |= b1[i] ^ b2[i];
|
||||
}
|
||||
return (int)((1 & ((d - 1) >> 8)) - 1);
|
||||
}
|
||||
|
||||
int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen,
|
||||
argon2_type type) {
|
||||
|
||||
argon2_context ctx;
|
||||
uint8_t *desired_result = NULL;
|
||||
|
||||
int ret = ARGON2_OK;
|
||||
|
||||
size_t encoded_len;
|
||||
uint32_t max_field_len;
|
||||
|
||||
if (pwdlen > ARGON2_MAX_PWD_LENGTH) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
if (encoded == NULL) {
|
||||
return ARGON2_DECODING_FAIL;
|
||||
}
|
||||
|
||||
encoded_len = strlen(encoded);
|
||||
if (encoded_len > UINT32_MAX) {
|
||||
return ARGON2_DECODING_FAIL;
|
||||
}
|
||||
|
||||
/* No field can be longer than the encoded length */
|
||||
max_field_len = (uint32_t)encoded_len;
|
||||
|
||||
ctx.saltlen = max_field_len;
|
||||
ctx.outlen = max_field_len;
|
||||
|
||||
ctx.salt = malloc(ctx.saltlen);
|
||||
ctx.out = malloc(ctx.outlen);
|
||||
if (!ctx.salt || !ctx.out) {
|
||||
ret = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ctx.pwd = (uint8_t *)pwd;
|
||||
ctx.pwdlen = (uint32_t)pwdlen;
|
||||
|
||||
ret = decode_string(&ctx, encoded, type);
|
||||
if (ret != ARGON2_OK) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Set aside the desired result, and get a new buffer. */
|
||||
desired_result = ctx.out;
|
||||
ctx.out = malloc(ctx.outlen);
|
||||
if (!ctx.out) {
|
||||
ret = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = argon2_verify_ctx(&ctx, (char *)desired_result, type);
|
||||
if (ret != ARGON2_OK) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
fail:
|
||||
free(ctx.salt);
|
||||
free(ctx.out);
|
||||
free(desired_result);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2d_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type) {
|
||||
int ret = argon2_ctx(context, type);
|
||||
if (ret != ARGON2_OK) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) {
|
||||
return ARGON2_VERIFY_MISMATCH;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2d_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_d);
|
||||
}
|
||||
|
||||
const char *argon2_error_message(int error_code) {
|
||||
switch (error_code) {
|
||||
case ARGON2_OK:
|
||||
return "OK";
|
||||
case ARGON2_OUTPUT_PTR_NULL:
|
||||
return "Output pointer is NULL";
|
||||
case ARGON2_OUTPUT_TOO_SHORT:
|
||||
return "Output is too short";
|
||||
case ARGON2_OUTPUT_TOO_LONG:
|
||||
return "Output is too long";
|
||||
case ARGON2_PWD_TOO_SHORT:
|
||||
return "Password is too short";
|
||||
case ARGON2_PWD_TOO_LONG:
|
||||
return "Password is too long";
|
||||
case ARGON2_SALT_TOO_SHORT:
|
||||
return "Salt is too short";
|
||||
case ARGON2_SALT_TOO_LONG:
|
||||
return "Salt is too long";
|
||||
case ARGON2_AD_TOO_SHORT:
|
||||
return "Associated data is too short";
|
||||
case ARGON2_AD_TOO_LONG:
|
||||
return "Associated data is too long";
|
||||
case ARGON2_SECRET_TOO_SHORT:
|
||||
return "Secret is too short";
|
||||
case ARGON2_SECRET_TOO_LONG:
|
||||
return "Secret is too long";
|
||||
case ARGON2_TIME_TOO_SMALL:
|
||||
return "Time cost is too small";
|
||||
case ARGON2_TIME_TOO_LARGE:
|
||||
return "Time cost is too large";
|
||||
case ARGON2_MEMORY_TOO_LITTLE:
|
||||
return "Memory cost is too small";
|
||||
case ARGON2_MEMORY_TOO_MUCH:
|
||||
return "Memory cost is too large";
|
||||
case ARGON2_LANES_TOO_FEW:
|
||||
return "Too few lanes";
|
||||
case ARGON2_LANES_TOO_MANY:
|
||||
return "Too many lanes";
|
||||
case ARGON2_PWD_PTR_MISMATCH:
|
||||
return "Password pointer is NULL, but password length is not 0";
|
||||
case ARGON2_SALT_PTR_MISMATCH:
|
||||
return "Salt pointer is NULL, but salt length is not 0";
|
||||
case ARGON2_SECRET_PTR_MISMATCH:
|
||||
return "Secret pointer is NULL, but secret length is not 0";
|
||||
case ARGON2_AD_PTR_MISMATCH:
|
||||
return "Associated data pointer is NULL, but ad length is not 0";
|
||||
case ARGON2_MEMORY_ALLOCATION_ERROR:
|
||||
return "Memory allocation error";
|
||||
case ARGON2_FREE_MEMORY_CBK_NULL:
|
||||
return "The free memory callback is NULL";
|
||||
case ARGON2_ALLOCATE_MEMORY_CBK_NULL:
|
||||
return "The allocate memory callback is NULL";
|
||||
case ARGON2_INCORRECT_PARAMETER:
|
||||
return "Argon2_Context context is NULL";
|
||||
case ARGON2_INCORRECT_TYPE:
|
||||
return "There is no such version of Argon2";
|
||||
case ARGON2_OUT_PTR_MISMATCH:
|
||||
return "Output pointer mismatch";
|
||||
case ARGON2_THREADS_TOO_FEW:
|
||||
return "Not enough threads";
|
||||
case ARGON2_THREADS_TOO_MANY:
|
||||
return "Too many threads";
|
||||
case ARGON2_MISSING_ARGS:
|
||||
return "Missing arguments";
|
||||
case ARGON2_ENCODING_FAIL:
|
||||
return "Encoding failed";
|
||||
case ARGON2_DECODING_FAIL:
|
||||
return "Decoding failed";
|
||||
case ARGON2_THREAD_FAIL:
|
||||
return "Threading failure";
|
||||
case ARGON2_DECODING_LENGTH_FAIL:
|
||||
return "Some of encoded parameters are too long or too short";
|
||||
case ARGON2_VERIFY_MISMATCH:
|
||||
return "The password does not match the supplied hash";
|
||||
default:
|
||||
return "Unknown error code";
|
||||
}
|
||||
}
|
||||
|
||||
size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
|
||||
uint32_t saltlen, uint32_t hashlen, argon2_type type) {
|
||||
return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) +
|
||||
numlen(t_cost) + numlen(m_cost) + numlen(parallelism) +
|
||||
b64len(saltlen) + b64len(hashlen);
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <pthread.h>
|
||||
#include <x86intrin.h>
|
||||
#include "../blake2/blake2.h"
|
||||
|
||||
typedef struct _Argon2d_Block
|
||||
{
|
||||
union
|
||||
{
|
||||
uint64_t data[1024 / 8] __attribute__((aligned(32)));
|
||||
__m128i dqwords[1024 / 16] __attribute__((aligned(32)));
|
||||
__m256i qqwords[1024 / 32] __attribute__((aligned(32)));
|
||||
};
|
||||
} Argon2d_Block;
|
||||
|
||||
typedef struct _Argon2ThreadData
|
||||
{
|
||||
Argon2d_Block *Matrix;
|
||||
uint32_t slice;
|
||||
uint32_t lane;
|
||||
} Argon2ThreadData;
|
||||
|
||||
#define SEGMENT_LENGTH (250U / (4U * 4U)) // memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
|
||||
#define LANE_LENGTH (SEGMENT_LENGTH * 4U) // segment_length * ARGON2_SYNC_POINTS;
|
||||
#define CONCURRENT_THREADS 4
|
||||
|
||||
static const uint64_t blake2b_IV[8] =
|
||||
{
|
||||
0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL,
|
||||
0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL,
|
||||
0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL,
|
||||
0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL
|
||||
};
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] =
|
||||
{
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = ROTL64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 40); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = ROTL64(d ^ a, 48); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
void CompressBlock(uint64_t *h, const uint64_t *m, uint64_t t, uint64_t f)
|
||||
{
|
||||
uint64_t v[16];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 8; ++i) v[i] = h[i];
|
||||
|
||||
for(i = 8; i < 16; ++i) v[i] = blake2b_IV[i - 8];
|
||||
|
||||
v[12] ^= t;
|
||||
v[14] ^= f;
|
||||
|
||||
int r;
|
||||
for(r = 0; r < 12; ++r)
|
||||
{
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i) h[i] ^= v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
void Argon2dInitHash(void *HashOut, void *Input)
|
||||
{
|
||||
blake2b_state BlakeHash;
|
||||
uint32_t InBuf[64]; // Is only 50 uint32_t, but need more space for Blake2B
|
||||
|
||||
memset(InBuf, 0x00, 200);
|
||||
|
||||
InBuf[0] = 4UL; // Lanes
|
||||
InBuf[1] = 32UL; // Output Length
|
||||
InBuf[2] = 250UL; // Memory Cost
|
||||
InBuf[3] = 1UL; // Time Cost
|
||||
InBuf[4] = 16UL; // Argon2 Version Number
|
||||
InBuf[5] = 0UL; // Type
|
||||
InBuf[6] = 80UL; // Password Length
|
||||
|
||||
memcpy(InBuf + 7, Input, 80); // Password
|
||||
|
||||
InBuf[27] = 80UL; // Salt Length
|
||||
|
||||
memcpy(InBuf + 28, Input, 80); // Salt
|
||||
|
||||
InBuf[48] = 0UL; // Secret Length
|
||||
InBuf[49] = 0UL; // Associated Data Length
|
||||
|
||||
int i;
|
||||
for(i = 50; i < 64; ++i) InBuf[i] = 0UL;
|
||||
|
||||
uint64_t H[8];
|
||||
|
||||
for(i = 0; i < 8; ++i) H[i] = blake2b_IV[i];
|
||||
|
||||
H[0] ^= 0x0000000001010040;
|
||||
|
||||
CompressBlock(H, (uint64_t *)InBuf, 128ULL, 0ULL);
|
||||
CompressBlock(H, (uint64_t *)(InBuf + 32), 200ULL, 0xFFFFFFFFFFFFFFFFULL);
|
||||
|
||||
memcpy(HashOut, H, 64U);
|
||||
}
|
||||
|
||||
void Argon2dFillFirstBlocks(Argon2d_Block *Matrix, void *InitHash)
|
||||
{
|
||||
uint32_t lane;
|
||||
for(lane = 0; lane < 4; ++lane)
|
||||
{
|
||||
((uint32_t *)InitHash)[16] = 0;
|
||||
((uint32_t *)InitHash)[17] = lane;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH].data, 1024, InitHash, 72);
|
||||
((uint32_t *)InitHash)[16] |= 1;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH + 1].data, 1024, InitHash, 72);
|
||||
}
|
||||
}
|
||||
|
||||
#include "../blake2/blamka-round-opt.h"
|
||||
|
||||
void Argon2dFillSingleBlock(Argon2d_Block *State, Argon2d_Block *RefBlock, Argon2d_Block *NextBlock)
|
||||
{
|
||||
__m256i XY[32];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
XY[i] = State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], RefBlock->qqwords[i]);
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * i + 0], State->dqwords[8 * i + 1], State->dqwords[8 * i + 2], State->dqwords[8 * i + 3],
|
||||
State->dqwords[8 * i + 4], State->dqwords[8 * i + 5], State->dqwords[8 * i + 6], State->dqwords[8 * i + 7]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * 0 + i], State->dqwords[8 * 1 + i], State->dqwords[8 * 2 + i], State->dqwords[8 * 3 + i],
|
||||
State->dqwords[8 * 4 + i], State->dqwords[8 * 5 + i], State->dqwords[8 * 6 + i], State->dqwords[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 32; ++i)
|
||||
{
|
||||
State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], XY[i]);
|
||||
_mm256_store_si256(NextBlock->qqwords + i, State->qqwords[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void FillSegment(Argon2d_Block *Matrix, uint32_t slice, uint32_t lane)
|
||||
{
|
||||
uint32_t startidx, prevoff, curoff;
|
||||
Argon2d_Block State;
|
||||
|
||||
startidx = (!slice) ? 2 : 0;
|
||||
curoff = lane * LANE_LENGTH + slice * SEGMENT_LENGTH + startidx;
|
||||
|
||||
//if(!(curoff % LANE_LENGTH)) prevoff = curoff + LANE_LENGTH - 1;
|
||||
//else prevoff = curoff - 1;
|
||||
|
||||
prevoff = (!(curoff % LANE_LENGTH)) ? curoff + LANE_LENGTH - 1 : curoff - 1;
|
||||
|
||||
memcpy(State.data, (Matrix + prevoff)->data, 1024);
|
||||
|
||||
int i;
|
||||
for(i = startidx; i < SEGMENT_LENGTH; ++i, ++curoff, ++prevoff)
|
||||
{
|
||||
if((curoff % LANE_LENGTH) == 1) prevoff = curoff - 1;
|
||||
|
||||
uint64_t pseudorand = Matrix[prevoff].data[0];
|
||||
uint64_t reflane = (!slice) ? lane : (pseudorand >> 32) & 3; // mod lanes
|
||||
|
||||
uint32_t index = i;
|
||||
bool samelane = reflane == lane;
|
||||
pseudorand &= 0xFFFFFFFFULL;
|
||||
uint32_t refareasize = ((reflane == lane) ? slice * SEGMENT_LENGTH + index - 1 : slice * SEGMENT_LENGTH + ((!index) ? -1 : 0));
|
||||
|
||||
|
||||
if(!slice) refareasize = index - 1;
|
||||
|
||||
uint64_t relativepos = (pseudorand & 0xFFFFFFFFULL);
|
||||
relativepos = relativepos * relativepos >> 32;
|
||||
relativepos = refareasize - 1 - (refareasize * relativepos >> 32);
|
||||
|
||||
uint32_t startpos = 0;
|
||||
|
||||
uint32_t abspos = (startpos + relativepos) % LANE_LENGTH;
|
||||
|
||||
uint32_t refidx = abspos;
|
||||
|
||||
Argon2dFillSingleBlock(&State, Matrix + (LANE_LENGTH * reflane + refidx), Matrix + curoff);
|
||||
}
|
||||
}
|
||||
|
||||
void *ThreadedSegmentFill(void *ThrData)
|
||||
{
|
||||
Argon2ThreadData *Data = (Argon2ThreadData *)ThrData;
|
||||
|
||||
FillSegment(Data->Matrix, Data->slice, Data->lane);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
void Argon2dFillAllBlocks(Argon2d_Block *Matrix)
|
||||
{
|
||||
pthread_t ThrHandles[CONCURRENT_THREADS];
|
||||
Argon2ThreadData ThrData[CONCURRENT_THREADS];
|
||||
|
||||
int s;
|
||||
for(s = 0; s < 4; ++s)
|
||||
{
|
||||
// WARNING: Assumes CONCURRENT_THREADS == lanes == 4
|
||||
int l;
|
||||
for(l = 0; l < 4; ++l)
|
||||
{
|
||||
FillSegment(Matrix, s, l);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Argon2dFinalizeHash(void *OutputHash, Argon2d_Block *Matrix)
|
||||
{
|
||||
int l;
|
||||
for(l = 1; l < 4; ++l)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
Matrix[LANE_LENGTH - 1].qqwords[i] = _mm256_xor_si256(Matrix[LANE_LENGTH - 1].qqwords[i], Matrix[LANE_LENGTH * l + (LANE_LENGTH - 1)].qqwords[i]);
|
||||
}
|
||||
|
||||
blake2b_long(OutputHash, 32, Matrix[LANE_LENGTH - 1].data, 1024);
|
||||
}
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr)
|
||||
{
|
||||
uint8_t tmp[72];
|
||||
|
||||
Argon2dInitHash(tmp, (uint8_t *)BlkHdr);
|
||||
|
||||
Argon2dFillFirstBlocks(Matrix, tmp);
|
||||
|
||||
Argon2dFillAllBlocks(Matrix);
|
||||
|
||||
Argon2dFinalizeHash((uint8_t *)Output, Matrix);
|
||||
}
|
||||
|
||||
void WolfArgon2dAllocateCtx(void **Matrix)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)_aligned_malloc(32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#else
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)malloc(sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
posix_memalign(Matrix, 32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#endif
|
||||
}
|
||||
|
||||
void WolfArgon2dFreeCtx(void *Matrix)
|
||||
{
|
||||
free(Matrix);
|
||||
}
|
||||
|
||||
#endif
|
345
algo/argon2/argon2d/argon2d/argon2.h
Normal file
345
algo/argon2/argon2d/argon2d/argon2.h
Normal file
@@ -0,0 +1,345 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_H
|
||||
#define ARGON2_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Symbols visibility control */
|
||||
#ifdef A2_VISCTL
|
||||
#define ARGON2_PUBLIC __attribute__((visibility("default")))
|
||||
#elif _MSC_VER
|
||||
#define ARGON2_PUBLIC __declspec(dllexport)
|
||||
#else
|
||||
#define ARGON2_PUBLIC
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Argon2 input parameter restrictions
|
||||
*/
|
||||
|
||||
/* Minimum and maximum number of lanes (degree of parallelism) */
|
||||
#define ARGON2_MIN_LANES UINT32_C(1)
|
||||
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of threads */
|
||||
#define ARGON2_MIN_THREADS UINT32_C(1)
|
||||
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Number of synchronization points between lanes per pass */
|
||||
#define ARGON2_SYNC_POINTS UINT32_C(4)
|
||||
|
||||
/* Minimum and maximum digest size in bytes */
|
||||
#define ARGON2_MIN_OUTLEN UINT32_C(4)
|
||||
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
|
||||
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
|
||||
|
||||
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
|
||||
#define ARGON2_MAX_MEMORY_BITS \
|
||||
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
|
||||
#define ARGON2_MAX_MEMORY \
|
||||
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
|
||||
|
||||
/* Minimum and maximum number of passes */
|
||||
#define ARGON2_MIN_TIME UINT32_C(1)
|
||||
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum password length in bytes */
|
||||
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum associated data length in bytes */
|
||||
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum salt length in bytes */
|
||||
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
|
||||
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum key length in bytes */
|
||||
#define ARGON2_MIN_SECRET UINT32_C(0)
|
||||
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Flags to determine which fields are securely wiped (default = no wipe). */
|
||||
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
|
||||
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
|
||||
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
|
||||
|
||||
/* Global flag to determine if we are wiping internal memory buffers. This flag
|
||||
* is defined in core.c and deafults to 1 (wipe internal memory). */
|
||||
extern int FLAG_clear_internal_memory;
|
||||
|
||||
/* Error codes */
|
||||
typedef enum Argon2_ErrorCodes {
|
||||
ARGON2_OK = 0,
|
||||
|
||||
ARGON2_OUTPUT_PTR_NULL = -1,
|
||||
|
||||
ARGON2_OUTPUT_TOO_SHORT = -2,
|
||||
ARGON2_OUTPUT_TOO_LONG = -3,
|
||||
|
||||
ARGON2_PWD_TOO_SHORT = -4,
|
||||
ARGON2_PWD_TOO_LONG = -5,
|
||||
|
||||
ARGON2_SALT_TOO_SHORT = -6,
|
||||
ARGON2_SALT_TOO_LONG = -7,
|
||||
|
||||
ARGON2_AD_TOO_SHORT = -8,
|
||||
ARGON2_AD_TOO_LONG = -9,
|
||||
|
||||
ARGON2_SECRET_TOO_SHORT = -10,
|
||||
ARGON2_SECRET_TOO_LONG = -11,
|
||||
|
||||
ARGON2_TIME_TOO_SMALL = -12,
|
||||
ARGON2_TIME_TOO_LARGE = -13,
|
||||
|
||||
ARGON2_MEMORY_TOO_LITTLE = -14,
|
||||
ARGON2_MEMORY_TOO_MUCH = -15,
|
||||
|
||||
ARGON2_LANES_TOO_FEW = -16,
|
||||
ARGON2_LANES_TOO_MANY = -17,
|
||||
|
||||
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
|
||||
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
|
||||
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
|
||||
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
|
||||
|
||||
ARGON2_MEMORY_ALLOCATION_ERROR = -22,
|
||||
|
||||
ARGON2_FREE_MEMORY_CBK_NULL = -23,
|
||||
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
|
||||
|
||||
ARGON2_INCORRECT_PARAMETER = -25,
|
||||
ARGON2_INCORRECT_TYPE = -26,
|
||||
|
||||
ARGON2_OUT_PTR_MISMATCH = -27,
|
||||
|
||||
ARGON2_THREADS_TOO_FEW = -28,
|
||||
ARGON2_THREADS_TOO_MANY = -29,
|
||||
|
||||
ARGON2_MISSING_ARGS = -30,
|
||||
|
||||
ARGON2_ENCODING_FAIL = -31,
|
||||
|
||||
ARGON2_DECODING_FAIL = -32,
|
||||
|
||||
ARGON2_THREAD_FAIL = -33,
|
||||
|
||||
ARGON2_DECODING_LENGTH_FAIL = -34,
|
||||
|
||||
ARGON2_VERIFY_MISMATCH = -35
|
||||
} argon2_error_codes;
|
||||
|
||||
/* Memory allocator types --- for external allocation */
|
||||
typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
|
||||
typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
|
||||
|
||||
/* Argon2 external data structures */
|
||||
|
||||
/*
|
||||
*****
|
||||
* Context: structure to hold Argon2 inputs:
|
||||
* output array and its length,
|
||||
* password and its length,
|
||||
* salt and its length,
|
||||
* secret and its length,
|
||||
* associated data and its length,
|
||||
* number of passes, amount of used memory (in KBytes, can be rounded up a bit)
|
||||
* number of parallel threads that will be run.
|
||||
* All the parameters above affect the output hash value.
|
||||
* Additionally, two function pointers can be provided to allocate and
|
||||
* deallocate the memory (if NULL, memory will be allocated internally).
|
||||
* Also, three flags indicate whether to erase password, secret as soon as they
|
||||
* are pre-hashed (and thus not needed anymore), and the entire memory
|
||||
*****
|
||||
* Simplest situation: you have output array out[8], password is stored in
|
||||
* pwd[32], salt is stored in salt[16], you do not have keys nor associated
|
||||
* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
|
||||
* 4 parallel lanes.
|
||||
* You want to erase the password, but you're OK with last pass not being
|
||||
* erased. You want to use the default memory allocator.
|
||||
* Then you initialize:
|
||||
Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
|
||||
*/
|
||||
typedef struct Argon2_Context {
|
||||
uint8_t *out; /* output array */
|
||||
uint32_t outlen; /* digest length */
|
||||
|
||||
uint8_t *pwd; /* password array */
|
||||
uint32_t pwdlen; /* password length */
|
||||
|
||||
uint8_t *salt; /* salt array */
|
||||
uint32_t saltlen; /* salt length */
|
||||
|
||||
uint8_t *secret; /* key array */
|
||||
uint32_t secretlen; /* key length */
|
||||
|
||||
uint8_t *ad; /* associated data array */
|
||||
uint32_t adlen; /* associated data length */
|
||||
|
||||
uint32_t t_cost; /* number of passes */
|
||||
uint32_t m_cost; /* amount of memory requested (KB) */
|
||||
uint32_t lanes; /* number of lanes */
|
||||
uint32_t threads; /* maximum number of threads */
|
||||
|
||||
allocate_fptr allocate_cbk; /* pointer to memory allocator */
|
||||
deallocate_fptr free_cbk; /* pointer to memory deallocator */
|
||||
|
||||
uint32_t flags; /* array of bool options */
|
||||
} argon2_context;
|
||||
|
||||
/* Argon2 primitive type */
|
||||
typedef enum Argon2_type {
|
||||
Argon2_d = 0
|
||||
} argon2_type;
|
||||
|
||||
/*
|
||||
* Function that gives the string representation of an argon2_type.
|
||||
* @param type The argon2_type that we want the string for
|
||||
* @param uppercase Whether the string should have the first letter uppercase
|
||||
* @return NULL if invalid type, otherwise the string representation.
|
||||
*/
|
||||
ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase);
|
||||
|
||||
/*
|
||||
* Function that performs memory-hard hashing with certain degree of parallelism
|
||||
* @param context Pointer to the Argon2 internal structure
|
||||
* @return Error code if smth is wrong, ARGON2_OK otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
|
||||
|
||||
/**
|
||||
* Hashes a password with Argon2i, producing a raw hash by allocating memory at
|
||||
* @hash
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Sets memory usage to m_cost kibibytes
|
||||
* @param parallelism Number of threads and compute lanes
|
||||
* @param pwd Pointer to password
|
||||
* @param pwdlen Password size in bytes
|
||||
* @param salt Pointer to salt
|
||||
* @param saltlen Salt size in bytes
|
||||
* @param hash Buffer where to write the raw hash - updated by the function
|
||||
* @param hashlen Desired length of the hash in bytes
|
||||
* @pre Different parallelism levels will give different results
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type);
|
||||
|
||||
/**
|
||||
* Verifies a password against an encoded string
|
||||
* Encoded string is restricted as in validate_inputs()
|
||||
* @param encoded String encoding parameters, salt, hash
|
||||
* @param pwd Pointer to password
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen, argon2_type type);
|
||||
|
||||
/**
|
||||
* Argon2d: Version of Argon2 that picks memory blocks depending
|
||||
* on the password and salt. Only for side-channel-free
|
||||
* environment!!
|
||||
*****
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2d hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @param hash The password hash to verify. The length of the hash is
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type);
|
||||
|
||||
/**
|
||||
* Get the associated error message for given error code
|
||||
* @return The error message associated with the given error code
|
||||
*/
|
||||
ARGON2_PUBLIC const char *argon2_error_message(int error_code);
|
||||
|
||||
/**
|
||||
* Returns the encoded hash length for the given input parameters
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Memory usage in kibibytes
|
||||
* @param parallelism Number of threads; used to compute lanes
|
||||
* @param saltlen Salt size in bytes
|
||||
* @param hashlen Hash size in bytes
|
||||
* @param type The argon2_type that we want the encoded length for
|
||||
* @return The encoded hash length in bytes
|
||||
*/
|
||||
ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost,
|
||||
uint32_t parallelism, uint32_t saltlen,
|
||||
uint32_t hashlen, argon2_type type);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr);
|
||||
void WolfArgon2dAllocateCtx(void **Matrix);
|
||||
void WolfArgon2dFreeCtx(void *Matrix);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
617
algo/argon2/argon2d/argon2d/core.c
Normal file
617
algo/argon2/argon2d/argon2d/core.c
Normal file
@@ -0,0 +1,617 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
/*For memory wiping*/
|
||||
#ifdef _MSC_VER
|
||||
#include <windows.h>
|
||||
#include <winbase.h> /* For SecureZeroMemory */
|
||||
#endif
|
||||
#if defined __STDC_LIB_EXT1__
|
||||
#define __STDC_WANT_LIB_EXT1__ 1
|
||||
#endif
|
||||
#define VC_GE_2005(version) (version >= 1400)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "core.h"
|
||||
#include "thread.h"
|
||||
#include "../blake2/blake2.h"
|
||||
#include "../blake2/blake2-impl.h"
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_attribute(optnone)
|
||||
#define NOT_OPTIMIZED __attribute__((optnone))
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#define GCC_VERSION \
|
||||
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#if GCC_VERSION >= 40400
|
||||
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef NOT_OPTIMIZED
|
||||
#define NOT_OPTIMIZED
|
||||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
|
||||
void copy_block(block *dst, const block *src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
void xor_block(block *dst, const block *src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] ^= src->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static void store_block(void *output, const block *src) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/***************Memory functions*****************/
|
||||
|
||||
int allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num*size;
|
||||
if (memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 1. Check for multiplication overflow */
|
||||
if (size != 0 && memory_size / size != num) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 2. Try to allocate with appropriate allocator */
|
||||
if (context->allocate_cbk) {
|
||||
(context->allocate_cbk)(memory, memory_size);
|
||||
} else {
|
||||
*memory = malloc(memory_size);
|
||||
}
|
||||
|
||||
if (*memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
void free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num*size;
|
||||
clear_internal_memory(memory, memory_size);
|
||||
if (context->free_cbk) {
|
||||
(context->free_cbk)(memory, memory_size);
|
||||
} else {
|
||||
free(memory);
|
||||
}
|
||||
}
|
||||
|
||||
void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
|
||||
#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER)
|
||||
SecureZeroMemory(v, n);
|
||||
#elif defined memset_s
|
||||
memset_s(v, n, 0, n);
|
||||
#elif defined(__OpenBSD__)
|
||||
explicit_bzero(v, n);
|
||||
#else
|
||||
static void *(*const volatile memset_sec)(void *, int, size_t) = &memset;
|
||||
memset_sec(v, 0, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Memory clear flag defaults to true. */
|
||||
int FLAG_clear_internal_memory = 1;
|
||||
void clear_internal_memory(void *v, size_t n) {
|
||||
if (FLAG_clear_internal_memory && v) {
|
||||
secure_wipe_memory(v, n);
|
||||
}
|
||||
}
|
||||
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
if (context != NULL && instance != NULL) {
|
||||
block blockhash;
|
||||
uint32_t l;
|
||||
|
||||
copy_block(&blockhash, instance->memory + instance->lane_length - 1);
|
||||
|
||||
/* XOR the last blocks */
|
||||
for (l = 1; l < instance->lanes; ++l) {
|
||||
uint32_t last_block_in_lane =
|
||||
l * instance->lane_length + (instance->lane_length - 1);
|
||||
xor_block(&blockhash, instance->memory + last_block_in_lane);
|
||||
}
|
||||
|
||||
/* Hash the result */
|
||||
{
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store_block(blockhash_bytes, &blockhash);
|
||||
blake2b_long(context->out, context->outlen, blockhash_bytes,
|
||||
ARGON2_BLOCK_SIZE);
|
||||
/* clear blockhash and blockhash_bytes */
|
||||
clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE);
|
||||
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
free_memory(context, (uint8_t *)instance->memory,
|
||||
instance->memory_blocks, sizeof(block));
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane) {
|
||||
/*
|
||||
* Pass 0:
|
||||
* This lane : all already finished segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : all already finished segments
|
||||
* Pass 1+:
|
||||
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : (SYNC_POINTS - 1) last segments
|
||||
*/
|
||||
uint32_t reference_area_size;
|
||||
uint64_t relative_position;
|
||||
uint32_t start_position, absolute_position;
|
||||
|
||||
if (0 == position->pass) {
|
||||
/* First pass */
|
||||
if (0 == position->slice) {
|
||||
/* First slice */
|
||||
reference_area_size =
|
||||
position->index - 1; /* all but the previous */
|
||||
} else {
|
||||
if (same_lane) {
|
||||
/* The same lane => add current segment */
|
||||
reference_area_size =
|
||||
position->slice * instance->segment_length +
|
||||
position->index - 1;
|
||||
} else {
|
||||
reference_area_size =
|
||||
position->slice * instance->segment_length +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Second pass */
|
||||
if (same_lane) {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length + position->index -
|
||||
1;
|
||||
} else {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
|
||||
* relative position */
|
||||
relative_position = pseudo_rand;
|
||||
relative_position = relative_position * relative_position >> 32;
|
||||
relative_position = reference_area_size - 1 -
|
||||
(reference_area_size * relative_position >> 32);
|
||||
|
||||
/* 1.2.5 Computing starting position */
|
||||
start_position = 0;
|
||||
|
||||
if (0 != position->pass) {
|
||||
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
|
||||
? 0
|
||||
: (position->slice + 1) * instance->segment_length;
|
||||
}
|
||||
|
||||
/* 1.2.6. Computing absolute position */
|
||||
absolute_position = (start_position + relative_position) %
|
||||
instance->lane_length; /* absolute position */
|
||||
return absolute_position;
|
||||
}
|
||||
|
||||
/* Single-threaded version for p=1 case */
|
||||
static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
||||
uint32_t r, s, l;
|
||||
|
||||
for (r = 0; r < instance->passes; ++r) {
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
argon2_position_t position = {r, l, (uint8_t)s, 0};
|
||||
fill_segment(instance, position);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
#if !defined(ARGON2_NO_THREADS)
|
||||
|
||||
#ifdef _WIN32
|
||||
static unsigned __stdcall fill_segment_thr(void *thread_data)
|
||||
#else
|
||||
static void *fill_segment_thr(void *thread_data)
|
||||
#endif
|
||||
{
|
||||
argon2_thread_data *my_data = thread_data;
|
||||
fill_segment(my_data->instance_ptr, my_data->pos);
|
||||
argon2_thread_exit();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Multi-threaded version for p > 1 case */
|
||||
static int fill_memory_blocks_mt(argon2_instance_t *instance) {
|
||||
uint32_t r, s;
|
||||
argon2_thread_handle_t *thread = NULL;
|
||||
argon2_thread_data *thr_data = NULL;
|
||||
int rc = ARGON2_OK;
|
||||
|
||||
/* 1. Allocating space for threads */
|
||||
thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t));
|
||||
if (thread == NULL) {
|
||||
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
thr_data = calloc(instance->lanes, sizeof(argon2_thread_data));
|
||||
if (thr_data == NULL) {
|
||||
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
for (r = 0; r < instance->passes; ++r) {
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
uint32_t l;
|
||||
|
||||
/* 2. Calling threads */
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
argon2_position_t position;
|
||||
|
||||
/* 2.1 Join a thread if limit is exceeded */
|
||||
if (l >= instance->threads) {
|
||||
if (argon2_thread_join(thread[l - instance->threads])) {
|
||||
rc = ARGON2_THREAD_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* 2.2 Create thread */
|
||||
position.pass = r;
|
||||
position.lane = l;
|
||||
position.slice = (uint8_t)s;
|
||||
position.index = 0;
|
||||
thr_data[l].instance_ptr =
|
||||
instance; /* preparing the thread input */
|
||||
memcpy(&(thr_data[l].pos), &position,
|
||||
sizeof(argon2_position_t));
|
||||
if (argon2_thread_create(&thread[l], &fill_segment_thr,
|
||||
(void *)&thr_data[l])) {
|
||||
rc = ARGON2_THREAD_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* fill_segment(instance, position); */
|
||||
/*Non-thread equivalent of the lines above */
|
||||
}
|
||||
|
||||
/* 3. Joining remaining threads */
|
||||
for (l = instance->lanes - instance->threads; l < instance->lanes;
|
||||
++l) {
|
||||
if (argon2_thread_join(thread[l])) {
|
||||
rc = ARGON2_THREAD_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fail:
|
||||
if (thread != NULL) {
|
||||
free(thread);
|
||||
}
|
||||
if (thr_data != NULL) {
|
||||
free(thr_data);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* ARGON2_NO_THREADS */
|
||||
|
||||
int fill_memory_blocks(argon2_instance_t *instance) {
|
||||
if (instance == NULL || instance->lanes == 0) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
#if defined(ARGON2_NO_THREADS)
|
||||
return fill_memory_blocks_st(instance);
|
||||
#else
|
||||
return instance->threads == 1 ?
|
||||
fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance);
|
||||
#endif
|
||||
}
|
||||
|
||||
int validate_inputs(const argon2_context *context) {
|
||||
if (NULL == context) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
||||
if (NULL == context->out) {
|
||||
return ARGON2_OUTPUT_PTR_NULL;
|
||||
}
|
||||
|
||||
/* Validate output length */
|
||||
if (ARGON2_MIN_OUTLEN > context->outlen) {
|
||||
return ARGON2_OUTPUT_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_OUTLEN < context->outlen) {
|
||||
return ARGON2_OUTPUT_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate password (required param) */
|
||||
if (NULL == context->pwd) {
|
||||
if (0 != context->pwdlen) {
|
||||
return ARGON2_PWD_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate salt (required param) */
|
||||
if (NULL == context->salt) {
|
||||
if (0 != context->saltlen) {
|
||||
return ARGON2_SALT_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_SALT_LENGTH > context->saltlen) {
|
||||
return ARGON2_SALT_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_SALT_LENGTH < context->saltlen) {
|
||||
return ARGON2_SALT_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate secret (optional param) */
|
||||
if (NULL == context->secret) {
|
||||
if (0 != context->secretlen) {
|
||||
return ARGON2_SECRET_PTR_MISMATCH;
|
||||
}
|
||||
} else {
|
||||
if (ARGON2_MIN_SECRET > context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_SECRET < context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate associated data (optional param) */
|
||||
if (NULL == context->ad) {
|
||||
if (0 != context->adlen) {
|
||||
return ARGON2_AD_PTR_MISMATCH;
|
||||
}
|
||||
} else {
|
||||
if (ARGON2_MIN_AD_LENGTH > context->adlen) {
|
||||
return ARGON2_AD_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_AD_LENGTH < context->adlen) {
|
||||
return ARGON2_AD_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate memory cost */
|
||||
if (ARGON2_MIN_MEMORY > context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_MEMORY < context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_MUCH;
|
||||
}
|
||||
|
||||
if (context->m_cost < 8 * context->lanes) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
/* Validate time cost */
|
||||
if (ARGON2_MIN_TIME > context->t_cost) {
|
||||
return ARGON2_TIME_TOO_SMALL;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_TIME < context->t_cost) {
|
||||
return ARGON2_TIME_TOO_LARGE;
|
||||
}
|
||||
|
||||
/* Validate lanes */
|
||||
if (ARGON2_MIN_LANES > context->lanes) {
|
||||
return ARGON2_LANES_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_LANES < context->lanes) {
|
||||
return ARGON2_LANES_TOO_MANY;
|
||||
}
|
||||
|
||||
/* Validate threads */
|
||||
if (ARGON2_MIN_THREADS > context->threads) {
|
||||
return ARGON2_THREADS_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_THREADS < context->threads) {
|
||||
return ARGON2_THREADS_TOO_MANY;
|
||||
}
|
||||
|
||||
if (NULL != context->allocate_cbk && NULL == context->free_cbk) {
|
||||
return ARGON2_FREE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
if (NULL == context->allocate_cbk && NULL != context->free_cbk) {
|
||||
return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
uint32_t l;
|
||||
/* Make the first and second block in each lane as G(H0||0||i) or
|
||||
G(H0||1||i) */
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
|
||||
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
|
||||
ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 0],
|
||||
blockhash_bytes);
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
|
||||
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
|
||||
ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 1],
|
||||
blockhash_bytes);
|
||||
}
|
||||
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type) {
|
||||
blake2b_state BlakeHash;
|
||||
uint8_t value[sizeof(uint32_t)];
|
||||
|
||||
if (NULL == context || NULL == blockhash) {
|
||||
return;
|
||||
}
|
||||
|
||||
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
store32(&value, context->lanes);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->outlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->m_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->t_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, ARGON2_VERSION_NUMBER);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, (uint32_t)type);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->pwdlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->pwd != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
context->pwdlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
||||
secure_wipe_memory(context->pwd, context->pwdlen);
|
||||
context->pwdlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->saltlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->salt != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->salt,
|
||||
context->saltlen);
|
||||
}
|
||||
|
||||
store32(&value, context->secretlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->secret != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
|
||||
context->secretlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
||||
secure_wipe_memory(context->secret, context->secretlen);
|
||||
context->secretlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->adlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->ad != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
|
||||
context->adlen);
|
||||
}
|
||||
|
||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
int result = ARGON2_OK;
|
||||
|
||||
if (instance == NULL || context == NULL)
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
instance->context_ptr = context;
|
||||
|
||||
/* 1. Memory allocation */
|
||||
result = allocate_memory(context, (uint8_t **)&(instance->memory),
|
||||
instance->memory_blocks, sizeof(block));
|
||||
if (result != ARGON2_OK) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
/* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */
|
||||
/* Hashing all inputs */
|
||||
initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
|
||||
clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
fill_first_blocks(blockhash, instance);
|
||||
/* Clearing the hash */
|
||||
clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
229
algo/argon2/argon2d/argon2d/core.h
Normal file
229
algo/argon2/argon2d/argon2d/core.h
Normal file
@@ -0,0 +1,229 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_CORE_H
|
||||
#define ARGON2_CORE_H
|
||||
|
||||
#include "argon2.h"
|
||||
|
||||
#define CONST_CAST(x) (x)(uintptr_t)
|
||||
|
||||
/**********************Argon2 internal constants*******************************/
|
||||
|
||||
enum argon2_core_constants {
|
||||
/* Version of the algorithm */
|
||||
ARGON2_VERSION_NUMBER = 0x10,
|
||||
/* Memory block size in bytes */
|
||||
ARGON2_BLOCK_SIZE = 1024,
|
||||
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
|
||||
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
|
||||
|
||||
/* Number of pseudo-random values generated by one call to Blake in Argon2i
|
||||
to
|
||||
generate reference block positions */
|
||||
ARGON2_ADDRESSES_IN_BLOCK = 128,
|
||||
|
||||
/* Pre-hashing digest length and its extension*/
|
||||
ARGON2_PREHASH_DIGEST_LENGTH = 64,
|
||||
ARGON2_PREHASH_SEED_LENGTH = 72
|
||||
};
|
||||
|
||||
/*************************Argon2 internal data types***********************/
|
||||
|
||||
/*
|
||||
* Structure for the (1KB) memory block implemented as 128 64-bit words.
|
||||
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
|
||||
* bounds checking).
|
||||
*/
|
||||
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
|
||||
|
||||
/*****************Functions that work with the block******************/
|
||||
|
||||
/* Initialize each byte of the block with @in */
|
||||
void init_block_value(block *b, uint8_t in);
|
||||
|
||||
/* Copy block @src to block @dst */
|
||||
void copy_block(block *dst, const block *src);
|
||||
|
||||
/* XOR @src onto @dst bytewise */
|
||||
void xor_block(block *dst, const block *src);
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
* and derived values.
|
||||
* Used to evaluate the number and location of blocks to construct in each
|
||||
* thread
|
||||
*/
|
||||
typedef struct Argon2_instance_t {
|
||||
block *memory; /* Memory pointer */
|
||||
uint32_t version;
|
||||
uint32_t passes; /* Number of passes */
|
||||
uint32_t memory_blocks; /* Number of blocks in memory */
|
||||
uint32_t segment_length;
|
||||
uint32_t lane_length;
|
||||
uint32_t lanes;
|
||||
uint32_t limit;
|
||||
uint32_t threads;
|
||||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
argon2_context *context_ptr; /* points back to original context */
|
||||
} argon2_instance_t;
|
||||
|
||||
/*
|
||||
* Argon2 position: where we construct the block right now. Used to distribute
|
||||
* work between threads.
|
||||
*/
|
||||
typedef struct Argon2_position_t {
|
||||
uint32_t pass;
|
||||
uint32_t lane;
|
||||
uint8_t slice;
|
||||
uint32_t index;
|
||||
} argon2_position_t;
|
||||
|
||||
/*Struct that holds the inputs for thread handling FillSegment*/
|
||||
typedef struct Argon2_thread_data {
|
||||
argon2_instance_t *instance_ptr;
|
||||
argon2_position_t pos;
|
||||
} argon2_thread_data;
|
||||
|
||||
/*************************Argon2 core functions********************************/
|
||||
|
||||
/* Allocates memory to the given pointer, uses the appropriate allocator as
|
||||
* specified in the context. Total allocated memory is num*size.
|
||||
* @param context argon2_context which specifies the allocator
|
||||
* @param memory pointer to the pointer to the memory
|
||||
* @param size the size in bytes for each element to be allocated
|
||||
* @param num the number of elements to be allocated
|
||||
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
|
||||
*/
|
||||
int allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/*
|
||||
* Frees memory at the given pointer, uses the appropriate deallocator as
|
||||
* specified in the context. Also cleans the memory using clear_internal_memory.
|
||||
* @param context argon2_context which specifies the deallocator
|
||||
* @param memory pointer to buffer to be freed
|
||||
* @param size the size in bytes for each element to be deallocated
|
||||
* @param num the number of elements to be deallocated
|
||||
*/
|
||||
void free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/* Function that securely cleans the memory. This ignores any flags set
|
||||
* regarding clearing memory. Usually one just calls clear_internal_memory.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void secure_wipe_memory(void *v, size_t n);
|
||||
|
||||
/* Function that securely clears the memory if FLAG_clear_internal_memory is
|
||||
* set. If the flag isn't set, this function does nothing.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void clear_internal_memory(void *v, size_t n);
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
* distribution and using a pseudo-random value as input
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Pointer to the current position
|
||||
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
|
||||
* @param same_lane Indicates if the block will be taken from the current lane.
|
||||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
/*
|
||||
* Function that validates all inputs against predefined restrictions and return
|
||||
* an error code
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>
|
||||
*/
|
||||
int validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
|
||||
* password and secret if needed
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param blockhash Buffer for pre-hashing digest
|
||||
* @param type Argon2 type
|
||||
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
|
||||
* allocated
|
||||
*/
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Function creates first 2 blocks per lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @param blockhash Pointer to the pre-hashing digest
|
||||
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
|
||||
*/
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
|
||||
* initialized
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param instance Current Argon2 instance
|
||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
* the memory.
|
||||
* @param context Pointer to current Argon2 context (use only the out parameters
|
||||
* from it)
|
||||
* @param instance Pointer to current instance of Argon2
|
||||
* @pre instance->state must point to necessary amount of memory
|
||||
* @pre context->out must point to outlen bytes of memory
|
||||
* @pre if context->free_cbk is not NULL, it should point to a function that
|
||||
* deallocates memory
|
||||
*/
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param context current context
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
/*
|
||||
* Function that fills the entire memory t_cost times based on the first two
|
||||
* blocks in each lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @return ARGON2_OK if successful, @context->state
|
||||
*/
|
||||
int fill_memory_blocks(argon2_instance_t *instance);
|
||||
|
||||
#endif
|
456
algo/argon2/argon2d/argon2d/encoding.c
Normal file
456
algo/argon2/argon2d/argon2d/encoding.c
Normal file
@@ -0,0 +1,456 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "encoding.h"
|
||||
#include "core.h"
|
||||
|
||||
/*
|
||||
* Example code for a decoder and encoder of "hash strings", with Argon2
|
||||
* parameters.
|
||||
*
|
||||
* This code comprises three sections:
|
||||
*
|
||||
* -- The first section contains generic Base64 encoding and decoding
|
||||
* functions. It is conceptually applicable to any hash function
|
||||
* implementation that uses Base64 to encode and decode parameters,
|
||||
* salts and outputs. It could be made into a library, provided that
|
||||
* the relevant functions are made public (non-static) and be given
|
||||
* reasonable names to avoid collisions with other functions.
|
||||
*
|
||||
* -- The second section is specific to Argon2. It encodes and decodes
|
||||
* the parameters, salts and outputs. It does not compute the hash
|
||||
* itself.
|
||||
*
|
||||
* The code was originally written by Thomas Pornin <pornin@bolet.org>,
|
||||
* to whom comments and remarks may be sent. It is released under what
|
||||
* should amount to Public Domain or its closest equivalent; the
|
||||
* following mantra is supposed to incarnate that fact with all the
|
||||
* proper legal rituals:
|
||||
*
|
||||
* ---------------------------------------------------------------------
|
||||
* This file is provided under the terms of Creative Commons CC0 1.0
|
||||
* Public Domain Dedication. To the extent possible under law, the
|
||||
* author (Thomas Pornin) has waived all copyright and related or
|
||||
* neighboring rights to this file. This work is published from: Canada.
|
||||
* ---------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (c) 2015 Thomas Pornin
|
||||
*/
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* Common code; could be shared between different hash functions.
|
||||
*
|
||||
* Note: the Base64 functions below assume that uppercase letters (resp.
|
||||
* lowercase letters) have consecutive numerical codes, that fit on 8
|
||||
* bits. All modern systems use ASCII-compatible charsets, where these
|
||||
* properties are true. If you are stuck with a dinosaur of a system
|
||||
* that still defaults to EBCDIC then you already have much bigger
|
||||
* interoperability issues to deal with.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Some macros for constant-time comparisons. These work over values in
|
||||
* the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true".
|
||||
*/
|
||||
#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF)
|
||||
#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF)
|
||||
#define GE(x, y) (GT(y, x) ^ 0xFF)
|
||||
#define LT(x, y) GT(y, x)
|
||||
#define LE(x, y) GE(y, x)
|
||||
|
||||
/*
|
||||
* Convert value x (0..63) to corresponding Base64 character.
|
||||
*/
|
||||
static int b64_byte_to_char(unsigned x) {
|
||||
return (LT(x, 26) & (x + 'A')) |
|
||||
(GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) |
|
||||
(GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') |
|
||||
(EQ(x, 63) & '/');
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert character c to the corresponding 6-bit value. If character c
|
||||
* is not a Base64 character, then 0xFF (255) is returned.
|
||||
*/
|
||||
static unsigned b64_char_to_byte(int c) {
|
||||
unsigned x;
|
||||
|
||||
x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) |
|
||||
(GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) |
|
||||
(GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) |
|
||||
(EQ(c, '/') & 63);
|
||||
return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF));
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert some bytes to Base64. 'dst_len' is the length (in characters)
|
||||
* of the output buffer 'dst'; if that buffer is not large enough to
|
||||
* receive the result (including the terminating 0), then (size_t)-1
|
||||
* is returned. Otherwise, the zero-terminated Base64 string is written
|
||||
* in the buffer, and the output length (counted WITHOUT the terminating
|
||||
* zero) is returned.
|
||||
*/
|
||||
static size_t to_base64(char *dst, size_t dst_len, const void *src,
|
||||
size_t src_len) {
|
||||
size_t olen;
|
||||
const unsigned char *buf;
|
||||
unsigned acc, acc_len;
|
||||
|
||||
olen = (src_len / 3) << 2;
|
||||
switch (src_len % 3) {
|
||||
case 2:
|
||||
olen++;
|
||||
/* fall through */
|
||||
case 1:
|
||||
olen += 2;
|
||||
break;
|
||||
}
|
||||
if (dst_len <= olen) {
|
||||
return (size_t)-1;
|
||||
}
|
||||
acc = 0;
|
||||
acc_len = 0;
|
||||
buf = (const unsigned char *)src;
|
||||
while (src_len-- > 0) {
|
||||
acc = (acc << 8) + (*buf++);
|
||||
acc_len += 8;
|
||||
while (acc_len >= 6) {
|
||||
acc_len -= 6;
|
||||
*dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F);
|
||||
}
|
||||
}
|
||||
if (acc_len > 0) {
|
||||
*dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F);
|
||||
}
|
||||
*dst++ = 0;
|
||||
return olen;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode Base64 chars into bytes. The '*dst_len' value must initially
|
||||
* contain the length of the output buffer '*dst'; when the decoding
|
||||
* ends, the actual number of decoded bytes is written back in
|
||||
* '*dst_len'.
|
||||
*
|
||||
* Decoding stops when a non-Base64 character is encountered, or when
|
||||
* the output buffer capacity is exceeded. If an error occurred (output
|
||||
* buffer is too small, invalid last characters leading to unprocessed
|
||||
* buffered bits), then NULL is returned; otherwise, the returned value
|
||||
* points to the first non-Base64 character in the source stream, which
|
||||
* may be the terminating zero.
|
||||
*/
|
||||
static const char *from_base64(void *dst, size_t *dst_len, const char *src) {
|
||||
size_t len;
|
||||
unsigned char *buf;
|
||||
unsigned acc, acc_len;
|
||||
|
||||
buf = (unsigned char *)dst;
|
||||
len = 0;
|
||||
acc = 0;
|
||||
acc_len = 0;
|
||||
for (;;) {
|
||||
unsigned d;
|
||||
|
||||
d = b64_char_to_byte(*src);
|
||||
if (d == 0xFF) {
|
||||
break;
|
||||
}
|
||||
src++;
|
||||
acc = (acc << 6) + d;
|
||||
acc_len += 6;
|
||||
if (acc_len >= 8) {
|
||||
acc_len -= 8;
|
||||
if ((len++) >= *dst_len) {
|
||||
return NULL;
|
||||
}
|
||||
*buf++ = (acc >> acc_len) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the input length is equal to 1 modulo 4 (which is
|
||||
* invalid), then there will remain 6 unprocessed bits;
|
||||
* otherwise, only 0, 2 or 4 bits are buffered. The buffered
|
||||
* bits must also all be zero.
|
||||
*/
|
||||
if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
*dst_len = len;
|
||||
return src;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode decimal integer from 'str'; the value is written in '*v'.
|
||||
* Returned value is a pointer to the next non-decimal character in the
|
||||
* string. If there is no digit at all, or the value encoding is not
|
||||
* minimal (extra leading zeros), or the value does not fit in an
|
||||
* 'unsigned long', then NULL is returned.
|
||||
*/
|
||||
static const char *decode_decimal(const char *str, unsigned long *v) {
|
||||
const char *orig;
|
||||
unsigned long acc;
|
||||
|
||||
acc = 0;
|
||||
for (orig = str;; str++) {
|
||||
int c;
|
||||
|
||||
c = *str;
|
||||
if (c < '0' || c > '9') {
|
||||
break;
|
||||
}
|
||||
c -= '0';
|
||||
if (acc > (ULONG_MAX / 10)) {
|
||||
return NULL;
|
||||
}
|
||||
acc *= 10;
|
||||
if ((unsigned long)c > (ULONG_MAX - acc)) {
|
||||
return NULL;
|
||||
}
|
||||
acc += (unsigned long)c;
|
||||
}
|
||||
if (str == orig || (*orig == '0' && str != (orig + 1))) {
|
||||
return NULL;
|
||||
}
|
||||
*v = acc;
|
||||
return str;
|
||||
}
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* Code specific to Argon2.
|
||||
*
|
||||
* The code below applies the following format:
|
||||
*
|
||||
* $argon2<T>[$v=<num>]$m=<num>,t=<num>,p=<num>$<bin>$<bin>
|
||||
*
|
||||
* where <T> is either 'd', 'id', or 'i', <num> is a decimal integer (positive,
|
||||
* fits in an 'unsigned long'), and <bin> is Base64-encoded data (no '=' padding
|
||||
* characters, no newline or whitespace).
|
||||
*
|
||||
* The last two binary chunks (encoded in Base64) are, in that order,
|
||||
* the salt and the output. Both are required. The binary salt length and the
|
||||
* output length must be in the allowed ranges defined in argon2.h.
|
||||
*
|
||||
* The ctx struct must contain buffers large enough to hold the salt and pwd
|
||||
* when it is fed into decode_string.
|
||||
*/
|
||||
|
||||
int decode_string(argon2_context *ctx, const char *str, argon2_type type) {
|
||||
|
||||
/* check for prefix */
|
||||
#define CC(prefix) \
|
||||
do { \
|
||||
size_t cc_len = strlen(prefix); \
|
||||
if (strncmp(str, prefix, cc_len) != 0) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
str += cc_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
/* optional prefix checking with supplied code */
|
||||
#define CC_opt(prefix, code) \
|
||||
do { \
|
||||
size_t cc_len = strlen(prefix); \
|
||||
if (strncmp(str, prefix, cc_len) == 0) { \
|
||||
str += cc_len; \
|
||||
{ code; } \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
/* Decoding prefix into decimal */
|
||||
#define DECIMAL(x) \
|
||||
do { \
|
||||
unsigned long dec_x; \
|
||||
str = decode_decimal(str, &dec_x); \
|
||||
if (str == NULL) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
(x) = dec_x; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
|
||||
/* Decoding prefix into uint32_t decimal */
|
||||
#define DECIMAL_U32(x) \
|
||||
do { \
|
||||
unsigned long dec_x; \
|
||||
str = decode_decimal(str, &dec_x); \
|
||||
if (str == NULL || dec_x > UINT32_MAX) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
(x) = (uint32_t)dec_x; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
|
||||
/* Decoding base64 into a binary buffer */
|
||||
#define BIN(buf, max_len, len) \
|
||||
do { \
|
||||
size_t bin_len = (max_len); \
|
||||
str = from_base64(buf, &bin_len, str); \
|
||||
if (str == NULL || bin_len > UINT32_MAX) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
(len) = (uint32_t)bin_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
size_t maxsaltlen = ctx->saltlen;
|
||||
size_t maxoutlen = ctx->outlen;
|
||||
int validation_result;
|
||||
const char* type_string;
|
||||
|
||||
/* We should start with the argon2_type we are using */
|
||||
type_string = argon2_type2string(type, 0);
|
||||
if (!type_string) {
|
||||
return ARGON2_INCORRECT_TYPE;
|
||||
}
|
||||
|
||||
CC("$");
|
||||
CC(type_string);
|
||||
|
||||
CC("$m=");
|
||||
DECIMAL_U32(ctx->m_cost);
|
||||
CC(",t=");
|
||||
DECIMAL_U32(ctx->t_cost);
|
||||
CC(",p=");
|
||||
DECIMAL_U32(ctx->lanes);
|
||||
ctx->threads = ctx->lanes;
|
||||
|
||||
CC("$");
|
||||
BIN(ctx->salt, maxsaltlen, ctx->saltlen);
|
||||
CC("$");
|
||||
BIN(ctx->out, maxoutlen, ctx->outlen);
|
||||
|
||||
/* The rest of the fields get the default values */
|
||||
ctx->secret = NULL;
|
||||
ctx->secretlen = 0;
|
||||
ctx->ad = NULL;
|
||||
ctx->adlen = 0;
|
||||
ctx->allocate_cbk = NULL;
|
||||
ctx->free_cbk = NULL;
|
||||
ctx->flags = ARGON2_DEFAULT_FLAGS;
|
||||
|
||||
/* On return, must have valid context */
|
||||
validation_result = validate_inputs(ctx);
|
||||
if (validation_result != ARGON2_OK) {
|
||||
return validation_result;
|
||||
}
|
||||
|
||||
/* Can't have any additional characters */
|
||||
if (*str == 0) {
|
||||
return ARGON2_OK;
|
||||
} else {
|
||||
return ARGON2_DECODING_FAIL;
|
||||
}
|
||||
#undef CC
|
||||
#undef CC_opt
|
||||
#undef DECIMAL
|
||||
#undef BIN
|
||||
}
|
||||
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
|
||||
argon2_type type) {
|
||||
#define SS(str) \
|
||||
do { \
|
||||
size_t pp_len = strlen(str); \
|
||||
if (pp_len >= dst_len) { \
|
||||
return ARGON2_ENCODING_FAIL; \
|
||||
} \
|
||||
memcpy(dst, str, pp_len + 1); \
|
||||
dst += pp_len; \
|
||||
dst_len -= pp_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define SX(x) \
|
||||
do { \
|
||||
char tmp[30]; \
|
||||
sprintf(tmp, "%lu", (unsigned long)(x)); \
|
||||
SS(tmp); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define SB(buf, len) \
|
||||
do { \
|
||||
size_t sb_len = to_base64(dst, dst_len, buf, len); \
|
||||
if (sb_len == (size_t)-1) { \
|
||||
return ARGON2_ENCODING_FAIL; \
|
||||
} \
|
||||
dst += sb_len; \
|
||||
dst_len -= sb_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
const char* type_string = argon2_type2string(type, 0);
|
||||
int validation_result = validate_inputs(ctx);
|
||||
|
||||
if (!type_string) {
|
||||
return ARGON2_ENCODING_FAIL;
|
||||
}
|
||||
|
||||
if (validation_result != ARGON2_OK) {
|
||||
return validation_result;
|
||||
}
|
||||
|
||||
|
||||
SS("$");
|
||||
SS(type_string);
|
||||
|
||||
SS("$m=");
|
||||
SX(ctx->m_cost);
|
||||
SS(",t=");
|
||||
SX(ctx->t_cost);
|
||||
SS(",p=");
|
||||
SX(ctx->lanes);
|
||||
|
||||
SS("$");
|
||||
SB(ctx->salt, ctx->saltlen);
|
||||
|
||||
SS("$");
|
||||
SB(ctx->out, ctx->outlen);
|
||||
return ARGON2_OK;
|
||||
|
||||
#undef SS
|
||||
#undef SX
|
||||
#undef SB
|
||||
}
|
||||
|
||||
size_t b64len(uint32_t len) {
|
||||
size_t olen = ((size_t)len / 3) << 2;
|
||||
|
||||
switch (len % 3) {
|
||||
case 2:
|
||||
olen++;
|
||||
/* fall through */
|
||||
case 1:
|
||||
olen += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
return olen;
|
||||
}
|
||||
|
||||
size_t numlen(uint32_t num) {
|
||||
size_t len = 1;
|
||||
while (num >= 10) {
|
||||
++len;
|
||||
num = num / 10;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
57
algo/argon2/argon2d/argon2d/encoding.h
Normal file
57
algo/argon2/argon2d/argon2d/encoding.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ENCODING_H
|
||||
#define ENCODING_H
|
||||
#include "argon2.h"
|
||||
|
||||
#define ARGON2_MAX_DECODED_LANES UINT32_C(255)
|
||||
#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8)
|
||||
#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12)
|
||||
|
||||
/*
|
||||
* encode an Argon2 hash string into the provided buffer. 'dst_len'
|
||||
* contains the size, in characters, of the 'dst' buffer; if 'dst_len'
|
||||
* is less than the number of required characters (including the
|
||||
* terminating 0), then this function returns ARGON2_ENCODING_ERROR.
|
||||
*
|
||||
* on success, ARGON2_OK is returned.
|
||||
*/
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Decodes an Argon2 hash string into the provided structure 'ctx'.
|
||||
* The only fields that must be set prior to this call are ctx.saltlen and
|
||||
* ctx.outlen (which must be the maximal salt and out length values that are
|
||||
* allowed), ctx.salt and ctx.out (which must be buffers of the specified
|
||||
* length), and ctx.pwd and ctx.pwdlen which must hold a valid password.
|
||||
*
|
||||
* Invalid input string causes an error. On success, the ctx is valid and all
|
||||
* fields have been initialized.
|
||||
*
|
||||
* Returned value is ARGON2_OK on success, other ARGON2_ codes on error.
|
||||
*/
|
||||
int decode_string(argon2_context *ctx, const char *str, argon2_type type);
|
||||
|
||||
/* Returns the length of the encoded byte stream with length len */
|
||||
size_t b64len(uint32_t len);
|
||||
|
||||
/* Returns the length of the encoded number num */
|
||||
size_t numlen(uint32_t num);
|
||||
|
||||
#endif
|
172
algo/argon2/argon2d/argon2d/opt.c
Normal file
172
algo/argon2/argon2d/argon2d/opt.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
#include "core.h"
|
||||
|
||||
#include "../blake2/blake2.h"
|
||||
#include "../blake2/blamka-round-opt.h"
|
||||
|
||||
/*
|
||||
* Function fills a new memory block and optionally XORs the old block over the new one.
|
||||
* Memory must be initialized.
|
||||
* @param state Pointer to the just produced block. Content will be updated(!)
|
||||
* @param ref_block Pointer to the reference block
|
||||
* @param next_block Pointer to the block to be XORed over. May coincide with @ref_block
|
||||
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
static void fill_block(__m128i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
|
||||
block_XY[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
state[8 * i + 6], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void next_addresses(block *address_block, block *input_block) {
|
||||
/*Temporary zero-initialized blocks*/
|
||||
__m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
__m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
|
||||
memset(zero_block, 0, sizeof(zero_block));
|
||||
memset(zero2_block, 0, sizeof(zero2_block));
|
||||
|
||||
/*Increasing index counter*/
|
||||
input_block->v[6]++;
|
||||
|
||||
/*First iteration of G*/
|
||||
fill_block(zero_block, input_block, address_block, 0);
|
||||
|
||||
/*Second iteration of G*/
|
||||
fill_block(zero2_block, address_block, address_block, 0);
|
||||
}
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m128i state[64];
|
||||
int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
|
||||
/* Don't forget to generate the first block of addresses: */
|
||||
if (data_independent_addressing) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length +
|
||||
position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
} else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
} else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
|
||||
}
|
||||
}
|
57
algo/argon2/argon2d/argon2d/thread.c
Normal file
57
algo/argon2/argon2d/argon2d/thread.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#if !defined(ARGON2_NO_THREADS)
|
||||
|
||||
#include "thread.h"
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
int argon2_thread_create(argon2_thread_handle_t *handle,
|
||||
argon2_thread_func_t func, void *args) {
|
||||
if (NULL == handle || func == NULL) {
|
||||
return -1;
|
||||
}
|
||||
#if defined(_WIN32)
|
||||
*handle = _beginthreadex(NULL, 0, func, args, 0, NULL);
|
||||
return *handle != 0 ? 0 : -1;
|
||||
#else
|
||||
return pthread_create(handle, NULL, func, args);
|
||||
#endif
|
||||
}
|
||||
|
||||
int argon2_thread_join(argon2_thread_handle_t handle) {
|
||||
#if defined(_WIN32)
|
||||
if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) {
|
||||
return CloseHandle((HANDLE)handle) != 0 ? 0 : -1;
|
||||
}
|
||||
return -1;
|
||||
#else
|
||||
return pthread_join(handle, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
void argon2_thread_exit(void) {
|
||||
#if defined(_WIN32)
|
||||
_endthreadex(0);
|
||||
#else
|
||||
pthread_exit(NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* ARGON2_NO_THREADS */
|
67
algo/argon2/argon2d/argon2d/thread.h
Normal file
67
algo/argon2/argon2d/argon2d/thread.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_THREAD_H
|
||||
#define ARGON2_THREAD_H
|
||||
|
||||
#if !defined(ARGON2_NO_THREADS)
|
||||
|
||||
/*
|
||||
Here we implement an abstraction layer for the simpĺe requirements
|
||||
of the Argon2 code. We only require 3 primitives---thread creation,
|
||||
joining, and termination---so full emulation of the pthreads API
|
||||
is unwarranted. Currently we wrap pthreads and Win32 threads.
|
||||
|
||||
The API defines 2 types: the function pointer type,
|
||||
argon2_thread_func_t,
|
||||
and the type of the thread handle---argon2_thread_handle_t.
|
||||
*/
|
||||
#if defined(_WIN32)
|
||||
#include <process.h>
|
||||
typedef unsigned(__stdcall *argon2_thread_func_t)(void *);
|
||||
typedef uintptr_t argon2_thread_handle_t;
|
||||
#else
|
||||
#include <pthread.h>
|
||||
typedef void *(*argon2_thread_func_t)(void *);
|
||||
typedef pthread_t argon2_thread_handle_t;
|
||||
#endif
|
||||
|
||||
/* Creates a thread
|
||||
* @param handle pointer to a thread handle, which is the output of this
|
||||
* function. Must not be NULL.
|
||||
* @param func A function pointer for the thread's entry point. Must not be
|
||||
* NULL.
|
||||
* @param args Pointer that is passed as an argument to @func. May be NULL.
|
||||
* @return 0 if @handle and @func are valid pointers and a thread is successfuly
|
||||
* created.
|
||||
*/
|
||||
int argon2_thread_create(argon2_thread_handle_t *handle,
|
||||
argon2_thread_func_t func, void *args);
|
||||
|
||||
/* Waits for a thread to terminate
|
||||
* @param handle Handle to a thread created with argon2_thread_create.
|
||||
* @return 0 if @handle is a valid handle, and joining completed successfully.
|
||||
*/
|
||||
int argon2_thread_join(argon2_thread_handle_t handle);
|
||||
|
||||
/* Terminate the current thread. Must be run inside a thread created by
|
||||
* argon2_thread_create.
|
||||
*/
|
||||
void argon2_thread_exit(void);
|
||||
|
||||
#endif /* ARGON2_NO_THREADS */
|
||||
#endif
|
156
algo/argon2/argon2d/blake2/blake2-impl.h
Normal file
156
algo/argon2/argon2d/blake2/blake2-impl.h
Normal file
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef PORTABLE_BLAKE2_IMPL_H
|
||||
#define PORTABLE_BLAKE2_IMPL_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define BLAKE2_INLINE __inline
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define BLAKE2_INLINE __inline__
|
||||
#else
|
||||
#define BLAKE2_INLINE
|
||||
#endif
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
/*
|
||||
Not an exhaustive list, but should cover the majority of modern platforms
|
||||
Additionally, the code will always be correct---this is only a performance
|
||||
tweak.
|
||||
*/
|
||||
#if (defined(__BYTE_ORDER__) && \
|
||||
(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
|
||||
defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
|
||||
defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
|
||||
defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
|
||||
defined(_M_ARM)
|
||||
#define NATIVE_LITTLE_ENDIAN
|
||||
#endif
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
static BLAKE2_INLINE uint32_t load32(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
uint32_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint32_t w = *p++;
|
||||
w |= (uint32_t)(*p++) << 8;
|
||||
w |= (uint32_t)(*p++) << 16;
|
||||
w |= (uint32_t)(*p++) << 24;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint64_t load64(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
uint64_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
w |= (uint64_t)(*p++) << 48;
|
||||
w |= (uint64_t)(*p++) << 56;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void store32(void *dst, uint32_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
memcpy(dst, &w, sizeof w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void store64(void *dst, uint64_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
memcpy(dst, &w, sizeof w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint64_t load48(const void *src) {
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
return w;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void store48(void *dst, uint64_t w) {
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (32 - c));
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (64 - c));
|
||||
}
|
||||
|
||||
void clear_internal_memory(void *v, size_t n);
|
||||
|
||||
#endif
|
91
algo/argon2/argon2d/blake2/blake2.h
Normal file
91
algo/argon2/argon2d/blake2/blake2.h
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef PORTABLE_BLAKE2_H
|
||||
#define PORTABLE_BLAKE2_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum blake2b_constant {
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2b_param {
|
||||
uint8_t digest_length; /* 1 */
|
||||
uint8_t key_length; /* 2 */
|
||||
uint8_t fanout; /* 3 */
|
||||
uint8_t depth; /* 4 */
|
||||
uint32_t leaf_length; /* 8 */
|
||||
uint64_t node_offset; /* 16 */
|
||||
uint8_t node_depth; /* 17 */
|
||||
uint8_t inner_length; /* 18 */
|
||||
uint8_t reserved[14]; /* 32 */
|
||||
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
|
||||
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
|
||||
} blake2b_param;
|
||||
#pragma pack(pop)
|
||||
|
||||
typedef struct __blake2b_state {
|
||||
uint64_t h[8];
|
||||
uint64_t t[2];
|
||||
uint64_t f[2];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
unsigned buflen;
|
||||
unsigned outlen;
|
||||
uint8_t last_node;
|
||||
} blake2b_state;
|
||||
|
||||
/* Ensure param structs have not been wrongly padded */
|
||||
/* Poor man's static_assert */
|
||||
enum {
|
||||
blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
|
||||
blake2_size_check_2 =
|
||||
1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
|
||||
};
|
||||
|
||||
/* Streaming API */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen);
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
390
algo/argon2/argon2d/blake2/blake2b.c
Normal file
390
algo/argon2/argon2d/blake2/blake2b.c
Normal file
@@ -0,0 +1,390 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
static const uint64_t blake2b_IV[8] = {
|
||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)};
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] = {
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) {
|
||||
S->f[1] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) {
|
||||
if (S->last_node) {
|
||||
blake2b_set_lastnode(S);
|
||||
}
|
||||
S->f[0] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S,
|
||||
uint64_t inc) {
|
||||
S->t[0] += inc;
|
||||
S->t[1] += (S->t[0] < inc);
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) {
|
||||
clear_internal_memory(S, sizeof(*S)); /* wipe */
|
||||
blake2b_set_lastblock(S); /* invalidate for further use */
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) {
|
||||
memset(S, 0, sizeof(*S));
|
||||
memcpy(S->h, blake2b_IV, sizeof(S->h));
|
||||
}
|
||||
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
|
||||
const unsigned char *p = (const unsigned char *)P;
|
||||
unsigned int i;
|
||||
|
||||
if (NULL == P || NULL == S) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_init0(S);
|
||||
/* IV XOR Parameter Block */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
|
||||
}
|
||||
S->outlen = P->digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sequential blake2b initialization */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for unkeyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = 0;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
return blake2b_init_param(S, &P);
|
||||
}
|
||||
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for keyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = (uint8_t)keylen;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
if (blake2b_init_param(S, &P) < 0) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset(block, 0, BLAKE2B_BLOCKBYTES);
|
||||
memcpy(block, key, keylen);
|
||||
blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
|
||||
/* Burn the key from stack */
|
||||
clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
||||
uint64_t m[16];
|
||||
uint64_t v[16];
|
||||
unsigned int i, r;
|
||||
|
||||
for (i = 0; i < 16; ++i) {
|
||||
m[i] = load64(block + i * sizeof(m[i]));
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
v[i] = S->h[i];
|
||||
}
|
||||
|
||||
v[8] = blake2b_IV[0];
|
||||
v[9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = blake2b_IV[4] ^ S->t[0];
|
||||
v[13] = blake2b_IV[5] ^ S->t[1];
|
||||
v[14] = blake2b_IV[6] ^ S->f[0];
|
||||
v[15] = blake2b_IV[7] ^ S->f[1];
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
for (r = 0; r < 12; ++r) {
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
|
||||
if (inlen == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sanity check */
|
||||
if (S == NULL || in == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
|
||||
/* Complete current block */
|
||||
size_t left = S->buflen;
|
||||
size_t fill = BLAKE2B_BLOCKBYTES - left;
|
||||
memcpy(&S->buf[left], pin, fill);
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, S->buf);
|
||||
S->buflen = 0;
|
||||
inlen -= fill;
|
||||
pin += fill;
|
||||
/* Avoid buffer copies when possible */
|
||||
while (inlen > BLAKE2B_BLOCKBYTES) {
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, pin);
|
||||
inlen -= BLAKE2B_BLOCKBYTES;
|
||||
pin += BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
}
|
||||
memcpy(&S->buf[S->buflen], pin, inlen);
|
||||
S->buflen += (unsigned int)inlen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
|
||||
unsigned int i;
|
||||
|
||||
/* Sanity checks */
|
||||
if (S == NULL || out == NULL || outlen < S->outlen) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_increment_counter(S, S->buflen);
|
||||
blake2b_set_lastblock(S);
|
||||
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
||||
blake2b_compress(S, S->buf);
|
||||
|
||||
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
||||
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
||||
}
|
||||
|
||||
memcpy(out, buffer, S->outlen);
|
||||
clear_internal_memory(buffer, sizeof(buffer));
|
||||
clear_internal_memory(S->buf, sizeof(S->buf));
|
||||
clear_internal_memory(S->h, sizeof(S->h));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen) {
|
||||
blake2b_state S;
|
||||
int ret = -1;
|
||||
|
||||
/* Verify parameters */
|
||||
if (NULL == in && inlen > 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
} else {
|
||||
if (blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (blake2b_update(&S, in, inlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
ret = blake2b_final(&S, out, outlen);
|
||||
|
||||
fail:
|
||||
clear_internal_memory(&S, sizeof(S));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
blake2b_state blake_state;
|
||||
uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
|
||||
int ret = -1;
|
||||
|
||||
if (outlen > UINT32_MAX) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Ensure little-endian byte order! */
|
||||
store32(outlen_bytes, (uint32_t)outlen);
|
||||
|
||||
#define TRY(statement) \
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
if (outlen <= BLAKE2B_OUTBYTES) {
|
||||
TRY(blake2b_init(&blake_state, outlen));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out, outlen));
|
||||
} else {
|
||||
uint32_t toproduce;
|
||||
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
||||
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
||||
TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
||||
|
||||
while (toproduce > BLAKE2B_OUTBYTES) {
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES, NULL, 0));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce -= BLAKE2B_OUTBYTES / 2;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
|
||||
0));
|
||||
memcpy(out, out_buffer, toproduce);
|
||||
}
|
||||
fail:
|
||||
clear_internal_memory(&blake_state, sizeof(blake_state));
|
||||
return ret;
|
||||
#undef TRY
|
||||
}
|
||||
/* Argon2 Team - End Code */
|
180
algo/argon2/argon2d/blake2/blamka-round-opt.h
Normal file
180
algo/argon2/argon2d/blake2/blamka-round-opt.h
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_OPT_H
|
||||
#define BLAKE_ROUND_MKA_OPT_H
|
||||
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
#if defined(__SSSE3__)
|
||||
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
|
||||
#endif
|
||||
|
||||
#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__XOP__)
|
||||
#if defined(__SSSE3__)
|
||||
#define r16 \
|
||||
(_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define r24 \
|
||||
(_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define _mm_roti_epi64(x, c) \
|
||||
(-(c) == 32) \
|
||||
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
|
||||
: (-(c) == 24) \
|
||||
? _mm_shuffle_epi8((x), r24) \
|
||||
: (-(c) == 16) \
|
||||
? _mm_shuffle_epi8((x), r16) \
|
||||
: (-(c) == 63) \
|
||||
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_add_epi64((x), (x))) \
|
||||
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_slli_epi64((x), 64 - (-(c))))
|
||||
#else /* defined(__SSE2__) */
|
||||
#define _mm_roti_epi64(r, c) \
|
||||
_mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
|
||||
#endif
|
||||
#else
|
||||
#endif
|
||||
|
||||
static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
const __m128i z = _mm_mul_epu32(x, y);
|
||||
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
|
||||
}
|
||||
|
||||
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -32); \
|
||||
D1 = _mm_roti_epi64(D1, -32); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -24); \
|
||||
B1 = _mm_roti_epi64(B1, -24); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -16); \
|
||||
D1 = _mm_roti_epi64(D1, -16); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -63); \
|
||||
B1 = _mm_roti_epi64(B1, -63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
t1 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
t1 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
#else /* SSE2 */
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = D0; \
|
||||
__m128i t1 = B0; \
|
||||
D0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = D0; \
|
||||
D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
|
||||
D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
|
||||
B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
|
||||
B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0, t1; \
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
t0 = B0; \
|
||||
t1 = D0; \
|
||||
B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
|
||||
B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
|
||||
D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
|
||||
D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
|
||||
} while ((void)0, 0)
|
||||
#endif
|
||||
|
||||
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
56
algo/argon2/argon2d/blake2/blamka-round-ref.h
Normal file
56
algo/argon2/argon2d/blake2/blamka-round-ref.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_H
|
||||
#define BLAKE_ROUND_MKA_H
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/*designed by the Lyra PHC team */
|
||||
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
const uint64_t m = UINT64_C(0xFFFFFFFF);
|
||||
const uint64_t xy = (x & m) * (y & m);
|
||||
return x + y + 2 * xy;
|
||||
}
|
||||
|
||||
#define G(a, b, c, d) \
|
||||
do { \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
|
||||
v12, v13, v14, v15) \
|
||||
do { \
|
||||
G(v0, v4, v8, v12); \
|
||||
G(v1, v5, v9, v13); \
|
||||
G(v2, v6, v10, v14); \
|
||||
G(v3, v7, v11, v15); \
|
||||
G(v0, v5, v10, v15); \
|
||||
G(v1, v6, v11, v12); \
|
||||
G(v2, v7, v8, v13); \
|
||||
G(v3, v4, v9, v14); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
@@ -10,7 +10,7 @@ bool register_blake_algo( algo_gate_t* gate )
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blake_get_max64;
|
||||
//#if defined (__AVX2__) && defined (FOUR_WAY)
|
||||
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
// gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_blake_8way;
|
||||
// gate->hash = (void*)&blakehash_8way;
|
||||
#if defined(BLAKE_4WAY)
|
||||
|
@@ -30,7 +30,7 @@
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (__SSE4_2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -363,14 +363,14 @@ static const sph_u64 CB[16] = {
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( _mm_xor_si128( \
|
||||
_mm_set_epi32( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( _mm_xor_si128( \
|
||||
_mm_set_epi32( c0, c0, c0, c0 ), m1 ), b ), a ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32
|
||||
@@ -419,14 +419,14 @@ do { \
|
||||
do { \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set1_epi32( c1 ), m0 ), b ), a ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set1_epi32( c0 ), m1 ), b ), a ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_S_8WAY(r) do { \
|
||||
@@ -445,14 +445,14 @@ do { \
|
||||
#define GB_4WAY(m0, m1, c0, c1, a, b, c, d) do { \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 25 ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 25 ); \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c0, c0, c0, c0 ), m1 ), b ), a ); \
|
||||
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 11 ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 11 ); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
|
@@ -37,7 +37,7 @@
|
||||
#ifndef __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY__ 1
|
||||
|
||||
#ifdef __AVX__
|
||||
#ifdef __SSE4_2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -51,7 +51,7 @@ extern "C"{
|
||||
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
// With AVX only Blake-256 4 way is available.
|
||||
// With SSE4.2 only Blake-256 4 way is available.
|
||||
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
|
||||
|
||||
// Blake-256 4 way
|
||||
|
@@ -20,7 +20,7 @@ bool register_blake2s_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&blake2s_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define BLAKE2S_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
||||
@@ -92,13 +92,13 @@ int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
|
||||
#define G4W(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND4W(r) \
|
||||
@@ -210,14 +210,14 @@ int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
|
||||
do { \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
|
||||
m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
|
||||
m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND8W(r) \
|
||||
@@ -359,4 +359,4 @@ int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
|
@@ -14,7 +14,7 @@
|
||||
#ifndef __BLAKE2S_HASH_4WAY_H__
|
||||
#define __BLAKE2S_HASH_4WAY_H__ 1
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
@@ -107,6 +107,6 @@ int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
|
||||
#endif
|
||||
|
@@ -22,7 +22,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_blakecoin;
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define BLAKECOIN_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
@@ -18,7 +18,7 @@
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define DECRED_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -77,26 +77,26 @@ static const sph_u64 IV512[] = {
|
||||
#define ss0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 1), \
|
||||
_mm_slli_epi32( (x), 3) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 4), \
|
||||
mm_rotl_32( (x), 19) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 4), \
|
||||
mm_rol_32( (x), 19) ) )
|
||||
|
||||
#define ss1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 1), \
|
||||
_mm_slli_epi32( (x), 2) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 8), \
|
||||
mm_rotl_32( (x), 23) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 8), \
|
||||
mm_rol_32( (x), 23) ) )
|
||||
|
||||
#define ss2(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 2), \
|
||||
_mm_slli_epi32( (x), 1) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 12), \
|
||||
mm_rotl_32( (x), 25) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 12), \
|
||||
mm_rol_32( (x), 25) ) )
|
||||
|
||||
#define ss3(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 2), \
|
||||
_mm_slli_epi32( (x), 2) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 15), \
|
||||
mm_rotl_32( (x), 29) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 15), \
|
||||
mm_rol_32( (x), 29) ) )
|
||||
|
||||
#define ss4(x) \
|
||||
_mm_xor_si128( (x), _mm_srli_epi32( (x), 1 ) )
|
||||
@@ -104,16 +104,16 @@ static const sph_u64 IV512[] = {
|
||||
#define ss5(x) \
|
||||
_mm_xor_si128( (x), _mm_srli_epi32( (x), 2 ) )
|
||||
|
||||
#define rs1(x) mm_rotl_32( x, 3 )
|
||||
#define rs2(x) mm_rotl_32( x, 7 )
|
||||
#define rs3(x) mm_rotl_32( x, 13 )
|
||||
#define rs4(x) mm_rotl_32( x, 16 )
|
||||
#define rs5(x) mm_rotl_32( x, 19 )
|
||||
#define rs6(x) mm_rotl_32( x, 23 )
|
||||
#define rs7(x) mm_rotl_32( x, 27 )
|
||||
#define rs1(x) mm_rol_32( x, 3 )
|
||||
#define rs2(x) mm_rol_32( x, 7 )
|
||||
#define rs3(x) mm_rol_32( x, 13 )
|
||||
#define rs4(x) mm_rol_32( x, 16 )
|
||||
#define rs5(x) mm_rol_32( x, 19 )
|
||||
#define rs6(x) mm_rol_32( x, 23 )
|
||||
#define rs7(x) mm_rol_32( x, 27 )
|
||||
|
||||
#define rol_off_32( M, j, off ) \
|
||||
mm_rotl_32( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
mm_rol_32( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
|
||||
#define add_elt_s( M, H, j ) \
|
||||
@@ -178,26 +178,26 @@ static const sph_u64 IV512[] = {
|
||||
#define sb0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
|
||||
_mm256_slli_epi64( (x), 3) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 4), \
|
||||
mm256_rotl_64( (x), 37) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 4), \
|
||||
mm256_rol_64( (x), 37) ) )
|
||||
|
||||
#define sb1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
|
||||
_mm256_slli_epi64( (x), 2) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 13), \
|
||||
mm256_rotl_64( (x), 43) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 13), \
|
||||
mm256_rol_64( (x), 43) ) )
|
||||
|
||||
#define sb2(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
|
||||
_mm256_slli_epi64( (x), 1) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 19), \
|
||||
mm256_rotl_64( (x), 53) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 19), \
|
||||
mm256_rol_64( (x), 53) ) )
|
||||
|
||||
#define sb3(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
|
||||
_mm256_slli_epi64( (x), 2) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 28), \
|
||||
mm256_rotl_64( (x), 59) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 28), \
|
||||
mm256_rol_64( (x), 59) ) )
|
||||
|
||||
#define sb4(x) \
|
||||
_mm256_xor_si256( (x), _mm256_srli_epi64( (x), 1 ) )
|
||||
@@ -205,17 +205,17 @@ static const sph_u64 IV512[] = {
|
||||
#define sb5(x) \
|
||||
_mm256_xor_si256( (x), _mm256_srli_epi64( (x), 2 ) )
|
||||
|
||||
#define rb1(x) mm256_rotl_64( x, 5 )
|
||||
#define rb2(x) mm256_rotl_64( x, 11 )
|
||||
#define rb3(x) mm256_rotl_64( x, 27 )
|
||||
#define rb4(x) mm256_rotl_64( x, 32 )
|
||||
#define rb5(x) mm256_rotl_64( x, 37 )
|
||||
#define rb6(x) mm256_rotl_64( x, 43 )
|
||||
#define rb7(x) mm256_rotl_64( x, 53 )
|
||||
#define rb1(x) mm256_rol_64( x, 5 )
|
||||
#define rb2(x) mm256_rol_64( x, 11 )
|
||||
#define rb3(x) mm256_rol_64( x, 27 )
|
||||
#define rb4(x) mm256_rol_64( x, 32 )
|
||||
#define rb5(x) mm256_rol_64( x, 37 )
|
||||
#define rb6(x) mm256_rol_64( x, 43 )
|
||||
#define rb7(x) mm256_rol_64( x, 53 )
|
||||
|
||||
#define rol_off_64( M, j, off ) \
|
||||
mm256_rotl_64( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
mm256_rol_64( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
|
||||
#define add_elt_b( M, H, j ) \
|
||||
_mm256_xor_si256( \
|
||||
@@ -526,42 +526,42 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
|
||||
_mm_slli_epi32( qt[23], 2 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[31] ), qt[ 7] ));
|
||||
dH[ 8] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[4], 9 ),
|
||||
mm_rol_32( dH[4], 9 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[24] ), M[ 8] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 8 ),
|
||||
_mm_xor_si128( qt[23], qt[ 8] ) ) );
|
||||
dH[ 9] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[5], 10 ),
|
||||
mm_rol_32( dH[5], 10 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[25] ), M[ 9] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 6 ),
|
||||
_mm_xor_si128( qt[16], qt[ 9] ) ) );
|
||||
dH[10] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[6], 11 ),
|
||||
mm_rol_32( dH[6], 11 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[26] ), M[10] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 6 ),
|
||||
_mm_xor_si128( qt[17], qt[10] ) ) );
|
||||
dH[11] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[7], 12 ),
|
||||
mm_rol_32( dH[7], 12 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[27] ), M[11] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 4 ),
|
||||
_mm_xor_si128( qt[18], qt[11] ) ) );
|
||||
dH[12] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[0], 13 ),
|
||||
mm_rol_32( dH[0], 13 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[28] ), M[12] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 3 ),
|
||||
_mm_xor_si128( qt[19], qt[12] ) ) );
|
||||
dH[13] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[1], 14 ),
|
||||
mm_rol_32( dH[1], 14 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[29] ), M[13] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 4 ),
|
||||
_mm_xor_si128( qt[20], qt[13] ) ) );
|
||||
dH[14] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[2], 15 ),
|
||||
mm_rol_32( dH[2], 15 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[30] ), M[14] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 7 ),
|
||||
_mm_xor_si128( qt[21], qt[14] ) ) );
|
||||
dH[15] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[3], 16 ),
|
||||
mm_rol_32( dH[3], 16 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[31] ), M[15] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 2 ),
|
||||
_mm_xor_si128( qt[22], qt[15] ) ) );
|
||||
@@ -819,42 +819,42 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
_mm256_slli_epi64( qt[23], 2 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[31] ), qt[ 7] ));
|
||||
dH[ 8] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[4], 9 ),
|
||||
mm256_rol_64( dH[4], 9 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[24] ), M[ 8] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 8 ),
|
||||
_mm256_xor_si256( qt[23], qt[ 8] ) ) );
|
||||
dH[ 9] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[5], 10 ),
|
||||
mm256_rol_64( dH[5], 10 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[25] ), M[ 9] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 6 ),
|
||||
_mm256_xor_si256( qt[16], qt[ 9] ) ) );
|
||||
dH[10] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[6], 11 ),
|
||||
mm256_rol_64( dH[6], 11 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[26] ), M[10] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 6 ),
|
||||
_mm256_xor_si256( qt[17], qt[10] ) ) );
|
||||
dH[11] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[7], 12 ),
|
||||
mm256_rol_64( dH[7], 12 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[27] ), M[11] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 4 ),
|
||||
_mm256_xor_si256( qt[18], qt[11] ) ) );
|
||||
dH[12] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[0], 13 ),
|
||||
mm256_rol_64( dH[0], 13 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[28] ), M[12] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 3 ),
|
||||
_mm256_xor_si256( qt[19], qt[12] ) ) );
|
||||
dH[13] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[1], 14 ),
|
||||
mm256_rol_64( dH[1], 14 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[29] ), M[13] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 4 ),
|
||||
_mm256_xor_si256( qt[20], qt[13] ) ) );
|
||||
dH[14] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[2], 15 ),
|
||||
mm256_rol_64( dH[2], 15 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[30] ), M[14] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 7 ),
|
||||
_mm256_xor_si256( qt[21], qt[14] ) ) );
|
||||
dH[15] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[3], 16 ),
|
||||
mm256_rol_64( dH[3], 16 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[31] ), M[15] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 2 ),
|
||||
_mm256_xor_si256( qt[22], qt[15] ) ) );
|
||||
|
1251
algo/bmw/bmw.test
1251
algo/bmw/bmw.test
File diff suppressed because it is too large
Load Diff
205
algo/cubehash/cube-hash-2way.c
Normal file
205
algo/cubehash/cube-hash-2way.c
Normal file
@@ -0,0 +1,205 @@
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <unistd.h>
|
||||
#include <memory.h>
|
||||
#include "cube-hash-2way.h"
|
||||
|
||||
// 2x128
|
||||
|
||||
static void transform_2way( cube_2way_context *sp )
|
||||
{
|
||||
int r;
|
||||
const int rounds = sp->rounds;
|
||||
|
||||
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
|
||||
|
||||
x0 = _mm256_load_si256( (__m256i*)sp->h );
|
||||
x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
|
||||
x2 = _mm256_load_si256( (__m256i*)sp->h + 2 );
|
||||
x3 = _mm256_load_si256( (__m256i*)sp->h + 3 );
|
||||
x4 = _mm256_load_si256( (__m256i*)sp->h + 4 );
|
||||
x5 = _mm256_load_si256( (__m256i*)sp->h + 5 );
|
||||
x6 = _mm256_load_si256( (__m256i*)sp->h + 6 );
|
||||
x7 = _mm256_load_si256( (__m256i*)sp->h + 7 );
|
||||
|
||||
for ( r = 0; r < rounds; ++r )
|
||||
{
|
||||
x4 = _mm256_add_epi32( x0, x4 );
|
||||
x5 = _mm256_add_epi32( x1, x5 );
|
||||
x6 = _mm256_add_epi32( x2, x6 );
|
||||
x7 = _mm256_add_epi32( x3, x7 );
|
||||
y0 = x2;
|
||||
y1 = x3;
|
||||
y2 = x0;
|
||||
y3 = x1;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
|
||||
_mm256_srli_epi32( y1, 25 ) );
|
||||
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
|
||||
_mm256_srli_epi32( y2, 25 ) );
|
||||
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
|
||||
_mm256_srli_epi32( y3, 25 ) );
|
||||
x0 = _mm256_xor_si256( x0, x4 );
|
||||
x1 = _mm256_xor_si256( x1, x5 );
|
||||
x2 = _mm256_xor_si256( x2, x6 );
|
||||
x3 = _mm256_xor_si256( x3, x7 );
|
||||
x4 = mm256_swap128_64( x4 );
|
||||
x5 = mm256_swap128_64( x5 );
|
||||
x6 = mm256_swap128_64( x6 );
|
||||
x7 = mm256_swap128_64( x7 );
|
||||
x4 = _mm256_add_epi32( x0, x4 );
|
||||
x5 = _mm256_add_epi32( x1, x5 );
|
||||
x6 = _mm256_add_epi32( x2, x6 );
|
||||
x7 = _mm256_add_epi32( x3, x7 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
y2 = x3;
|
||||
y3 = x2;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
_mm256_srli_epi32( y1, 21 ) );
|
||||
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
|
||||
_mm256_srli_epi32( y2, 21 ) );
|
||||
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
|
||||
_mm256_srli_epi32( y3, 21 ) );
|
||||
x0 = _mm256_xor_si256( x0, x4 );
|
||||
x1 = _mm256_xor_si256( x1, x5 );
|
||||
x2 = _mm256_xor_si256( x2, x6 );
|
||||
x3 = _mm256_xor_si256( x3, x7 );
|
||||
x4 = mm256_swap64_32( x4 );
|
||||
x5 = mm256_swap64_32( x5 );
|
||||
x6 = mm256_swap64_32( x6 );
|
||||
x7 = mm256_swap64_32( x7 );
|
||||
}
|
||||
|
||||
_mm256_store_si256( (__m256i*)sp->h, x0 );
|
||||
_mm256_store_si256( (__m256i*)sp->h + 1, x1 );
|
||||
_mm256_store_si256( (__m256i*)sp->h + 2, x2 );
|
||||
_mm256_store_si256( (__m256i*)sp->h + 3, x3 );
|
||||
_mm256_store_si256( (__m256i*)sp->h + 4, x4 );
|
||||
_mm256_store_si256( (__m256i*)sp->h + 5, x5 );
|
||||
_mm256_store_si256( (__m256i*)sp->h + 6, x6 );
|
||||
_mm256_store_si256( (__m256i*)sp->h + 7, x7 );
|
||||
|
||||
}
|
||||
|
||||
cube_2way_context cube_2way_ctx_cache __attribute__ ((aligned (64)));
|
||||
|
||||
int cube_2way_reinit( cube_2way_context *sp )
|
||||
{
|
||||
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
|
||||
int blockbytes )
|
||||
{
|
||||
int i;
|
||||
|
||||
// all sizes of __m128i
|
||||
cube_2way_ctx_cache.hashlen = hashbitlen/128;
|
||||
cube_2way_ctx_cache.blocksize = blockbytes/16;
|
||||
cube_2way_ctx_cache.rounds = rounds;
|
||||
cube_2way_ctx_cache.pos = 0;
|
||||
|
||||
for ( i = 0; i < 8; ++i )
|
||||
cube_2way_ctx_cache.h[i] = m256_zero;
|
||||
|
||||
cube_2way_ctx_cache.h[0] = _mm256_set_epi32(
|
||||
0, rounds, blockbytes, hashbitlen / 8,
|
||||
0, rounds, blockbytes, hashbitlen / 8 );
|
||||
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform_2way( &cube_2way_ctx_cache );
|
||||
|
||||
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size )
|
||||
{
|
||||
const int len = size / 16;
|
||||
const __m256i *in = (__m256i*)data;
|
||||
int i;
|
||||
|
||||
// It is assumed data is aligned to 256 bits and is a multiple of 128 bits.
|
||||
// Current usage sata is either 64 or 80 bytes.
|
||||
|
||||
for ( i = 0; i < len; i++ )
|
||||
{
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
|
||||
sp->pos++;
|
||||
if ( sp->pos == sp->blocksize )
|
||||
{
|
||||
transform_2way( sp );
|
||||
sp->pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cube_2way_close( cube_2way_context *sp, void *output )
|
||||
{
|
||||
__m256i *hash = (__m256i*)output;
|
||||
int i;
|
||||
|
||||
// pos is zero for 64 byte data, 1 for 80 byte data.
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
|
||||
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
|
||||
transform_2way( sp );
|
||||
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
|
||||
1,0,0,0 ) );
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform_2way( &cube_2way_ctx_cache );
|
||||
|
||||
for ( i = 0; i < sp->hashlen; i++ )
|
||||
hash[i] = sp->h[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
const void *data, size_t size )
|
||||
{
|
||||
const int len = size / 16;
|
||||
const __m256i *in = (__m256i*)data;
|
||||
__m256i *hash = (__m256i*)output;
|
||||
int i;
|
||||
|
||||
for ( i = 0; i < len; i++ )
|
||||
{
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ], in[i] );
|
||||
sp->pos++;
|
||||
if ( sp->pos == sp->blocksize )
|
||||
{
|
||||
transform_2way( sp );
|
||||
sp->pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// pos is zero for 64 byte data, 1 for 80 byte data.
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
|
||||
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
|
||||
transform_2way( sp );
|
||||
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
|
||||
1,0,0,0 ) );
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform_2way( &cube_2way_ctx_cache );
|
||||
|
||||
for ( i = 0; i < sp->hashlen; i++ )
|
||||
hash[i] = sp->h[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
36
algo/cubehash/cube-hash-2way.h
Normal file
36
algo/cubehash/cube-hash-2way.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef CUBE_HASH_2WAY_H__
|
||||
#define CUBE_HASH_2WAY_H__
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include <stdint.h>
|
||||
#include "avxdefs.h"
|
||||
|
||||
// 2x128, 2 way parallel SSE2
|
||||
|
||||
struct _cube_2way_context
|
||||
{
|
||||
int hashlen; // __m128i
|
||||
int rounds;
|
||||
int blocksize; // __m128i
|
||||
int pos; // number of __m128i read into x from current block
|
||||
__m256i h[8] __attribute__ ((aligned (64)));
|
||||
};
|
||||
|
||||
typedef struct _cube_2way_context cube_2way_context;
|
||||
|
||||
int cube_2way_init( cube_2way_context* sp, int hashbitlen, int rounds,
|
||||
int blockbytes );
|
||||
// reinitialize context with same parameters, much faster.
|
||||
int cube_2way_reinit( cube_2way_context *sp );
|
||||
|
||||
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size );
|
||||
|
||||
int cube_2way_close( cube_2way_context *sp, void *output );
|
||||
|
||||
int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
const void *data, size_t size );
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
@@ -22,7 +22,7 @@ static void transform( cubehashParam *sp )
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
__m256i x0, x1, x2, x3, y0, y1;
|
||||
register __m256i x0, x1, x2, x3, y0, y1;
|
||||
|
||||
x0 = _mm256_load_si256( (__m256i*)sp->x );
|
||||
x1 = _mm256_load_si256( (__m256i*)sp->x + 1 );
|
||||
@@ -33,20 +33,19 @@ static void transform( cubehashParam *sp )
|
||||
{
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
y0 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( x1, 7 ),
|
||||
_mm256_srli_epi32( x1, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
|
||||
_mm256_srli_epi32( y1, 25 ) );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0x4e );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0x4e );
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = _mm256_permute2f128_si256( x0, x0, 1 );
|
||||
y1 = _mm256_permute2f128_si256( x1, x1, 1 );
|
||||
y0 = _mm256_permute4x64_epi64( x0, 0x4e );
|
||||
y1 = _mm256_permute4x64_epi64( x1, 0x4e );
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
|
@@ -601,18 +601,18 @@ do { \
|
||||
|
||||
#define L( a, b, c, d ) \
|
||||
do { \
|
||||
a = mm256_rotl_32( a, 13 ); \
|
||||
c = mm256_rotl_32( c, 3 ); \
|
||||
a = mm256_rol_32( a, 13 ); \
|
||||
c = mm256_rol_32( c, 3 ); \
|
||||
b = _mm256_xor_si256( b, _mm256_xor_si256( a, c ) ); \
|
||||
d = _mm256_xor_si256( d, _mm256_xor_si256( c, \
|
||||
_mm256_slli_epi32( a, 3 ) ) ); \
|
||||
b = mm256_rotl_32( b, 1 ); \
|
||||
d = mm256_rotl_32( d, 7 ); \
|
||||
b = mm256_rol_32( b, 1 ); \
|
||||
d = mm256_rol_32( d, 7 ); \
|
||||
a = _mm256_xor_si256( a, _mm256_xor_si256( b, d ) ); \
|
||||
c = _mm256_xor_si256( c, _mm256_xor_si256( d, \
|
||||
_mm256_slli_epi32( b, 7 ) ) ); \
|
||||
a = mm256_rotl_32( a, 5 ); \
|
||||
c = mm256_rotl_32( c, 22 ); \
|
||||
a = mm256_rol_32( a, 5 ); \
|
||||
c = mm256_rol_32( c, 22 ); \
|
||||
} while (0)
|
||||
|
||||
#define DECL_STATE_BIG \
|
||||
|
@@ -38,7 +38,7 @@
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
|
@@ -40,7 +40,9 @@
|
||||
#include <string.h>
|
||||
#include "haval-hash-4way.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
// won't compile with sse4.2
|
||||
//#if defined (__SSE4_2__)
|
||||
#if defined(__AVX__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -134,8 +136,8 @@ extern "C"{
|
||||
#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) \
|
||||
do { \
|
||||
__m128i t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
|
||||
x7 = _mm_add_epi32( _mm_add_epi32( mm_rotr_32( t, 7 ), \
|
||||
mm_rotr_32( x7, 11 ) ), \
|
||||
x7 = _mm_add_epi32( _mm_add_epi32( mm_ror_32( t, 7 ), \
|
||||
mm_ror_32( x7, 11 ) ), \
|
||||
_mm_add_epi32( w, _mm_set1_epi32( c ) ) ); \
|
||||
} while (0)
|
||||
|
||||
|
@@ -83,7 +83,8 @@ void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf)
|
||||
keys[14] = tmp1;
|
||||
}
|
||||
|
||||
#ifdef __AVX__
|
||||
#ifdef __SSE4_2__
|
||||
//#ifdef __AVX__
|
||||
|
||||
#define AESENC(i,j) \
|
||||
State[j] = _mm_aesenc_si128(State[j], ExpandedKey[j][i]);
|
||||
|
@@ -42,15 +42,82 @@ void hodl_le_build_stratum_request( char* req, struct work* work,
|
||||
free( xnonce2str );
|
||||
}
|
||||
|
||||
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
|
||||
char* hodl_malloc_txs_request( struct work *work )
|
||||
{
|
||||
char* req;
|
||||
json_t *val;
|
||||
char data_str[2 * sizeof(work->data) + 1];
|
||||
int i;
|
||||
|
||||
for ( i = 0; i < ARRAY_SIZE(work->data); i++ )
|
||||
be32enc( work->data + i, work->data[i] );
|
||||
|
||||
bin2hex( data_str, (unsigned char *)work->data, 88 );
|
||||
if ( work->workid )
|
||||
{
|
||||
char *params;
|
||||
val = json_object();
|
||||
json_object_set_new( val, "workid", json_string( work->workid ) );
|
||||
params = json_dumps( val, 0 );
|
||||
json_decref( val );
|
||||
req = malloc( 128 + 2*88 + strlen( work->txs ) + strlen( params ) );
|
||||
sprintf( req,
|
||||
"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n",
|
||||
data_str, work->txs, params);
|
||||
free( params );
|
||||
}
|
||||
else
|
||||
{
|
||||
req = malloc( 128 + 2*88 + strlen(work->txs));
|
||||
sprintf( req,
|
||||
"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n",
|
||||
data_str, work->txs);
|
||||
}
|
||||
return req;
|
||||
}
|
||||
|
||||
void hodl_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_tree,
|
||||
uint32_t ntime, uint32_t nbits )
|
||||
{
|
||||
uchar merkle_root[64] = { 0 };
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
algo_gate.gen_merkle_root( merkle_root, sctx );
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = version;
|
||||
|
||||
if ( have_stratum )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[ 1+i ] = le32dec( prevhash + i );
|
||||
else
|
||||
for (i = 0; i < 8; i++)
|
||||
g_work->data[ 8-i ] = le32dec( prevhash + i );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[ 9+i ] = be32dec( merkle_tree + i );
|
||||
|
||||
g_work->data[ algo_gate.ntime_index ] = ntime;
|
||||
g_work->data[ algo_gate.nbits_index ] = nbits;
|
||||
g_work->data[22] = 0x80000000;
|
||||
g_work->data[31] = 0x00000280;
|
||||
}
|
||||
|
||||
// hodl build_extra_header is redundant, hodl can use std_build_extra_header
|
||||
// and call hodl_build_block_header.
|
||||
#if 0
|
||||
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
|
||||
{
|
||||
uchar merkle_tree[64] = { 0 };
|
||||
size_t t;
|
||||
// int i;
|
||||
|
||||
algo_gate.gen_merkle_root( merkle_tree, sctx );
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
|
||||
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
|
||||
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
|
||||
le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
|
||||
/*
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
@@ -63,7 +130,9 @@ void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
|
||||
g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
|
||||
g_work->data[22] = 0x80000000;
|
||||
g_work->data[31] = 0x00000280;
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
|
||||
// called only by thread 0, saves a backup of g_work
|
||||
void hodl_get_new_work( struct work* work, struct work* g_work)
|
||||
@@ -73,6 +142,22 @@ void hodl_get_new_work( struct work* work, struct work* g_work)
|
||||
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
|
||||
}
|
||||
|
||||
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
|
||||
{
|
||||
json_t *val;
|
||||
char *req = NULL;
|
||||
|
||||
if ( have_gbt )
|
||||
{
|
||||
req = malloc( strlen( gbt_lp_req ) + strlen( lp_id ) + 1 );
|
||||
sprintf( req, gbt_lp_req, lp_id );
|
||||
}
|
||||
val = json_rpc_call( curl, lp_url, rpc_userpass,
|
||||
req ? req : getwork_req, err, JSON_RPC_LONGPOLL );
|
||||
free( req );
|
||||
return val;
|
||||
}
|
||||
|
||||
// called by every thread, copies the backup to each thread's work.
|
||||
void hodl_resync_threads( struct work* work )
|
||||
{
|
||||
@@ -108,17 +193,26 @@ bool register_hodl_algo( algo_gate_t* gate )
|
||||
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
|
||||
return false;
|
||||
#endif
|
||||
// if ( TOTAL_CHUNKS % opt_n_threads )
|
||||
// {
|
||||
// applog(LOG_ERR,"Thread count must be power of 2.");
|
||||
// return false;
|
||||
// }
|
||||
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&hodl_scanhash;
|
||||
gate->get_new_work = (void*)&hodl_get_new_work;
|
||||
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
|
||||
gate->set_target = (void*)&hodl_set_target;
|
||||
gate->build_stratum_request = (void*)&hodl_le_build_stratum_request;
|
||||
gate->build_extraheader = (void*)&hodl_build_extraheader;
|
||||
gate->malloc_txs_request = (void*)&hodl_malloc_txs_request;
|
||||
gate->build_block_header = (void*)&hodl_build_block_header;
|
||||
// gate->build_extraheader = (void*)&hodl_build_extraheader;
|
||||
gate->resync_threads = (void*)&hodl_resync_threads;
|
||||
gate->do_this_thread = (void*)&hodl_do_this_thread;
|
||||
gate->work_cmp_size = 76;
|
||||
hodl_scratchbuf = (unsigned char*)malloc( 1 << 30 );
|
||||
allow_getwork = false;
|
||||
return ( hodl_scratchbuf != NULL );
|
||||
}
|
||||
|
||||
|
@@ -10,23 +10,27 @@
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
|
||||
void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, void *MidHash)
|
||||
void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
|
||||
void *MidHash )
|
||||
{
|
||||
#ifdef __AVX__
|
||||
uint64_t* TempBufs[SHA512_PARALLEL_N] ;
|
||||
uint64_t* desination[SHA512_PARALLEL_N];
|
||||
const int Chunk = TOTAL_CHUNKS / ThreadCount;
|
||||
const uint32_t StartChunk = ThreadID * Chunk;
|
||||
const uint32_t EndChunk = StartChunk + Chunk;
|
||||
|
||||
for ( int i=0; i<SHA512_PARALLEL_N; ++i )
|
||||
#ifdef __SSE4_2__
|
||||
//#ifdef __AVX__
|
||||
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
|
||||
uint64_t* desination[ SHA512_PARALLEL_N ];
|
||||
|
||||
for ( int i=0; i < SHA512_PARALLEL_N; ++i )
|
||||
{
|
||||
TempBufs[i] = (uint64_t*)malloc(32);
|
||||
memcpy(TempBufs[i], MidHash, 32);
|
||||
TempBufs[i] = (uint64_t*)malloc( 32 );
|
||||
memcpy( TempBufs[i], MidHash, 32 );
|
||||
}
|
||||
|
||||
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
|
||||
for ( uint32_t i = StartChunk;
|
||||
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); i+= SHA512_PARALLEL_N )
|
||||
for ( uint32_t i = StartChunk; i < EndChunk; i += SHA512_PARALLEL_N )
|
||||
{
|
||||
for ( int j=0; j<SHA512_PARALLEL_N; ++j )
|
||||
for ( int j = 0; j < SHA512_PARALLEL_N; ++j )
|
||||
{
|
||||
( (uint32_t*)TempBufs[j] )[0] = i + j;
|
||||
desination[j] = (uint64_t*)( (uint8_t *)Garbage + ( (i+j)
|
||||
@@ -35,15 +39,13 @@ void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, voi
|
||||
sha512Compute32b_parallel( TempBufs, desination );
|
||||
}
|
||||
|
||||
for ( int i=0; i<SHA512_PARALLEL_N; ++i )
|
||||
for ( int i = 0; i < SHA512_PARALLEL_N; ++i )
|
||||
free( TempBufs[i] );
|
||||
#else
|
||||
uint32_t TempBuf[8];
|
||||
memcpy( TempBuf, MidHash, 32 );
|
||||
|
||||
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
|
||||
for ( uint32_t i = StartChunk;
|
||||
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); ++i )
|
||||
for ( uint32_t i = StartChunk; i < EndChunk; ++i )
|
||||
{
|
||||
TempBuf[0] = i;
|
||||
SHA512( ( uint8_t *)TempBuf, 32,
|
||||
@@ -62,7 +64,8 @@ void Rev256(uint32_t *Dest, const uint32_t *Src)
|
||||
int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
#ifdef __AVX__
|
||||
#ifdef __SSE4_2__
|
||||
//#ifdef __AVX__
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#ifndef __AVX2__
|
||||
#ifdef __AVX__
|
||||
#ifdef __SSE4_2__
|
||||
//#ifdef __AVX__
|
||||
|
||||
//Dependencies
|
||||
#include <string.h>
|
||||
|
@@ -6,7 +6,8 @@
|
||||
|
||||
void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf);
|
||||
|
||||
#ifdef __AVX__
|
||||
#ifdef __SSE4_2__
|
||||
//#ifdef __AVX__
|
||||
|
||||
#define AES_PARALLEL_N 8
|
||||
#define BLOCK_COUNT 256
|
||||
|
@@ -60,7 +60,7 @@ static const sph_u64 RC[] = {
|
||||
#define AND64(d, a, b) (d = _mm256_and_si256(a,b))
|
||||
#define OR64(d, a, b) (d = _mm256_or_si256(a,b))
|
||||
#define NOT64(d, s) (d = _mm256_xor_si256(s,m256_neg1))
|
||||
#define ROL64(d, v, n) (d = mm256_rotl_64(v, n))
|
||||
#define ROL64(d, v, n) (d = mm256_rol_64(v, n))
|
||||
#define XOR64_IOTA XOR64
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
||||
|
@@ -13,7 +13,7 @@ bool register_allium_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0xFFFFLL;
|
||||
return true;
|
||||
|
@@ -386,7 +386,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
||||
|
||||
#if defined(__AVX2__)
|
||||
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
||||
#elif defined(__AVX__)
|
||||
#elif defined(__SSE4_2__)
|
||||
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
||||
#else
|
||||
memset( wholeMatrix, 0, i );
|
||||
|
@@ -17,7 +17,7 @@ bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2h;
|
||||
gate->hash = (void*)&lyra2h_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2h_set_target;
|
||||
return true;
|
||||
|
@@ -132,7 +132,7 @@ void lyra2re_set_target ( struct work* work, double job_diff )
|
||||
bool register_lyra2re_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_lyra2re_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_lyra2re;
|
||||
gate->hash = (void*)&lyra2re_hash;
|
||||
gate->get_max64 = (void*)&lyra2re_get_max64;
|
||||
|
@@ -31,7 +31,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
gate->set_target = (void*)&lyra2rev2_set_target;
|
||||
return true;
|
||||
|
@@ -21,7 +21,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2z_set_target;
|
||||
return true;
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
@@ -69,7 +69,7 @@ bool lyra2z330_thread_init()
|
||||
|
||||
bool register_lyra2z330_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z330;
|
||||
gate->hash = (void*)&lyra2z330_hash;
|
||||
|
@@ -51,7 +51,7 @@ inline void initState( uint64_t State[/*16*/] )
|
||||
state[3] = _mm256_set_epi64x( blake2b_IV[7], blake2b_IV[6],
|
||||
blake2b_IV[5], blake2b_IV[4] );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
|
||||
@@ -137,7 +137,7 @@ inline void squeeze( uint64_t *State, byte *Out, unsigned int len )
|
||||
//Squeezes remaining bytes
|
||||
memcpy_256( out, state, ( len_m256i % BLOCK_LEN_M256I ) );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
const int len_m128i = len / 16;
|
||||
const int fullBlocks = len_m128i / BLOCK_LEN_M128I;
|
||||
@@ -186,16 +186,26 @@ inline void absorbBlock( uint64_t *State, const uint64_t *In )
|
||||
{
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i* in = (__m256i*)In;
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i *in = (__m256i*)In;
|
||||
|
||||
state[0] = _mm256_xor_si256( state[0], in[0] );
|
||||
state[1] = _mm256_xor_si256( state[1], in[1] );
|
||||
state[2] = _mm256_xor_si256( state[2], in[2] );
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
LYRA_12_ROUNDS_AVX2( state[0], state[1], state[2], state[3] );
|
||||
state0 = _mm256_xor_si256( state0, in[0] );
|
||||
state1 = _mm256_xor_si256( state1, in[1] );
|
||||
state2 = _mm256_xor_si256( state2, in[2] );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
LYRA_12_ROUNDS_AVX2( state0, state1, state2, state3 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i* in = (__m128i*)In;
|
||||
@@ -245,15 +255,25 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In )
|
||||
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i* in = (__m256i*)In;
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i *in = (__m256i*)In;
|
||||
|
||||
state[0] = _mm256_xor_si256( state[0], in[0] );
|
||||
state[1] = _mm256_xor_si256( state[1], in[1] );
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
LYRA_12_ROUNDS_AVX2( state[0], state[1], state[2], state[3] );
|
||||
state0 = _mm256_xor_si256( state0, in[0] );
|
||||
state1 = _mm256_xor_si256( state1, in[1] );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
LYRA_12_ROUNDS_AVX2( state0, state1, state2, state3 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i* in = (__m128i*)In;
|
||||
@@ -301,14 +321,14 @@ inline void reducedSqueezeRow0( uint64_t* State, uint64_t* rowOut,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* out = (__m256i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( out - i, _MM_HINT_T0 );
|
||||
@@ -330,13 +350,12 @@ inline void reducedSqueezeRow0( uint64_t* State, uint64_t* rowOut,
|
||||
LYRA_ROUND_AVX2( state0, state1, state2, state3 );
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i state0 = _mm_load_si128( state );
|
||||
@@ -429,15 +448,15 @@ inline void reducedDuplexRow1( uint64_t *State, uint64_t *rowIn,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* in = (__m256i*)rowIn;
|
||||
__m256i* out = (__m256i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( in + i, _MM_HINT_T0 );
|
||||
@@ -470,12 +489,12 @@ inline void reducedDuplexRow1( uint64_t *State, uint64_t *rowIn,
|
||||
out -= BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i state0 = _mm_load_si128( state );
|
||||
@@ -608,17 +627,17 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* in = (__m256i*)rowIn;
|
||||
__m256i* inout = (__m256i*)rowInOut;
|
||||
__m256i* out = (__m256i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
|
||||
__m256i t0, t1, t2;
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( in + i, _MM_HINT_T0 );
|
||||
@@ -670,12 +689,12 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
||||
out -= BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* in = (__m128i*)rowIn;
|
||||
__m128i* inout = (__m128i*)rowInOut;
|
||||
@@ -842,17 +861,17 @@ inline void reducedDuplexRow( uint64_t *State, uint64_t *rowIn,
|
||||
|
||||
#if defined __AVX2__
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* in = (__m256i*)rowIn;
|
||||
__m256i* inout = (__m256i*)rowInOut;
|
||||
__m256i* out = (__m256i*)rowOut;
|
||||
__m256i t0, t1, t2;
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( in + i, _MM_HINT_T0 );
|
||||
@@ -906,12 +925,12 @@ inline void reducedDuplexRow( uint64_t *State, uint64_t *rowIn,
|
||||
inout += BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined __AVX__
|
||||
#elif defined(__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i* in = (__m128i*)rowIn;
|
||||
|
@@ -55,23 +55,23 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
// returns void, updates all args
|
||||
#define G_4X64(a,b,c,d) \
|
||||
a = _mm256_add_epi64( a, b ); \
|
||||
d = mm256_rotr_64( _mm256_xor_si256( d, a), 32 ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a), 32 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 24 ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 24 ); \
|
||||
a = _mm256_add_epi64( a, b ); \
|
||||
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 63 );
|
||||
|
||||
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
s1 = mm256_rotr256_1x64( s1); \
|
||||
s1 = mm256_ror256_1x64( s1); \
|
||||
s2 = mm256_swap_128( s2 ); \
|
||||
s3 = mm256_rotl256_1x64( s3 ); \
|
||||
s3 = mm256_rol256_1x64( s3 ); \
|
||||
G_4X64( s0, s1, s2, s3 ); \
|
||||
s1 = mm256_rotl256_1x64( s1 ); \
|
||||
s1 = mm256_rol256_1x64( s1 ); \
|
||||
s2 = mm256_swap_128( s2 ); \
|
||||
s3 = mm256_rotr256_1x64( s3 );
|
||||
s3 = mm256_ror256_1x64( s3 );
|
||||
|
||||
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
@@ -87,32 +87,31 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
|
||||
#else
|
||||
// only available with avx
|
||||
#elif defined(__SSE4_2__)
|
||||
|
||||
// process 2 columns in parallel
|
||||
// returns void, all args updated
|
||||
#define G_2X64(a,b,c,d) \
|
||||
a = _mm_add_epi64( a, b ); \
|
||||
d = mm_rotr_64( _mm_xor_si128( d, a), 32 ); \
|
||||
d = mm_ror_64( _mm_xor_si128( d, a), 32 ); \
|
||||
c = _mm_add_epi64( c, d ); \
|
||||
b = mm_rotr_64( _mm_xor_si128( b, c ), 24 ); \
|
||||
b = mm_ror_64( _mm_xor_si128( b, c ), 24 ); \
|
||||
a = _mm_add_epi64( a, b ); \
|
||||
d = mm_rotr_64( _mm_xor_si128( d, a ), 16 ); \
|
||||
d = mm_ror_64( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi64( c, d ); \
|
||||
b = mm_rotr_64( _mm_xor_si128( b, c ), 63 );
|
||||
b = mm_ror_64( _mm_xor_si128( b, c ), 63 );
|
||||
|
||||
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
mm_rotl256_1x64( s2, s3 ); \
|
||||
mm_ror256_1x64( s2, s3 ); \
|
||||
mm_swap_128( s4, s5 ); \
|
||||
mm_rotr256_1x64( s6, s7 ); \
|
||||
mm_rol256_1x64( s6, s7 ); \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
mm_rotr256_1x64( s2, s3 ); \
|
||||
mm_rol256_1x64( s2, s3 ); \
|
||||
mm_swap_128( s4, s5 ); \
|
||||
mm_rotl256_1x64( s6, s7 );
|
||||
mm_ror256_1x64( s6, s7 );
|
||||
|
||||
#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
|
@@ -375,7 +375,7 @@ out:
|
||||
|
||||
bool register_m7m_algo( algo_gate_t *gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->optimizations = SHA_OPT;
|
||||
init_m7m_ctx();
|
||||
gate->scanhash = (void*)scanhash_m7m_hash;
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
|
@@ -11,7 +11,7 @@ bool register_deep_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_deep;
|
||||
gate->hash = (void*)&deep_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -11,7 +11,7 @@ bool register_qubit_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_qubit;
|
||||
gate->hash = (void*)&qubit_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -40,6 +40,35 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
// don't use lbry_build_block_header, it can't handle clasim, do it inline
|
||||
// in lbry_build_extraheader. The side effect is no gbt support for lbry.
|
||||
void lbry_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits )
|
||||
{
|
||||
int i;
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = version;
|
||||
|
||||
if ( have_stratum )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = le32dec( prevhash + i );
|
||||
else
|
||||
for (i = 0; i < 8; i++)
|
||||
g_work->data[ 8-i ] = le32dec( prevhash + i );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = be32dec( merkle_root + i );
|
||||
|
||||
// for ( int i = 0; i < 8; i++ )
|
||||
// g_work->data[17 + i] = claim[i];
|
||||
|
||||
g_work->data[ LBRY_NTIME_INDEX ] = ntime;
|
||||
g_work->data[ LBRY_NBITS_INDEX ] = nbits;
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
|
||||
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
unsigned char merkle_root[64] = { 0 };
|
||||
@@ -50,14 +79,23 @@ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
// Assemble block header
|
||||
|
||||
// algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
|
||||
// (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_root,
|
||||
// le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
|
||||
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
|
||||
|
||||
g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
|
||||
g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
|
||||
g_work->data[28] = 0x80000000;
|
||||
@@ -72,7 +110,7 @@ int64_t lbry_get_max64() { return 0x1ffffLL; }
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->optimizations = AVX2_OPT | SHA_OPT;
|
||||
#if defined (LBRY_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_lbry_8way;
|
||||
gate->hash = (void*)&lbry_8way_hash;
|
||||
@@ -86,6 +124,7 @@ bool register_lbry_algo( algo_gate_t* gate )
|
||||
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
|
||||
gate->get_max64 = (void*)&lbry_get_max64;
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
// gate->build_block_header = (void*)&build_block_header;
|
||||
gate->build_extraheader = (void*)&lbry_build_extraheader;
|
||||
gate->set_target = (void*)&lbry_set_target;
|
||||
gate->ntime_index = LBRY_NTIME_INDEX;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "ripemd-hash-4way.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -42,10 +42,10 @@ static const uint32_t IV[5] =
|
||||
|
||||
#define RR(a, b, c, d, e, f, s, r, k) \
|
||||
do{ \
|
||||
a = _mm_add_epi32( mm_rotl_32( _mm_add_epi32( _mm_add_epi32( \
|
||||
a = _mm_add_epi32( mm_rol_32( _mm_add_epi32( _mm_add_epi32( \
|
||||
_mm_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||
_mm_set1_epi32( k ) ), s ), e ); \
|
||||
c = mm_rotl_32( c, 10 );\
|
||||
c = mm_rol_32( c, 10 );\
|
||||
} while (0)
|
||||
|
||||
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
||||
@@ -341,10 +341,10 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
|
||||
#define RR_8W(a, b, c, d, e, f, s, r, k) \
|
||||
do{ \
|
||||
a = _mm256_add_epi32( mm256_rotl_32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
a = _mm256_add_epi32( mm256_rol_32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
_mm256_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||
_mm256_set1_epi32( k ) ), s ), e ); \
|
||||
c = mm256_rotl_32( c, 10 );\
|
||||
c = mm256_rol_32( c, 10 );\
|
||||
} while (0)
|
||||
|
||||
#define ROUND1_8W(a, b, c, d, e, f, s, r, k) \
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
@@ -34,5 +34,5 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
#endif // RIPEMD_HASH_4WAY_H__
|
||||
|
@@ -778,7 +778,7 @@ bool scrypt_miner_thread_init( int thr_id )
|
||||
|
||||
bool register_scrypt_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_scrypt;
|
||||
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
|
||||
|
@@ -30,7 +30,7 @@
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -98,19 +98,19 @@ static const sph_u32 K256[64] = {
|
||||
|
||||
#define BSG2_0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 2), mm_rotr_32(x, 13) ), mm_rotr_32( x, 22) )
|
||||
mm_ror_32(x, 2), mm_ror_32(x, 13) ), mm_ror_32( x, 22) )
|
||||
|
||||
#define BSG2_1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 6), mm_rotr_32(x, 11) ), mm_rotr_32( x, 25) )
|
||||
mm_ror_32(x, 6), mm_ror_32(x, 11) ), mm_ror_32( x, 25) )
|
||||
|
||||
#define SSG2_0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 7), mm_rotr_32(x, 18) ), _mm_srli_epi32(x, 3) )
|
||||
mm_ror_32(x, 7), mm_ror_32(x, 18) ), _mm_srli_epi32(x, 3) )
|
||||
|
||||
#define SSG2_1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 17), mm_rotr_32(x, 19) ), _mm_srli_epi32(x, 10) )
|
||||
mm_ror_32(x, 17), mm_ror_32(x, 19) ), _mm_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
@@ -311,19 +311,19 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
|
||||
#define BSG2_0x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 2), mm256_rotr_32(x, 13) ), mm256_rotr_32( x, 22) )
|
||||
mm256_ror_32(x, 2), mm256_ror_32(x, 13) ), mm256_ror_32( x, 22) )
|
||||
|
||||
#define BSG2_1x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 6), mm256_rotr_32(x, 11) ), mm256_rotr_32( x, 25) )
|
||||
mm256_ror_32(x, 6), mm256_ror_32(x, 11) ), mm256_ror_32( x, 25) )
|
||||
|
||||
#define SSG2_0x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 7), mm256_rotr_32(x, 18) ), _mm256_srli_epi32(x, 3) )
|
||||
mm256_ror_32(x, 7), mm256_ror_32(x, 18) ), _mm256_srli_epi32(x, 3) )
|
||||
|
||||
#define SSG2_1x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 17), mm256_rotr_32(x, 19) ), _mm256_srli_epi32(x, 10) )
|
||||
mm256_ror_32(x, 17), mm256_ror_32(x, 19) ), _mm256_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA2x_MEXP( a, b, c, d ) \
|
||||
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
@@ -373,9 +373,6 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
|
||||
H = r[7];
|
||||
|
||||
SHA2s_8WAY_STEP( A, B, C, D, E, F, G, H, 0, 0 );
|
||||
|
||||
//printf("sha256 8 step: D= %08lx H= %08lx\n",*(uint32_t*)&D,*(uint32_t*)&H);
|
||||
|
||||
SHA2s_8WAY_STEP( H, A, B, C, D, E, F, G, 1, 0 );
|
||||
SHA2s_8WAY_STEP( G, H, A, B, C, D, E, F, 2, 0 );
|
||||
SHA2s_8WAY_STEP( F, G, H, A, B, C, D, E, 3, 0 );
|
||||
@@ -392,8 +389,6 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
|
||||
SHA2s_8WAY_STEP( C, D, E, F, G, H, A, B, 14, 0 );
|
||||
SHA2s_8WAY_STEP( B, C, D, E, F, G, H, A, 15, 0 );
|
||||
|
||||
//printf("sha256 8 step: A= %08lx B= %08lx\n",*(uint32_t*)&A,*(uint32_t*)&B);
|
||||
|
||||
for ( int j = 16; j < 64; j += 16 )
|
||||
{
|
||||
W[ 0] = SHA2x_MEXP( 14, 9, 1, 0 );
|
||||
@@ -460,17 +455,7 @@ void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
const int buf_size = 64;
|
||||
/*
|
||||
printf("sha256 8 update1: len= %d\n", len);
|
||||
uint32_t* d = (uint32_t*)data;
|
||||
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
|
||||
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
|
||||
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[128],d[136],d[144],d[152]);
|
||||
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[160],d[168],d[176],d[184]);
|
||||
printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[192],d[200],d[208],d[216]);
|
||||
*/
|
||||
|
||||
ptr = (unsigned)sc->count_low & (buf_size - 1U);
|
||||
while ( len > 0 )
|
||||
{
|
||||
@@ -486,24 +471,7 @@ printf("sha256 8 in: %08lx %08lx %08lx %08lx\n",d[192],d[200],d[208],d[216]);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
/*
|
||||
printf("sha256 8 update2: compress\n");
|
||||
d = (uint32_t*)sc->buf;
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
|
||||
d= (uint32_t*)sc->val;
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
*/
|
||||
sha256_8way_round( sc->buf, sc->val );
|
||||
/*
|
||||
printf("sha256 8 update3\n");
|
||||
d= (uint32_t*)sc->val;
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
*/
|
||||
ptr = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
@@ -522,32 +490,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
const int pad = buf_size - 8;
|
||||
|
||||
ptr = (unsigned)sc->count_low & (buf_size - 1U);
|
||||
/*
|
||||
printf("sha256 8 close1: ptr= %d\n", ptr);
|
||||
uint32_t* d = (uint32_t*)sc->buf;
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
|
||||
*/
|
||||
|
||||
sc->buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
|
||||
ptr += 4;
|
||||
|
||||
if ( ptr > pad )
|
||||
{
|
||||
memset_zero_256( sc->buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||
|
||||
//printf("sha256 8 close2: compress\n");
|
||||
//uint32_t* d = (uint32_t*)sc->buf;
|
||||
//printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
|
||||
|
||||
sha256_8way_round( sc->buf, sc->val );
|
||||
|
||||
//d= (uint32_t*)sc->val;
|
||||
//printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
|
||||
memset_zero_256( sc->buf, pad >> 2 );
|
||||
}
|
||||
else
|
||||
@@ -561,23 +510,9 @@ printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
|
||||
mm256_bswap_32( _mm256_set1_epi32( high ) );
|
||||
sc->buf[ ( pad+4 ) >> 2 ] =
|
||||
mm256_bswap_32( _mm256_set1_epi32( low ) );
|
||||
/*
|
||||
d = (uint32_t*)sc->buf;
|
||||
printf("sha256 8 close3: compress\n");
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[64],d[72],d[80],d[88]);
|
||||
printf("sha256 8 buf: %08lx %08lx %08lx %08lx\n",d[96],d[104],d[112],d[120]);
|
||||
d= (uint32_t*)sc->val;
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
*/
|
||||
|
||||
sha256_8way_round( sc->buf, sc->val );
|
||||
/*
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[0],d[8],d[16],d[24]);
|
||||
printf("sha256 8 val: %08lx %08lx %08lx %08lx\n",d[32],d[40],d[48],d[56]);
|
||||
*/
|
||||
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
((__m256i*)dst)[u] = mm256_bswap_32( sc->val[u] );
|
||||
}
|
||||
@@ -644,19 +579,19 @@ static const sph_u64 K512[80] = {
|
||||
|
||||
#define BSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 28), mm256_rotr_64(x, 34) ), mm256_rotr_64(x, 39) )
|
||||
mm256_ror_64(x, 28), mm256_ror_64(x, 34) ), mm256_ror_64(x, 39) )
|
||||
|
||||
#define BSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 14), mm256_rotr_64(x, 18) ), mm256_rotr_64(x, 41) )
|
||||
mm256_ror_64(x, 14), mm256_ror_64(x, 18) ), mm256_ror_64(x, 41) )
|
||||
|
||||
#define SSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 1), mm256_rotr_64(x, 8) ), _mm256_srli_epi64(x, 7) )
|
||||
mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) )
|
||||
|
||||
#define SSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 19), mm256_rotr_64(x, 61) ), _mm256_srli_epi64(x, 6) )
|
||||
mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) )
|
||||
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
@@ -781,4 +716,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user