mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.8.5
This commit is contained in:
21
Makefile.am
21
Makefile.am
@@ -31,12 +31,19 @@ cpuminer_SOURCES = \
|
||||
crypto/hash.c \
|
||||
crypto/aesb.c \
|
||||
crypto/magimath.cpp \
|
||||
algo/argon2/argon2a.c \
|
||||
algo/argon2/ar2/argon2.c \
|
||||
algo/argon2/ar2/opt.c \
|
||||
algo/argon2/ar2/cores.c \
|
||||
algo/argon2/ar2/ar2-scrypt-jane.c \
|
||||
algo/argon2/ar2/blake2b.c \
|
||||
algo/argon2/argon2a/argon2a.c \
|
||||
algo/argon2/argon2a/ar2/argon2.c \
|
||||
algo/argon2/argon2a/ar2/opt.c \
|
||||
algo/argon2/argon2a/ar2/cores.c \
|
||||
algo/argon2/argon2a/ar2/ar2-scrypt-jane.c \
|
||||
algo/argon2/argon2a/ar2/blake2b.c \
|
||||
algo/argon2/argon2d/argon2d-gate.c \
|
||||
algo/argon2/argon2d/blake2/blake2b.c \
|
||||
algo/argon2/argon2d/argon2d/argon2.c \
|
||||
algo/argon2/argon2d/argon2d/core.c \
|
||||
algo/argon2/argon2d/argon2d/opt.c \
|
||||
algo/argon2/argon2d/argon2d/thread.c \
|
||||
algo/argon2/argon2d/argon2d/encoding.c \
|
||||
algo/blake/sph_blake.c \
|
||||
algo/blake/blake-hash-4way.c \
|
||||
algo/blake/blake-gate.c \
|
||||
@@ -153,6 +160,8 @@ cpuminer_SOURCES = \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha2-hash-4way.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t-gate.c \
|
||||
algo/sha/sha256t-4way.c \
|
||||
algo/sha/sha256t.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/shabal/shabal-hash-4way.c \
|
||||
|
@@ -42,7 +42,9 @@ Supported Algorithms
|
||||
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2
|
||||
argon2 Argon2 coin (AR2)
|
||||
argon2d-crds Credits (CRDS)
|
||||
argon2d-dyn Dynamic (DYN)
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
|
@@ -19,7 +19,7 @@ Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Exe name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-march=core2" Core2, Nehalem
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
|
||||
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2" Haswell...
|
||||
|
@@ -160,6 +160,12 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.5
|
||||
|
||||
Added argon2d-crds and argon2d-dyn algos.
|
||||
sha256t 8 way AVX2 & 4 way SSE4.2 optimized.
|
||||
CPUs with SSE4.2 get optimizations previously reserved for AVX.
|
||||
|
||||
v3.8.4.1
|
||||
|
||||
Fixed sha256t low difficulty rejects.
|
||||
|
@@ -160,6 +160,8 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2DCRDS: register_argon2d_crds_algo( gate ); break;
|
||||
case ALGO_ARGON2DDYN: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
|
@@ -87,10 +87,10 @@ typedef uint32_t set_t;
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define AVX_OPT 4
|
||||
#define AVX2_OPT 8
|
||||
#define SHA_OPT 0x10
|
||||
//#define FOUR_WAY_OPT 0x20
|
||||
#define SSE42_OPT 4
|
||||
#define AVX_OPT 8
|
||||
#define AVX2_OPT 0x10
|
||||
#define SHA_OPT 0x20
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
|
||||
|
@@ -99,18 +99,18 @@ static const char *Argon2_ErrorMessage[] = {
|
||||
{ARGON2_MISSING_ARGS, */ "Missing arguments", /*},*/
|
||||
};
|
||||
|
||||
int argon2d(argon2_context *context) { return argon2_core(context, Argon2_d); }
|
||||
int argon2d(argon2_context *context) { return ar2_argon2_core(context, Argon2_d); }
|
||||
|
||||
int argon2i(argon2_context *context) { return argon2_core(context, Argon2_i); }
|
||||
int argon2i(argon2_context *context) { return ar2_argon2_core(context, Argon2_i); }
|
||||
|
||||
int verify_d(argon2_context *context, const char *hash)
|
||||
int ar2_verify_d(argon2_context *context, const char *hash)
|
||||
{
|
||||
int result;
|
||||
/*if (0 == context->outlen || NULL == hash) {
|
||||
return ARGON2_OUT_PTR_MISMATCH;
|
||||
}*/
|
||||
|
||||
result = argon2_core(context, Argon2_d);
|
||||
result = ar2_argon2_core(context, Argon2_d);
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
@@ -223,7 +223,7 @@ static size_t to_base64(char *dst, size_t dst_len, const void *src)
|
||||
* The output length is always exactly 32 bytes.
|
||||
*/
|
||||
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx)
|
||||
int ar2_encode_string(char *dst, size_t dst_len, argon2_context *ctx)
|
||||
{
|
||||
#define SS(str) \
|
||||
do { \
|
@@ -255,7 +255,7 @@ int argon2id(argon2_context *context);
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
int verify_d(argon2_context *context, const char *hash);
|
||||
int ar2_verify_d(argon2_context *context, const char *hash);
|
||||
|
||||
/*
|
||||
* Get the associated error message for given error code
|
||||
@@ -283,7 +283,7 @@ const char *error_message(int error_code);
|
||||
* The output length is always exactly 32 bytes.
|
||||
*/
|
||||
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx);
|
||||
int ar2_encode_string(char *dst, size_t dst_len, argon2_context *ctx);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
@@ -52,22 +52,22 @@ enum {
|
||||
};
|
||||
|
||||
/* Streaming API */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
int ar2_blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int ar2_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen);
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int ar2_blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int ar2_blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
int ar2_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, const void *in, const void *key, size_t keylen);
|
||||
int ar2_blake2b(void *out, const void *in, const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *out, const void *in);
|
||||
int ar2_blake2b_long(void *out, const void *in);
|
||||
/* Argon2 Team - End Code */
|
||||
/* Miouyouyou */
|
||||
void blake2b_too(void *out, const void *in);
|
||||
void ar2_blake2b_too(void *out, const void *in);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
@@ -107,7 +107,7 @@ static const blake2b_state miou = {
|
||||
};
|
||||
|
||||
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P)
|
||||
int ar2_blake2b_init_param(blake2b_state *S, const blake2b_param *P)
|
||||
{
|
||||
const unsigned char *p = (const unsigned char *)P;
|
||||
unsigned int i;
|
||||
@@ -133,7 +133,7 @@ void compare_buffs(uint64_t *h, size_t outlen)
|
||||
}
|
||||
|
||||
/* Sequential blake2b initialization */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen)
|
||||
int ar2_blake2b_init(blake2b_state *S, size_t outlen)
|
||||
{
|
||||
memcpy(S, &miou, sizeof(*S));
|
||||
S->h[0] += outlen;
|
||||
@@ -147,7 +147,7 @@ void print64(const char *name, const uint64_t *array, uint16_t size)
|
||||
printf("};\n");
|
||||
}
|
||||
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen)
|
||||
int ar2_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -207,7 +207,7 @@ static void blake2b_compress(blake2b_state *S, const uint8_t *block)
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
int ar2_blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
{
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
/* Complete current block */
|
||||
@@ -235,7 +235,7 @@ void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
S->buflen += (unsigned int)inlen;
|
||||
}
|
||||
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen)
|
||||
int ar2_blake2b_final(blake2b_state *S, void *out, size_t outlen)
|
||||
{
|
||||
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
|
||||
unsigned int i;
|
||||
@@ -257,48 +257,48 @@ int blake2b_final(blake2b_state *S, void *out, size_t outlen)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b(void *out, const void *in, const void *key, size_t keylen)
|
||||
int ar2_blake2b(void *out, const void *in, const void *key, size_t keylen)
|
||||
{
|
||||
blake2b_state S;
|
||||
|
||||
blake2b_init(&S, 64);
|
||||
ar2_blake2b_init(&S, 64);
|
||||
my_blake2b_update(&S, in, 64);
|
||||
blake2b_final(&S, out, 64);
|
||||
ar2_blake2b_final(&S, out, 64);
|
||||
burn(&S, sizeof(S));
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blake2b_too(void *pout, const void *in)
|
||||
void ar2_blake2b_too(void *pout, const void *in)
|
||||
{
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
uint8_t out_buffer[64];
|
||||
uint8_t in_buffer[64];
|
||||
|
||||
blake2b_state blake_state;
|
||||
blake2b_init(&blake_state, 64);
|
||||
ar2_blake2b_init(&blake_state, 64);
|
||||
blake_state.buflen = blake_state.buf[1] = 4;
|
||||
my_blake2b_update(&blake_state, in, 72);
|
||||
blake2b_final(&blake_state, out_buffer, 64);
|
||||
ar2_blake2b_final(&blake_state, out_buffer, 64);
|
||||
memcpy(out, out_buffer, 32);
|
||||
out += 32;
|
||||
|
||||
register uint8_t i = 29;
|
||||
while (i--) {
|
||||
memcpy(in_buffer, out_buffer, 64);
|
||||
blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
ar2_blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
memcpy(out, out_buffer, 32);
|
||||
out += 32;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, 64);
|
||||
blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
ar2_blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
memcpy(out, out_buffer, 64);
|
||||
|
||||
burn(&blake_state, sizeof(blake_state));
|
||||
}
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *pout, const void *in)
|
||||
int ar2_blake2b_long(void *pout, const void *in)
|
||||
{
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
blake2b_state blake_state;
|
||||
@@ -306,10 +306,10 @@ int blake2b_long(void *pout, const void *in)
|
||||
|
||||
store32(outlen_bytes, 32);
|
||||
|
||||
blake2b_init(&blake_state, 32);
|
||||
ar2_blake2b_init(&blake_state, 32);
|
||||
my_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes));
|
||||
blake2b_update(&blake_state, in, 1024);
|
||||
blake2b_final(&blake_state, out, 32);
|
||||
ar2_blake2b_update(&blake_state, in, 1024);
|
||||
ar2_blake2b_final(&blake_state, out, 32);
|
||||
burn(&blake_state, sizeof(blake_state));
|
||||
return 0;
|
||||
}
|
@@ -51,15 +51,15 @@
|
||||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
void ar2_init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
//inline void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
|
||||
void copy_block(block *dst, const block *src) {
|
||||
void ar2_copy_block(block *dst, const block *src) {
|
||||
//inline void copy_block(block *dst, const block *src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_WORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
void xor_block(block *dst, const block *src) {
|
||||
void ar2_xor_block(block *dst, const block *src) {
|
||||
//inline void xor_block(block *dst, const block *src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
@@ -67,7 +67,7 @@ void xor_block(block *dst, const block *src) {
|
||||
}
|
||||
}
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
static void ar2_load_block(block *dst, const void *input) {
|
||||
//static inline void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
@@ -75,7 +75,7 @@ static void load_block(block *dst, const void *input) {
|
||||
}
|
||||
}
|
||||
|
||||
static void store_block(void *output, const block *src) {
|
||||
static void ar2_store_block(void *output, const block *src) {
|
||||
//static inline void store_block(void *output, const block *src) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
@@ -84,7 +84,7 @@ static void store_block(void *output, const block *src) {
|
||||
}
|
||||
|
||||
/***************Memory allocators*****************/
|
||||
int allocate_memory(block **memory, uint32_t m_cost) {
|
||||
int ar2_allocate_memory(block **memory, uint32_t m_cost) {
|
||||
if (memory != NULL) {
|
||||
size_t memory_size = sizeof(block) * m_cost;
|
||||
if (m_cost != 0 &&
|
||||
@@ -105,34 +105,34 @@ int allocate_memory(block **memory, uint32_t m_cost) {
|
||||
}
|
||||
}
|
||||
|
||||
void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
void ar2_secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
//inline void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
|
||||
/*********Memory functions*/
|
||||
|
||||
void clear_memory(argon2_instance_t *instance, int clear) {
|
||||
void ar2_clear_memory(argon2_instance_t *instance, int clear) {
|
||||
//inline void clear_memory(argon2_instance_t *instance, int clear) {
|
||||
if (instance->memory != NULL && clear) {
|
||||
secure_wipe_memory(instance->memory,
|
||||
ar2_secure_wipe_memory(instance->memory,
|
||||
sizeof(block) * /*instance->memory_blocks*/16);
|
||||
}
|
||||
}
|
||||
|
||||
void free_memory(block *memory) { free(memory); }
|
||||
void ar2_free_memory(block *memory) { free(memory); }
|
||||
//inline void free_memory(block *memory) { free(memory); }
|
||||
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
void ar2_finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
if (context != NULL && instance != NULL) {
|
||||
block blockhash;
|
||||
copy_block(&blockhash, instance->memory + 15);
|
||||
ar2_copy_block(&blockhash, instance->memory + 15);
|
||||
|
||||
/* Hash the result */
|
||||
{
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store_block(blockhash_bytes, &blockhash);
|
||||
blake2b_long(context->out, blockhash_bytes);
|
||||
secure_wipe_memory(blockhash.v, ARGON2_BLOCK_SIZE);
|
||||
secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); /* clear blockhash_bytes */
|
||||
ar2_store_block(blockhash_bytes, &blockhash);
|
||||
ar2_blake2b_long(context->out, blockhash_bytes);
|
||||
ar2_secure_wipe_memory(blockhash.v, ARGON2_BLOCK_SIZE);
|
||||
ar2_secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); /* clear blockhash_bytes */
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
@@ -142,11 +142,11 @@ void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
/* Clear memory */
|
||||
// clear_memory(instance, 1);
|
||||
|
||||
free_memory(instance->memory);
|
||||
ar2_free_memory(instance->memory);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
uint32_t ar2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane) {
|
||||
/*
|
||||
@@ -207,7 +207,7 @@ uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
return absolute_position;
|
||||
}
|
||||
|
||||
void fill_memory_blocks(argon2_instance_t *instance) {
|
||||
void ar2_fill_memory_blocks(argon2_instance_t *instance) {
|
||||
uint32_t r, s;
|
||||
|
||||
for (r = 0; r < 2; ++r) {
|
||||
@@ -218,7 +218,7 @@ void fill_memory_blocks(argon2_instance_t *instance) {
|
||||
position.lane = 0;
|
||||
position.slice = (uint8_t)s;
|
||||
position.index = 0;
|
||||
fill_segment(instance, position);
|
||||
ar2_fill_segment(instance, position);
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
@@ -227,19 +227,19 @@ void fill_memory_blocks(argon2_instance_t *instance) {
|
||||
}
|
||||
}
|
||||
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
void ar2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
/* Make the first and second block in each lane as G(H0||i||0) or
|
||||
G(H0||i||1) */
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, 0);
|
||||
blake2b_too(blockhash_bytes, blockhash);
|
||||
load_block(&instance->memory[0], blockhash_bytes);
|
||||
ar2_blake2b_too(blockhash_bytes, blockhash);
|
||||
ar2_load_block(&instance->memory[0], blockhash_bytes);
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
|
||||
blake2b_too(blockhash_bytes, blockhash);
|
||||
load_block(&instance->memory[1], blockhash_bytes);
|
||||
secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
ar2_blake2b_too(blockhash_bytes, blockhash);
|
||||
ar2_load_block(&instance->memory[1], blockhash_bytes);
|
||||
ar2_secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
|
||||
@@ -268,7 +268,7 @@ static const blake2b_state base_hash = {
|
||||
#define SALTLEN 32
|
||||
#define SECRETLEN 0
|
||||
#define ADLEN 0
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
void ar2_initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type) {
|
||||
|
||||
uint8_t value[sizeof(uint32_t)];
|
||||
@@ -280,7 +280,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
PWDLEN);
|
||||
|
||||
|
||||
secure_wipe_memory(context->pwd, PWDLEN);
|
||||
ar2_secure_wipe_memory(context->pwd, PWDLEN);
|
||||
context->pwdlen = 0;
|
||||
|
||||
store32(&value, SALTLEN);
|
||||
@@ -298,19 +298,19 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
int ar2_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
/* 1. Memory allocation */
|
||||
|
||||
|
||||
allocate_memory(&(instance->memory), 16);
|
||||
ar2_allocate_memory(&(instance->memory), 16);
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
/* Hashing all inputs */
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
initial_hash(blockhash, context, instance->type);
|
||||
ar2_initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ar2_secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
@@ -320,14 +320,14 @@ int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
fill_first_blocks(blockhash, instance);
|
||||
ar2_fill_first_blocks(blockhash, instance);
|
||||
/* Clearing the hash */
|
||||
secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
ar2_secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2_core(argon2_context *context, argon2_type type) {
|
||||
int ar2_argon2_core(argon2_context *context, argon2_type type) {
|
||||
argon2_instance_t instance;
|
||||
instance.memory = NULL;
|
||||
instance.type = type;
|
||||
@@ -336,14 +336,14 @@ int argon2_core(argon2_context *context, argon2_type type) {
|
||||
* blocks
|
||||
*/
|
||||
|
||||
int result = initialize(&instance, context);
|
||||
int result = ar2_initialize(&instance, context);
|
||||
if (ARGON2_OK != result) return result;
|
||||
|
||||
/* 4. Filling memory */
|
||||
fill_memory_blocks(&instance);
|
||||
ar2_fill_memory_blocks(&instance);
|
||||
|
||||
/* 5. Finalization */
|
||||
finalize(context, &instance);
|
||||
ar2_finalize(context, &instance);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
@@ -62,13 +62,13 @@ typedef struct _block { uint64_t v[ARGON2_WORDS_IN_BLOCK]; } ALIGN(16) block;
|
||||
/*****************Functions that work with the block******************/
|
||||
|
||||
/* Initialize each byte of the block with @in */
|
||||
void init_block_value(block *b, uint8_t in);
|
||||
void ar2_init_block_value(block *b, uint8_t in);
|
||||
|
||||
/* Copy block @src to block @dst */
|
||||
void copy_block(block *dst, const block *src);
|
||||
void ar2_copy_block(block *dst, const block *src);
|
||||
|
||||
/* XOR @src onto @dst bytewise */
|
||||
void xor_block(block *dst, const block *src);
|
||||
void ar2_xor_block(block *dst, const block *src);
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
@@ -101,24 +101,24 @@ typedef struct Argon2_position_t {
|
||||
* @param m_cost number of blocks to allocate in the memory
|
||||
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
|
||||
*/
|
||||
int allocate_memory(block **memory, uint32_t m_cost);
|
||||
int ar2_allocate_memory(block **memory, uint32_t m_cost);
|
||||
|
||||
/* Function that securely cleans the memory
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void secure_wipe_memory(void *v, size_t n);
|
||||
void ar2_secure_wipe_memory(void *v, size_t n);
|
||||
|
||||
/* Clears memory
|
||||
* @param instance pointer to the current instance
|
||||
* @param clear_memory indicates if we clear the memory with zeros.
|
||||
*/
|
||||
void clear_memory(argon2_instance_t *instance, int clear);
|
||||
void ar2_clear_memory(argon2_instance_t *instance, int clear);
|
||||
|
||||
/* Deallocates memory
|
||||
* @param memory pointer to the blocks
|
||||
*/
|
||||
void free_memory(block *memory);
|
||||
void ar2_free_memory(block *memory);
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
@@ -130,7 +130,7 @@ void free_memory(block *memory);
|
||||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
uint32_t ar2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
@@ -141,7 +141,7 @@ uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>
|
||||
*/
|
||||
int validate_inputs(const argon2_context *context);
|
||||
int ar2_validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
|
||||
@@ -153,7 +153,7 @@ int validate_inputs(const argon2_context *context);
|
||||
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
|
||||
* allocated
|
||||
*/
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
void ar2_initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
@@ -162,7 +162,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
* @param blockhash Pointer to the pre-hashing digest
|
||||
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
|
||||
*/
|
||||
void fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
void ar2_fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
@@ -174,7 +174,7 @@ void fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
int ar2_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
@@ -187,7 +187,7 @@ int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
* @pre if context->free_cbk is not NULL, it should point to a function that
|
||||
* deallocates memory
|
||||
*/
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
void ar2_finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
@@ -196,7 +196,7 @@ void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
void ar2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
/*
|
||||
@@ -204,13 +204,13 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
* blocks in each lane
|
||||
* @param instance Pointer to the current instance
|
||||
*/
|
||||
void fill_memory_blocks(argon2_instance_t *instance);
|
||||
void ar2_fill_memory_blocks(argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that performs memory-hard hashing with certain degree of parallelism
|
||||
* @param context Pointer to the Argon2 internal structure
|
||||
* @return Error code if smth is wrong, ARGON2_OK otherwise
|
||||
*/
|
||||
int argon2_core(argon2_context *context, argon2_type type);
|
||||
int ar2_argon2_core(argon2_context *context, argon2_type type);
|
||||
|
||||
#endif
|
@@ -26,7 +26,7 @@
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blamka-round-opt.h"
|
||||
|
||||
void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block)
|
||||
void ar2_fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block)
|
||||
{
|
||||
__m128i ALIGN(16) block_XY[ARGON2_QWORDS_IN_BLOCK];
|
||||
uint32_t i;
|
||||
@@ -95,7 +95,7 @@ static const uint64_t bad_rands[32] = {
|
||||
UINT64_C(8548260058287621283), UINT64_C(8641748798041936364)
|
||||
};
|
||||
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
void ar2_generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands)
|
||||
{
|
||||
@@ -113,7 +113,7 @@ void generate_addresses(const argon2_instance_t *instance,
|
||||
#define LANE_LENGTH 16
|
||||
#define POS_LANE 0
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
void ar2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position)
|
||||
{
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
@@ -129,7 +129,7 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
pseudo_rands = (uint64_t *)malloc(/*sizeof(uint64_t) * 4*/32);
|
||||
|
||||
if (data_independent_addressing) {
|
||||
generate_addresses(instance, &position, pseudo_rands);
|
||||
ar2_generate_addresses(instance, &position, pseudo_rands);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
@@ -173,12 +173,12 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,1);
|
||||
ref_index = ar2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,1);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block = instance->memory + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
fill_block(state, (__m128i const *)ref_block->v, (__m128i *)curr_block->v);
|
||||
ar2_fill_block(state, (__m128i const *)ref_block->v, (__m128i *)curr_block->v);
|
||||
}
|
||||
|
||||
free(pseudo_rands);
|
@@ -21,7 +21,7 @@
|
||||
* @param next_block Pointer to the block to be constructed
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block);
|
||||
void ar2_fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block);
|
||||
|
||||
/*
|
||||
* Generate pseudo-random values to reference blocks in the segment and puts
|
||||
@@ -31,7 +31,7 @@ void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block);
|
||||
* @param pseudo_rands Pointer to the array of 64-bit values
|
||||
* @pre pseudo_rands must point to @a instance->segment_length allocated values
|
||||
*/
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
void ar2_generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands);
|
||||
|
||||
@@ -43,7 +43,7 @@ void generate_addresses(const argon2_instance_t *instance,
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
void ar2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
#endif /* ARGON2_OPT_H */
|
@@ -24,7 +24,7 @@ inline void argon_call(void *out, void *in, void *salt, int type)
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
|
||||
argon2_core(&context, type);
|
||||
ar2_argon2_core(&context, type);
|
||||
}
|
||||
|
||||
void argon2hash(void *output, const void *input)
|
143
algo/argon2/argon2d/argon2d-gate.c
Normal file
143
algo/argon2/argon2d/argon2d-gate.c
Normal file
@@ -0,0 +1,143 @@
|
||||
#include "argon2d-gate.h"
|
||||
#include "argon2d/argon2.h"
|
||||
|
||||
static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Input Length = Salt Length (salt = input)
|
||||
static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash
|
||||
static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS
|
||||
|
||||
// Credits
|
||||
|
||||
void argon2d_crds_hash( void *output, const void *input )
|
||||
{
|
||||
argon2_context context;
|
||||
context.out = (uint8_t *)output;
|
||||
context.outlen = (uint32_t)OUTPUT_BYTES;
|
||||
context.pwd = (uint8_t *)input;
|
||||
context.pwdlen = (uint32_t)INPUT_BYTES;
|
||||
context.salt = (uint8_t *)input; //salt = input
|
||||
context.saltlen = (uint32_t)INPUT_BYTES;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
|
||||
// main configurable Argon2 hash parameters
|
||||
context.m_cost = 250; // Memory in KiB (~256KB)
|
||||
context.lanes = 4; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 1; // Iterations
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_crds_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d_crds;
|
||||
gate->hash = (void*)&argon2d_crds_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
}
|
||||
|
||||
// Dynamic
|
||||
|
||||
void argon2d_dyn_hash( void *output, const void *input )
|
||||
{
|
||||
argon2_context context;
|
||||
context.out = (uint8_t *)output;
|
||||
context.outlen = (uint32_t)OUTPUT_BYTES;
|
||||
context.pwd = (uint8_t *)input;
|
||||
context.pwdlen = (uint32_t)INPUT_BYTES;
|
||||
context.salt = (uint8_t *)input; //salt = input
|
||||
context.saltlen = (uint32_t)INPUT_BYTES;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
|
||||
// main configurable Argon2 hash parameters
|
||||
context.m_cost = 500; // Memory in KiB (512KB)
|
||||
context.lanes = 8; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 2; // Iterations
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_dyn_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d_dyn;
|
||||
gate->hash = (void*)&argon2d_dyn_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
}
|
||||
|
25
algo/argon2/argon2d/argon2d-gate.h
Normal file
25
algo/argon2/argon2d/argon2d-gate.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef ARGON2D_GATE_H__
|
||||
#define ARGON2D_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
// Credits
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_crds_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
// Dynamic
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_dyn_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
|
||||
#endif
|
||||
|
680
algo/argon2/argon2d/argon2d/argon2.c
Normal file
680
algo/argon2/argon2d/argon2d/argon2.c
Normal file
@@ -0,0 +1,680 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include "argon2.h"
|
||||
#include "encoding.h"
|
||||
#include "core.h"
|
||||
|
||||
const char *argon2_type2string(argon2_type type, int uppercase) {
|
||||
switch (type) {
|
||||
case Argon2_d:
|
||||
return uppercase ? "Argon2d" : "argon2d";
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int argon2_ctx(argon2_context *context, argon2_type type) {
|
||||
/* 1. Validate all inputs */
|
||||
int result = validate_inputs(context);
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (Argon2_d != type) {
|
||||
return ARGON2_INCORRECT_TYPE;
|
||||
}
|
||||
|
||||
/* 2. Align memory size */
|
||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||
memory_blocks = context->m_cost;
|
||||
|
||||
if (memory_blocks < 2 * ARGON2_SYNC_POINTS * context->lanes) {
|
||||
memory_blocks = 2 * ARGON2_SYNC_POINTS * context->lanes;
|
||||
}
|
||||
|
||||
segment_length = memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
|
||||
/* Ensure that all segments have equal length */
|
||||
memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.memory = NULL;
|
||||
instance.passes = context->t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context->lanes;
|
||||
instance.limit = 1;
|
||||
instance.threads = context->threads;
|
||||
instance.type = type;
|
||||
|
||||
if (instance.threads > instance.limit) {
|
||||
instance.threads = instance.limit;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
result = initialize(&instance, context);
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/* 4. Filling memory */
|
||||
result = fill_memory_blocks(&instance);
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
return result;
|
||||
}
|
||||
/* 5. Finalization */
|
||||
finalize(context, &instance);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt, const size_t saltlen,
|
||||
void *hash, const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type){
|
||||
|
||||
argon2_context context;
|
||||
int result;
|
||||
uint8_t *out;
|
||||
|
||||
if (pwdlen > ARGON2_MAX_PWD_LENGTH) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
if (saltlen > ARGON2_MAX_SALT_LENGTH) {
|
||||
return ARGON2_SALT_TOO_LONG;
|
||||
}
|
||||
|
||||
if (hashlen > ARGON2_MAX_OUTLEN) {
|
||||
return ARGON2_OUTPUT_TOO_LONG;
|
||||
}
|
||||
|
||||
if (hashlen < ARGON2_MIN_OUTLEN) {
|
||||
return ARGON2_OUTPUT_TOO_SHORT;
|
||||
}
|
||||
|
||||
out = malloc(hashlen);
|
||||
if (!out) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
context.out = (uint8_t *)out;
|
||||
context.outlen = (uint32_t)hashlen;
|
||||
context.pwd = CONST_CAST(uint8_t *)pwd;
|
||||
context.pwdlen = (uint32_t)pwdlen;
|
||||
context.salt = CONST_CAST(uint8_t *)salt;
|
||||
context.saltlen = (uint32_t)saltlen;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = t_cost;
|
||||
context.m_cost = m_cost;
|
||||
context.lanes = parallelism;
|
||||
context.threads = parallelism;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
|
||||
result = argon2_ctx(&context, type);
|
||||
|
||||
if (result != ARGON2_OK) {
|
||||
clear_internal_memory(out, hashlen);
|
||||
free(out);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* if raw hash requested, write it */
|
||||
if (hash) {
|
||||
memcpy(hash, out, hashlen);
|
||||
}
|
||||
|
||||
/* if encoding requested, write it */
|
||||
if (encoded && encodedlen) {
|
||||
if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) {
|
||||
clear_internal_memory(out, hashlen); /* wipe buffers if error */
|
||||
clear_internal_memory(encoded, encodedlen);
|
||||
free(out);
|
||||
return ARGON2_ENCODING_FAIL;
|
||||
}
|
||||
}
|
||||
clear_internal_memory(out, hashlen);
|
||||
free(out);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_d);
|
||||
}
|
||||
|
||||
static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) {
|
||||
size_t i;
|
||||
uint8_t d = 0U;
|
||||
|
||||
for (i = 0U; i < len; i++) {
|
||||
d |= b1[i] ^ b2[i];
|
||||
}
|
||||
return (int)((1 & ((d - 1) >> 8)) - 1);
|
||||
}
|
||||
|
||||
int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen,
|
||||
argon2_type type) {
|
||||
|
||||
argon2_context ctx;
|
||||
uint8_t *desired_result = NULL;
|
||||
|
||||
int ret = ARGON2_OK;
|
||||
|
||||
size_t encoded_len;
|
||||
uint32_t max_field_len;
|
||||
|
||||
if (pwdlen > ARGON2_MAX_PWD_LENGTH) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
if (encoded == NULL) {
|
||||
return ARGON2_DECODING_FAIL;
|
||||
}
|
||||
|
||||
encoded_len = strlen(encoded);
|
||||
if (encoded_len > UINT32_MAX) {
|
||||
return ARGON2_DECODING_FAIL;
|
||||
}
|
||||
|
||||
/* No field can be longer than the encoded length */
|
||||
max_field_len = (uint32_t)encoded_len;
|
||||
|
||||
ctx.saltlen = max_field_len;
|
||||
ctx.outlen = max_field_len;
|
||||
|
||||
ctx.salt = malloc(ctx.saltlen);
|
||||
ctx.out = malloc(ctx.outlen);
|
||||
if (!ctx.salt || !ctx.out) {
|
||||
ret = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ctx.pwd = (uint8_t *)pwd;
|
||||
ctx.pwdlen = (uint32_t)pwdlen;
|
||||
|
||||
ret = decode_string(&ctx, encoded, type);
|
||||
if (ret != ARGON2_OK) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Set aside the desired result, and get a new buffer. */
|
||||
desired_result = ctx.out;
|
||||
ctx.out = malloc(ctx.outlen);
|
||||
if (!ctx.out) {
|
||||
ret = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = argon2_verify_ctx(&ctx, (char *)desired_result, type);
|
||||
if (ret != ARGON2_OK) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
fail:
|
||||
free(ctx.salt);
|
||||
free(ctx.out);
|
||||
free(desired_result);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2d_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type) {
|
||||
int ret = argon2_ctx(context, type);
|
||||
if (ret != ARGON2_OK) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) {
|
||||
return ARGON2_VERIFY_MISMATCH;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2d_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_d);
|
||||
}
|
||||
|
||||
const char *argon2_error_message(int error_code) {
|
||||
switch (error_code) {
|
||||
case ARGON2_OK:
|
||||
return "OK";
|
||||
case ARGON2_OUTPUT_PTR_NULL:
|
||||
return "Output pointer is NULL";
|
||||
case ARGON2_OUTPUT_TOO_SHORT:
|
||||
return "Output is too short";
|
||||
case ARGON2_OUTPUT_TOO_LONG:
|
||||
return "Output is too long";
|
||||
case ARGON2_PWD_TOO_SHORT:
|
||||
return "Password is too short";
|
||||
case ARGON2_PWD_TOO_LONG:
|
||||
return "Password is too long";
|
||||
case ARGON2_SALT_TOO_SHORT:
|
||||
return "Salt is too short";
|
||||
case ARGON2_SALT_TOO_LONG:
|
||||
return "Salt is too long";
|
||||
case ARGON2_AD_TOO_SHORT:
|
||||
return "Associated data is too short";
|
||||
case ARGON2_AD_TOO_LONG:
|
||||
return "Associated data is too long";
|
||||
case ARGON2_SECRET_TOO_SHORT:
|
||||
return "Secret is too short";
|
||||
case ARGON2_SECRET_TOO_LONG:
|
||||
return "Secret is too long";
|
||||
case ARGON2_TIME_TOO_SMALL:
|
||||
return "Time cost is too small";
|
||||
case ARGON2_TIME_TOO_LARGE:
|
||||
return "Time cost is too large";
|
||||
case ARGON2_MEMORY_TOO_LITTLE:
|
||||
return "Memory cost is too small";
|
||||
case ARGON2_MEMORY_TOO_MUCH:
|
||||
return "Memory cost is too large";
|
||||
case ARGON2_LANES_TOO_FEW:
|
||||
return "Too few lanes";
|
||||
case ARGON2_LANES_TOO_MANY:
|
||||
return "Too many lanes";
|
||||
case ARGON2_PWD_PTR_MISMATCH:
|
||||
return "Password pointer is NULL, but password length is not 0";
|
||||
case ARGON2_SALT_PTR_MISMATCH:
|
||||
return "Salt pointer is NULL, but salt length is not 0";
|
||||
case ARGON2_SECRET_PTR_MISMATCH:
|
||||
return "Secret pointer is NULL, but secret length is not 0";
|
||||
case ARGON2_AD_PTR_MISMATCH:
|
||||
return "Associated data pointer is NULL, but ad length is not 0";
|
||||
case ARGON2_MEMORY_ALLOCATION_ERROR:
|
||||
return "Memory allocation error";
|
||||
case ARGON2_FREE_MEMORY_CBK_NULL:
|
||||
return "The free memory callback is NULL";
|
||||
case ARGON2_ALLOCATE_MEMORY_CBK_NULL:
|
||||
return "The allocate memory callback is NULL";
|
||||
case ARGON2_INCORRECT_PARAMETER:
|
||||
return "Argon2_Context context is NULL";
|
||||
case ARGON2_INCORRECT_TYPE:
|
||||
return "There is no such version of Argon2";
|
||||
case ARGON2_OUT_PTR_MISMATCH:
|
||||
return "Output pointer mismatch";
|
||||
case ARGON2_THREADS_TOO_FEW:
|
||||
return "Not enough threads";
|
||||
case ARGON2_THREADS_TOO_MANY:
|
||||
return "Too many threads";
|
||||
case ARGON2_MISSING_ARGS:
|
||||
return "Missing arguments";
|
||||
case ARGON2_ENCODING_FAIL:
|
||||
return "Encoding failed";
|
||||
case ARGON2_DECODING_FAIL:
|
||||
return "Decoding failed";
|
||||
case ARGON2_THREAD_FAIL:
|
||||
return "Threading failure";
|
||||
case ARGON2_DECODING_LENGTH_FAIL:
|
||||
return "Some of encoded parameters are too long or too short";
|
||||
case ARGON2_VERIFY_MISMATCH:
|
||||
return "The password does not match the supplied hash";
|
||||
default:
|
||||
return "Unknown error code";
|
||||
}
|
||||
}
|
||||
|
||||
size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
|
||||
uint32_t saltlen, uint32_t hashlen, argon2_type type) {
|
||||
return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) +
|
||||
numlen(t_cost) + numlen(m_cost) + numlen(parallelism) +
|
||||
b64len(saltlen) + b64len(hashlen);
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <pthread.h>
|
||||
#include <x86intrin.h>
|
||||
#include "../blake2/blake2.h"
|
||||
|
||||
typedef struct _Argon2d_Block
|
||||
{
|
||||
union
|
||||
{
|
||||
uint64_t data[1024 / 8] __attribute__((aligned(32)));
|
||||
__m128i dqwords[1024 / 16] __attribute__((aligned(32)));
|
||||
__m256i qqwords[1024 / 32] __attribute__((aligned(32)));
|
||||
};
|
||||
} Argon2d_Block;
|
||||
|
||||
typedef struct _Argon2ThreadData
|
||||
{
|
||||
Argon2d_Block *Matrix;
|
||||
uint32_t slice;
|
||||
uint32_t lane;
|
||||
} Argon2ThreadData;
|
||||
|
||||
#define SEGMENT_LENGTH (250U / (4U * 4U)) // memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
|
||||
#define LANE_LENGTH (SEGMENT_LENGTH * 4U) // segment_length * ARGON2_SYNC_POINTS;
|
||||
#define CONCURRENT_THREADS 4
|
||||
|
||||
static const uint64_t blake2b_IV[8] =
|
||||
{
|
||||
0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL,
|
||||
0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL,
|
||||
0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL,
|
||||
0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL
|
||||
};
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] =
|
||||
{
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = ROTL64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 40); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = ROTL64(d ^ a, 48); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
void CompressBlock(uint64_t *h, const uint64_t *m, uint64_t t, uint64_t f)
|
||||
{
|
||||
uint64_t v[16];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 8; ++i) v[i] = h[i];
|
||||
|
||||
for(i = 8; i < 16; ++i) v[i] = blake2b_IV[i - 8];
|
||||
|
||||
v[12] ^= t;
|
||||
v[14] ^= f;
|
||||
|
||||
int r;
|
||||
for(r = 0; r < 12; ++r)
|
||||
{
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i) h[i] ^= v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
void Argon2dInitHash(void *HashOut, void *Input)
|
||||
{
|
||||
blake2b_state BlakeHash;
|
||||
uint32_t InBuf[64]; // Is only 50 uint32_t, but need more space for Blake2B
|
||||
|
||||
memset(InBuf, 0x00, 200);
|
||||
|
||||
InBuf[0] = 4UL; // Lanes
|
||||
InBuf[1] = 32UL; // Output Length
|
||||
InBuf[2] = 250UL; // Memory Cost
|
||||
InBuf[3] = 1UL; // Time Cost
|
||||
InBuf[4] = 16UL; // Argon2 Version Number
|
||||
InBuf[5] = 0UL; // Type
|
||||
InBuf[6] = 80UL; // Password Length
|
||||
|
||||
memcpy(InBuf + 7, Input, 80); // Password
|
||||
|
||||
InBuf[27] = 80UL; // Salt Length
|
||||
|
||||
memcpy(InBuf + 28, Input, 80); // Salt
|
||||
|
||||
InBuf[48] = 0UL; // Secret Length
|
||||
InBuf[49] = 0UL; // Associated Data Length
|
||||
|
||||
int i;
|
||||
for(i = 50; i < 64; ++i) InBuf[i] = 0UL;
|
||||
|
||||
uint64_t H[8];
|
||||
|
||||
for(i = 0; i < 8; ++i) H[i] = blake2b_IV[i];
|
||||
|
||||
H[0] ^= 0x0000000001010040;
|
||||
|
||||
CompressBlock(H, (uint64_t *)InBuf, 128ULL, 0ULL);
|
||||
CompressBlock(H, (uint64_t *)(InBuf + 32), 200ULL, 0xFFFFFFFFFFFFFFFFULL);
|
||||
|
||||
memcpy(HashOut, H, 64U);
|
||||
}
|
||||
|
||||
void Argon2dFillFirstBlocks(Argon2d_Block *Matrix, void *InitHash)
|
||||
{
|
||||
uint32_t lane;
|
||||
for(lane = 0; lane < 4; ++lane)
|
||||
{
|
||||
((uint32_t *)InitHash)[16] = 0;
|
||||
((uint32_t *)InitHash)[17] = lane;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH].data, 1024, InitHash, 72);
|
||||
((uint32_t *)InitHash)[16] |= 1;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH + 1].data, 1024, InitHash, 72);
|
||||
}
|
||||
}
|
||||
|
||||
#include "../blake2/blamka-round-opt.h"
|
||||
|
||||
void Argon2dFillSingleBlock(Argon2d_Block *State, Argon2d_Block *RefBlock, Argon2d_Block *NextBlock)
|
||||
{
|
||||
__m256i XY[32];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
XY[i] = State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], RefBlock->qqwords[i]);
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * i + 0], State->dqwords[8 * i + 1], State->dqwords[8 * i + 2], State->dqwords[8 * i + 3],
|
||||
State->dqwords[8 * i + 4], State->dqwords[8 * i + 5], State->dqwords[8 * i + 6], State->dqwords[8 * i + 7]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * 0 + i], State->dqwords[8 * 1 + i], State->dqwords[8 * 2 + i], State->dqwords[8 * 3 + i],
|
||||
State->dqwords[8 * 4 + i], State->dqwords[8 * 5 + i], State->dqwords[8 * 6 + i], State->dqwords[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 32; ++i)
|
||||
{
|
||||
State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], XY[i]);
|
||||
_mm256_store_si256(NextBlock->qqwords + i, State->qqwords[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void FillSegment(Argon2d_Block *Matrix, uint32_t slice, uint32_t lane)
|
||||
{
|
||||
uint32_t startidx, prevoff, curoff;
|
||||
Argon2d_Block State;
|
||||
|
||||
startidx = (!slice) ? 2 : 0;
|
||||
curoff = lane * LANE_LENGTH + slice * SEGMENT_LENGTH + startidx;
|
||||
|
||||
//if(!(curoff % LANE_LENGTH)) prevoff = curoff + LANE_LENGTH - 1;
|
||||
//else prevoff = curoff - 1;
|
||||
|
||||
prevoff = (!(curoff % LANE_LENGTH)) ? curoff + LANE_LENGTH - 1 : curoff - 1;
|
||||
|
||||
memcpy(State.data, (Matrix + prevoff)->data, 1024);
|
||||
|
||||
int i;
|
||||
for(i = startidx; i < SEGMENT_LENGTH; ++i, ++curoff, ++prevoff)
|
||||
{
|
||||
if((curoff % LANE_LENGTH) == 1) prevoff = curoff - 1;
|
||||
|
||||
uint64_t pseudorand = Matrix[prevoff].data[0];
|
||||
uint64_t reflane = (!slice) ? lane : (pseudorand >> 32) & 3; // mod lanes
|
||||
|
||||
uint32_t index = i;
|
||||
bool samelane = reflane == lane;
|
||||
pseudorand &= 0xFFFFFFFFULL;
|
||||
uint32_t refareasize = ((reflane == lane) ? slice * SEGMENT_LENGTH + index - 1 : slice * SEGMENT_LENGTH + ((!index) ? -1 : 0));
|
||||
|
||||
|
||||
if(!slice) refareasize = index - 1;
|
||||
|
||||
uint64_t relativepos = (pseudorand & 0xFFFFFFFFULL);
|
||||
relativepos = relativepos * relativepos >> 32;
|
||||
relativepos = refareasize - 1 - (refareasize * relativepos >> 32);
|
||||
|
||||
uint32_t startpos = 0;
|
||||
|
||||
uint32_t abspos = (startpos + relativepos) % LANE_LENGTH;
|
||||
|
||||
uint32_t refidx = abspos;
|
||||
|
||||
Argon2dFillSingleBlock(&State, Matrix + (LANE_LENGTH * reflane + refidx), Matrix + curoff);
|
||||
}
|
||||
}
|
||||
|
||||
void *ThreadedSegmentFill(void *ThrData)
|
||||
{
|
||||
Argon2ThreadData *Data = (Argon2ThreadData *)ThrData;
|
||||
|
||||
FillSegment(Data->Matrix, Data->slice, Data->lane);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
void Argon2dFillAllBlocks(Argon2d_Block *Matrix)
|
||||
{
|
||||
pthread_t ThrHandles[CONCURRENT_THREADS];
|
||||
Argon2ThreadData ThrData[CONCURRENT_THREADS];
|
||||
|
||||
int s;
|
||||
for(s = 0; s < 4; ++s)
|
||||
{
|
||||
// WARNING: Assumes CONCURRENT_THREADS == lanes == 4
|
||||
int l;
|
||||
for(l = 0; l < 4; ++l)
|
||||
{
|
||||
FillSegment(Matrix, s, l);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Argon2dFinalizeHash(void *OutputHash, Argon2d_Block *Matrix)
|
||||
{
|
||||
int l;
|
||||
for(l = 1; l < 4; ++l)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
Matrix[LANE_LENGTH - 1].qqwords[i] = _mm256_xor_si256(Matrix[LANE_LENGTH - 1].qqwords[i], Matrix[LANE_LENGTH * l + (LANE_LENGTH - 1)].qqwords[i]);
|
||||
}
|
||||
|
||||
blake2b_long(OutputHash, 32, Matrix[LANE_LENGTH - 1].data, 1024);
|
||||
}
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr)
|
||||
{
|
||||
uint8_t tmp[72];
|
||||
|
||||
Argon2dInitHash(tmp, (uint8_t *)BlkHdr);
|
||||
|
||||
Argon2dFillFirstBlocks(Matrix, tmp);
|
||||
|
||||
Argon2dFillAllBlocks(Matrix);
|
||||
|
||||
Argon2dFinalizeHash((uint8_t *)Output, Matrix);
|
||||
}
|
||||
|
||||
void WolfArgon2dAllocateCtx(void **Matrix)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)_aligned_malloc(32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#else
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)malloc(sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
posix_memalign(Matrix, 32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#endif
|
||||
}
|
||||
|
||||
void WolfArgon2dFreeCtx(void *Matrix)
|
||||
{
|
||||
free(Matrix);
|
||||
}
|
||||
|
||||
#endif
|
345
algo/argon2/argon2d/argon2d/argon2.h
Normal file
345
algo/argon2/argon2d/argon2d/argon2.h
Normal file
@@ -0,0 +1,345 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_H
|
||||
#define ARGON2_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Symbols visibility control */
|
||||
#ifdef A2_VISCTL
|
||||
#define ARGON2_PUBLIC __attribute__((visibility("default")))
|
||||
#elif _MSC_VER
|
||||
#define ARGON2_PUBLIC __declspec(dllexport)
|
||||
#else
|
||||
#define ARGON2_PUBLIC
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Argon2 input parameter restrictions
|
||||
*/
|
||||
|
||||
/* Minimum and maximum number of lanes (degree of parallelism) */
|
||||
#define ARGON2_MIN_LANES UINT32_C(1)
|
||||
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of threads */
|
||||
#define ARGON2_MIN_THREADS UINT32_C(1)
|
||||
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Number of synchronization points between lanes per pass */
|
||||
#define ARGON2_SYNC_POINTS UINT32_C(4)
|
||||
|
||||
/* Minimum and maximum digest size in bytes */
|
||||
#define ARGON2_MIN_OUTLEN UINT32_C(4)
|
||||
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
|
||||
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
|
||||
|
||||
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
|
||||
#define ARGON2_MAX_MEMORY_BITS \
|
||||
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
|
||||
#define ARGON2_MAX_MEMORY \
|
||||
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
|
||||
|
||||
/* Minimum and maximum number of passes */
|
||||
#define ARGON2_MIN_TIME UINT32_C(1)
|
||||
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum password length in bytes */
|
||||
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum associated data length in bytes */
|
||||
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum salt length in bytes */
|
||||
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
|
||||
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum key length in bytes */
|
||||
#define ARGON2_MIN_SECRET UINT32_C(0)
|
||||
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Flags to determine which fields are securely wiped (default = no wipe). */
|
||||
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
|
||||
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
|
||||
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
|
||||
|
||||
/* Global flag to determine if we are wiping internal memory buffers. This flag
|
||||
* is defined in core.c and deafults to 1 (wipe internal memory). */
|
||||
extern int FLAG_clear_internal_memory;
|
||||
|
||||
/* Error codes */
|
||||
typedef enum Argon2_ErrorCodes {
|
||||
ARGON2_OK = 0,
|
||||
|
||||
ARGON2_OUTPUT_PTR_NULL = -1,
|
||||
|
||||
ARGON2_OUTPUT_TOO_SHORT = -2,
|
||||
ARGON2_OUTPUT_TOO_LONG = -3,
|
||||
|
||||
ARGON2_PWD_TOO_SHORT = -4,
|
||||
ARGON2_PWD_TOO_LONG = -5,
|
||||
|
||||
ARGON2_SALT_TOO_SHORT = -6,
|
||||
ARGON2_SALT_TOO_LONG = -7,
|
||||
|
||||
ARGON2_AD_TOO_SHORT = -8,
|
||||
ARGON2_AD_TOO_LONG = -9,
|
||||
|
||||
ARGON2_SECRET_TOO_SHORT = -10,
|
||||
ARGON2_SECRET_TOO_LONG = -11,
|
||||
|
||||
ARGON2_TIME_TOO_SMALL = -12,
|
||||
ARGON2_TIME_TOO_LARGE = -13,
|
||||
|
||||
ARGON2_MEMORY_TOO_LITTLE = -14,
|
||||
ARGON2_MEMORY_TOO_MUCH = -15,
|
||||
|
||||
ARGON2_LANES_TOO_FEW = -16,
|
||||
ARGON2_LANES_TOO_MANY = -17,
|
||||
|
||||
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
|
||||
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
|
||||
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
|
||||
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
|
||||
|
||||
ARGON2_MEMORY_ALLOCATION_ERROR = -22,
|
||||
|
||||
ARGON2_FREE_MEMORY_CBK_NULL = -23,
|
||||
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
|
||||
|
||||
ARGON2_INCORRECT_PARAMETER = -25,
|
||||
ARGON2_INCORRECT_TYPE = -26,
|
||||
|
||||
ARGON2_OUT_PTR_MISMATCH = -27,
|
||||
|
||||
ARGON2_THREADS_TOO_FEW = -28,
|
||||
ARGON2_THREADS_TOO_MANY = -29,
|
||||
|
||||
ARGON2_MISSING_ARGS = -30,
|
||||
|
||||
ARGON2_ENCODING_FAIL = -31,
|
||||
|
||||
ARGON2_DECODING_FAIL = -32,
|
||||
|
||||
ARGON2_THREAD_FAIL = -33,
|
||||
|
||||
ARGON2_DECODING_LENGTH_FAIL = -34,
|
||||
|
||||
ARGON2_VERIFY_MISMATCH = -35
|
||||
} argon2_error_codes;
|
||||
|
||||
/* Memory allocator types --- for external allocation */
|
||||
typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
|
||||
typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
|
||||
|
||||
/* Argon2 external data structures */
|
||||
|
||||
/*
|
||||
*****
|
||||
* Context: structure to hold Argon2 inputs:
|
||||
* output array and its length,
|
||||
* password and its length,
|
||||
* salt and its length,
|
||||
* secret and its length,
|
||||
* associated data and its length,
|
||||
* number of passes, amount of used memory (in KBytes, can be rounded up a bit)
|
||||
* number of parallel threads that will be run.
|
||||
* All the parameters above affect the output hash value.
|
||||
* Additionally, two function pointers can be provided to allocate and
|
||||
* deallocate the memory (if NULL, memory will be allocated internally).
|
||||
* Also, three flags indicate whether to erase password, secret as soon as they
|
||||
* are pre-hashed (and thus not needed anymore), and the entire memory
|
||||
*****
|
||||
* Simplest situation: you have output array out[8], password is stored in
|
||||
* pwd[32], salt is stored in salt[16], you do not have keys nor associated
|
||||
* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
|
||||
* 4 parallel lanes.
|
||||
* You want to erase the password, but you're OK with last pass not being
|
||||
* erased. You want to use the default memory allocator.
|
||||
* Then you initialize:
|
||||
Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
|
||||
*/
|
||||
typedef struct Argon2_Context {
|
||||
uint8_t *out; /* output array */
|
||||
uint32_t outlen; /* digest length */
|
||||
|
||||
uint8_t *pwd; /* password array */
|
||||
uint32_t pwdlen; /* password length */
|
||||
|
||||
uint8_t *salt; /* salt array */
|
||||
uint32_t saltlen; /* salt length */
|
||||
|
||||
uint8_t *secret; /* key array */
|
||||
uint32_t secretlen; /* key length */
|
||||
|
||||
uint8_t *ad; /* associated data array */
|
||||
uint32_t adlen; /* associated data length */
|
||||
|
||||
uint32_t t_cost; /* number of passes */
|
||||
uint32_t m_cost; /* amount of memory requested (KB) */
|
||||
uint32_t lanes; /* number of lanes */
|
||||
uint32_t threads; /* maximum number of threads */
|
||||
|
||||
allocate_fptr allocate_cbk; /* pointer to memory allocator */
|
||||
deallocate_fptr free_cbk; /* pointer to memory deallocator */
|
||||
|
||||
uint32_t flags; /* array of bool options */
|
||||
} argon2_context;
|
||||
|
||||
/* Argon2 primitive type */
|
||||
typedef enum Argon2_type {
|
||||
Argon2_d = 0
|
||||
} argon2_type;
|
||||
|
||||
/*
|
||||
* Function that gives the string representation of an argon2_type.
|
||||
* @param type The argon2_type that we want the string for
|
||||
* @param uppercase Whether the string should have the first letter uppercase
|
||||
* @return NULL if invalid type, otherwise the string representation.
|
||||
*/
|
||||
ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase);
|
||||
|
||||
/*
|
||||
* Function that performs memory-hard hashing with certain degree of parallelism
|
||||
* @param context Pointer to the Argon2 internal structure
|
||||
* @return Error code if smth is wrong, ARGON2_OK otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
|
||||
|
||||
/**
|
||||
* Hashes a password with Argon2i, producing a raw hash by allocating memory at
|
||||
* @hash
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Sets memory usage to m_cost kibibytes
|
||||
* @param parallelism Number of threads and compute lanes
|
||||
* @param pwd Pointer to password
|
||||
* @param pwdlen Password size in bytes
|
||||
* @param salt Pointer to salt
|
||||
* @param saltlen Salt size in bytes
|
||||
* @param hash Buffer where to write the raw hash - updated by the function
|
||||
* @param hashlen Desired length of the hash in bytes
|
||||
* @pre Different parallelism levels will give different results
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type);
|
||||
|
||||
/**
|
||||
* Verifies a password against an encoded string
|
||||
* Encoded string is restricted as in validate_inputs()
|
||||
* @param encoded String encoding parameters, salt, hash
|
||||
* @param pwd Pointer to password
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen, argon2_type type);
|
||||
|
||||
/**
|
||||
* Argon2d: Version of Argon2 that picks memory blocks depending
|
||||
* on the password and salt. Only for side-channel-free
|
||||
* environment!!
|
||||
*****
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2d hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @param hash The password hash to verify. The length of the hash is
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type);
|
||||
|
||||
/**
|
||||
* Get the associated error message for given error code
|
||||
* @return The error message associated with the given error code
|
||||
*/
|
||||
ARGON2_PUBLIC const char *argon2_error_message(int error_code);
|
||||
|
||||
/**
|
||||
* Returns the encoded hash length for the given input parameters
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Memory usage in kibibytes
|
||||
* @param parallelism Number of threads; used to compute lanes
|
||||
* @param saltlen Salt size in bytes
|
||||
* @param hashlen Hash size in bytes
|
||||
* @param type The argon2_type that we want the encoded length for
|
||||
* @return The encoded hash length in bytes
|
||||
*/
|
||||
ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost,
|
||||
uint32_t parallelism, uint32_t saltlen,
|
||||
uint32_t hashlen, argon2_type type);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr);
|
||||
void WolfArgon2dAllocateCtx(void **Matrix);
|
||||
void WolfArgon2dFreeCtx(void *Matrix);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
617
algo/argon2/argon2d/argon2d/core.c
Normal file
617
algo/argon2/argon2d/argon2d/core.c
Normal file
@@ -0,0 +1,617 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
/*For memory wiping*/
|
||||
#ifdef _MSC_VER
|
||||
#include <windows.h>
|
||||
#include <winbase.h> /* For SecureZeroMemory */
|
||||
#endif
|
||||
#if defined __STDC_LIB_EXT1__
|
||||
#define __STDC_WANT_LIB_EXT1__ 1
|
||||
#endif
|
||||
#define VC_GE_2005(version) (version >= 1400)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "core.h"
|
||||
#include "thread.h"
|
||||
#include "../blake2/blake2.h"
|
||||
#include "../blake2/blake2-impl.h"
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_attribute(optnone)
|
||||
#define NOT_OPTIMIZED __attribute__((optnone))
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#define GCC_VERSION \
|
||||
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#if GCC_VERSION >= 40400
|
||||
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef NOT_OPTIMIZED
|
||||
#define NOT_OPTIMIZED
|
||||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
|
||||
void copy_block(block *dst, const block *src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
void xor_block(block *dst, const block *src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] ^= src->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static void store_block(void *output, const block *src) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/***************Memory functions*****************/
|
||||
|
||||
int allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num*size;
|
||||
if (memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 1. Check for multiplication overflow */
|
||||
if (size != 0 && memory_size / size != num) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 2. Try to allocate with appropriate allocator */
|
||||
if (context->allocate_cbk) {
|
||||
(context->allocate_cbk)(memory, memory_size);
|
||||
} else {
|
||||
*memory = malloc(memory_size);
|
||||
}
|
||||
|
||||
if (*memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
void free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num*size;
|
||||
clear_internal_memory(memory, memory_size);
|
||||
if (context->free_cbk) {
|
||||
(context->free_cbk)(memory, memory_size);
|
||||
} else {
|
||||
free(memory);
|
||||
}
|
||||
}
|
||||
|
||||
void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
|
||||
#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER)
|
||||
SecureZeroMemory(v, n);
|
||||
#elif defined memset_s
|
||||
memset_s(v, n, 0, n);
|
||||
#elif defined(__OpenBSD__)
|
||||
explicit_bzero(v, n);
|
||||
#else
|
||||
static void *(*const volatile memset_sec)(void *, int, size_t) = &memset;
|
||||
memset_sec(v, 0, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Memory clear flag defaults to true. */
|
||||
int FLAG_clear_internal_memory = 1;
|
||||
void clear_internal_memory(void *v, size_t n) {
|
||||
if (FLAG_clear_internal_memory && v) {
|
||||
secure_wipe_memory(v, n);
|
||||
}
|
||||
}
|
||||
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
if (context != NULL && instance != NULL) {
|
||||
block blockhash;
|
||||
uint32_t l;
|
||||
|
||||
copy_block(&blockhash, instance->memory + instance->lane_length - 1);
|
||||
|
||||
/* XOR the last blocks */
|
||||
for (l = 1; l < instance->lanes; ++l) {
|
||||
uint32_t last_block_in_lane =
|
||||
l * instance->lane_length + (instance->lane_length - 1);
|
||||
xor_block(&blockhash, instance->memory + last_block_in_lane);
|
||||
}
|
||||
|
||||
/* Hash the result */
|
||||
{
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store_block(blockhash_bytes, &blockhash);
|
||||
blake2b_long(context->out, context->outlen, blockhash_bytes,
|
||||
ARGON2_BLOCK_SIZE);
|
||||
/* clear blockhash and blockhash_bytes */
|
||||
clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE);
|
||||
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
free_memory(context, (uint8_t *)instance->memory,
|
||||
instance->memory_blocks, sizeof(block));
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane) {
|
||||
/*
|
||||
* Pass 0:
|
||||
* This lane : all already finished segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : all already finished segments
|
||||
* Pass 1+:
|
||||
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : (SYNC_POINTS - 1) last segments
|
||||
*/
|
||||
uint32_t reference_area_size;
|
||||
uint64_t relative_position;
|
||||
uint32_t start_position, absolute_position;
|
||||
|
||||
if (0 == position->pass) {
|
||||
/* First pass */
|
||||
if (0 == position->slice) {
|
||||
/* First slice */
|
||||
reference_area_size =
|
||||
position->index - 1; /* all but the previous */
|
||||
} else {
|
||||
if (same_lane) {
|
||||
/* The same lane => add current segment */
|
||||
reference_area_size =
|
||||
position->slice * instance->segment_length +
|
||||
position->index - 1;
|
||||
} else {
|
||||
reference_area_size =
|
||||
position->slice * instance->segment_length +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Second pass */
|
||||
if (same_lane) {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length + position->index -
|
||||
1;
|
||||
} else {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
|
||||
* relative position */
|
||||
relative_position = pseudo_rand;
|
||||
relative_position = relative_position * relative_position >> 32;
|
||||
relative_position = reference_area_size - 1 -
|
||||
(reference_area_size * relative_position >> 32);
|
||||
|
||||
/* 1.2.5 Computing starting position */
|
||||
start_position = 0;
|
||||
|
||||
if (0 != position->pass) {
|
||||
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
|
||||
? 0
|
||||
: (position->slice + 1) * instance->segment_length;
|
||||
}
|
||||
|
||||
/* 1.2.6. Computing absolute position */
|
||||
absolute_position = (start_position + relative_position) %
|
||||
instance->lane_length; /* absolute position */
|
||||
return absolute_position;
|
||||
}
|
||||
|
||||
/* Single-threaded version for p=1 case */
|
||||
static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
||||
uint32_t r, s, l;
|
||||
|
||||
for (r = 0; r < instance->passes; ++r) {
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
argon2_position_t position = {r, l, (uint8_t)s, 0};
|
||||
fill_segment(instance, position);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
#if !defined(ARGON2_NO_THREADS)
|
||||
|
||||
#ifdef _WIN32
|
||||
static unsigned __stdcall fill_segment_thr(void *thread_data)
|
||||
#else
|
||||
static void *fill_segment_thr(void *thread_data)
|
||||
#endif
|
||||
{
|
||||
argon2_thread_data *my_data = thread_data;
|
||||
fill_segment(my_data->instance_ptr, my_data->pos);
|
||||
argon2_thread_exit();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Multi-threaded version for p > 1 case */
|
||||
static int fill_memory_blocks_mt(argon2_instance_t *instance) {
|
||||
uint32_t r, s;
|
||||
argon2_thread_handle_t *thread = NULL;
|
||||
argon2_thread_data *thr_data = NULL;
|
||||
int rc = ARGON2_OK;
|
||||
|
||||
/* 1. Allocating space for threads */
|
||||
thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t));
|
||||
if (thread == NULL) {
|
||||
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
thr_data = calloc(instance->lanes, sizeof(argon2_thread_data));
|
||||
if (thr_data == NULL) {
|
||||
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
for (r = 0; r < instance->passes; ++r) {
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
uint32_t l;
|
||||
|
||||
/* 2. Calling threads */
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
argon2_position_t position;
|
||||
|
||||
/* 2.1 Join a thread if limit is exceeded */
|
||||
if (l >= instance->threads) {
|
||||
if (argon2_thread_join(thread[l - instance->threads])) {
|
||||
rc = ARGON2_THREAD_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
/* 2.2 Create thread */
|
||||
position.pass = r;
|
||||
position.lane = l;
|
||||
position.slice = (uint8_t)s;
|
||||
position.index = 0;
|
||||
thr_data[l].instance_ptr =
|
||||
instance; /* preparing the thread input */
|
||||
memcpy(&(thr_data[l].pos), &position,
|
||||
sizeof(argon2_position_t));
|
||||
if (argon2_thread_create(&thread[l], &fill_segment_thr,
|
||||
(void *)&thr_data[l])) {
|
||||
rc = ARGON2_THREAD_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* fill_segment(instance, position); */
|
||||
/*Non-thread equivalent of the lines above */
|
||||
}
|
||||
|
||||
/* 3. Joining remaining threads */
|
||||
for (l = instance->lanes - instance->threads; l < instance->lanes;
|
||||
++l) {
|
||||
if (argon2_thread_join(thread[l])) {
|
||||
rc = ARGON2_THREAD_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fail:
|
||||
if (thread != NULL) {
|
||||
free(thread);
|
||||
}
|
||||
if (thr_data != NULL) {
|
||||
free(thr_data);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* ARGON2_NO_THREADS */
|
||||
|
||||
int fill_memory_blocks(argon2_instance_t *instance) {
|
||||
if (instance == NULL || instance->lanes == 0) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
#if defined(ARGON2_NO_THREADS)
|
||||
return fill_memory_blocks_st(instance);
|
||||
#else
|
||||
return instance->threads == 1 ?
|
||||
fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance);
|
||||
#endif
|
||||
}
|
||||
|
||||
int validate_inputs(const argon2_context *context) {
|
||||
if (NULL == context) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
||||
if (NULL == context->out) {
|
||||
return ARGON2_OUTPUT_PTR_NULL;
|
||||
}
|
||||
|
||||
/* Validate output length */
|
||||
if (ARGON2_MIN_OUTLEN > context->outlen) {
|
||||
return ARGON2_OUTPUT_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_OUTLEN < context->outlen) {
|
||||
return ARGON2_OUTPUT_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate password (required param) */
|
||||
if (NULL == context->pwd) {
|
||||
if (0 != context->pwdlen) {
|
||||
return ARGON2_PWD_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate salt (required param) */
|
||||
if (NULL == context->salt) {
|
||||
if (0 != context->saltlen) {
|
||||
return ARGON2_SALT_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_SALT_LENGTH > context->saltlen) {
|
||||
return ARGON2_SALT_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_SALT_LENGTH < context->saltlen) {
|
||||
return ARGON2_SALT_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate secret (optional param) */
|
||||
if (NULL == context->secret) {
|
||||
if (0 != context->secretlen) {
|
||||
return ARGON2_SECRET_PTR_MISMATCH;
|
||||
}
|
||||
} else {
|
||||
if (ARGON2_MIN_SECRET > context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_SECRET < context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate associated data (optional param) */
|
||||
if (NULL == context->ad) {
|
||||
if (0 != context->adlen) {
|
||||
return ARGON2_AD_PTR_MISMATCH;
|
||||
}
|
||||
} else {
|
||||
if (ARGON2_MIN_AD_LENGTH > context->adlen) {
|
||||
return ARGON2_AD_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_AD_LENGTH < context->adlen) {
|
||||
return ARGON2_AD_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate memory cost */
|
||||
if (ARGON2_MIN_MEMORY > context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_MEMORY < context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_MUCH;
|
||||
}
|
||||
|
||||
if (context->m_cost < 8 * context->lanes) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
/* Validate time cost */
|
||||
if (ARGON2_MIN_TIME > context->t_cost) {
|
||||
return ARGON2_TIME_TOO_SMALL;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_TIME < context->t_cost) {
|
||||
return ARGON2_TIME_TOO_LARGE;
|
||||
}
|
||||
|
||||
/* Validate lanes */
|
||||
if (ARGON2_MIN_LANES > context->lanes) {
|
||||
return ARGON2_LANES_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_LANES < context->lanes) {
|
||||
return ARGON2_LANES_TOO_MANY;
|
||||
}
|
||||
|
||||
/* Validate threads */
|
||||
if (ARGON2_MIN_THREADS > context->threads) {
|
||||
return ARGON2_THREADS_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_THREADS < context->threads) {
|
||||
return ARGON2_THREADS_TOO_MANY;
|
||||
}
|
||||
|
||||
if (NULL != context->allocate_cbk && NULL == context->free_cbk) {
|
||||
return ARGON2_FREE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
if (NULL == context->allocate_cbk && NULL != context->free_cbk) {
|
||||
return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
uint32_t l;
|
||||
/* Make the first and second block in each lane as G(H0||0||i) or
|
||||
G(H0||1||i) */
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
|
||||
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
|
||||
ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 0],
|
||||
blockhash_bytes);
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
|
||||
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
|
||||
ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 1],
|
||||
blockhash_bytes);
|
||||
}
|
||||
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type) {
|
||||
blake2b_state BlakeHash;
|
||||
uint8_t value[sizeof(uint32_t)];
|
||||
|
||||
if (NULL == context || NULL == blockhash) {
|
||||
return;
|
||||
}
|
||||
|
||||
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
store32(&value, context->lanes);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->outlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->m_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->t_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, ARGON2_VERSION_NUMBER);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, (uint32_t)type);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->pwdlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->pwd != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
context->pwdlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
||||
secure_wipe_memory(context->pwd, context->pwdlen);
|
||||
context->pwdlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->saltlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->salt != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->salt,
|
||||
context->saltlen);
|
||||
}
|
||||
|
||||
store32(&value, context->secretlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->secret != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
|
||||
context->secretlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
||||
secure_wipe_memory(context->secret, context->secretlen);
|
||||
context->secretlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->adlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->ad != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
|
||||
context->adlen);
|
||||
}
|
||||
|
||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
int result = ARGON2_OK;
|
||||
|
||||
if (instance == NULL || context == NULL)
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
instance->context_ptr = context;
|
||||
|
||||
/* 1. Memory allocation */
|
||||
result = allocate_memory(context, (uint8_t **)&(instance->memory),
|
||||
instance->memory_blocks, sizeof(block));
|
||||
if (result != ARGON2_OK) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
/* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */
|
||||
/* Hashing all inputs */
|
||||
initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
|
||||
clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
fill_first_blocks(blockhash, instance);
|
||||
/* Clearing the hash */
|
||||
clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
229
algo/argon2/argon2d/argon2d/core.h
Normal file
229
algo/argon2/argon2d/argon2d/core.h
Normal file
@@ -0,0 +1,229 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_CORE_H
|
||||
#define ARGON2_CORE_H
|
||||
|
||||
#include "argon2.h"
|
||||
|
||||
#define CONST_CAST(x) (x)(uintptr_t)
|
||||
|
||||
/**********************Argon2 internal constants*******************************/
|
||||
|
||||
enum argon2_core_constants {
|
||||
/* Version of the algorithm */
|
||||
ARGON2_VERSION_NUMBER = 0x10,
|
||||
/* Memory block size in bytes */
|
||||
ARGON2_BLOCK_SIZE = 1024,
|
||||
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
|
||||
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
|
||||
|
||||
/* Number of pseudo-random values generated by one call to Blake in Argon2i
|
||||
to
|
||||
generate reference block positions */
|
||||
ARGON2_ADDRESSES_IN_BLOCK = 128,
|
||||
|
||||
/* Pre-hashing digest length and its extension*/
|
||||
ARGON2_PREHASH_DIGEST_LENGTH = 64,
|
||||
ARGON2_PREHASH_SEED_LENGTH = 72
|
||||
};
|
||||
|
||||
/*************************Argon2 internal data types***********************/
|
||||
|
||||
/*
|
||||
* Structure for the (1KB) memory block implemented as 128 64-bit words.
|
||||
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
|
||||
* bounds checking).
|
||||
*/
|
||||
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
|
||||
|
||||
/*****************Functions that work with the block******************/
|
||||
|
||||
/* Initialize each byte of the block with @in */
|
||||
void init_block_value(block *b, uint8_t in);
|
||||
|
||||
/* Copy block @src to block @dst */
|
||||
void copy_block(block *dst, const block *src);
|
||||
|
||||
/* XOR @src onto @dst bytewise */
|
||||
void xor_block(block *dst, const block *src);
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
* and derived values.
|
||||
* Used to evaluate the number and location of blocks to construct in each
|
||||
* thread
|
||||
*/
|
||||
typedef struct Argon2_instance_t {
|
||||
block *memory; /* Memory pointer */
|
||||
uint32_t version;
|
||||
uint32_t passes; /* Number of passes */
|
||||
uint32_t memory_blocks; /* Number of blocks in memory */
|
||||
uint32_t segment_length;
|
||||
uint32_t lane_length;
|
||||
uint32_t lanes;
|
||||
uint32_t limit;
|
||||
uint32_t threads;
|
||||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
argon2_context *context_ptr; /* points back to original context */
|
||||
} argon2_instance_t;
|
||||
|
||||
/*
|
||||
* Argon2 position: where we construct the block right now. Used to distribute
|
||||
* work between threads.
|
||||
*/
|
||||
typedef struct Argon2_position_t {
|
||||
uint32_t pass;
|
||||
uint32_t lane;
|
||||
uint8_t slice;
|
||||
uint32_t index;
|
||||
} argon2_position_t;
|
||||
|
||||
/*Struct that holds the inputs for thread handling FillSegment*/
|
||||
typedef struct Argon2_thread_data {
|
||||
argon2_instance_t *instance_ptr;
|
||||
argon2_position_t pos;
|
||||
} argon2_thread_data;
|
||||
|
||||
/*************************Argon2 core functions********************************/
|
||||
|
||||
/* Allocates memory to the given pointer, uses the appropriate allocator as
|
||||
* specified in the context. Total allocated memory is num*size.
|
||||
* @param context argon2_context which specifies the allocator
|
||||
* @param memory pointer to the pointer to the memory
|
||||
* @param size the size in bytes for each element to be allocated
|
||||
* @param num the number of elements to be allocated
|
||||
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
|
||||
*/
|
||||
int allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/*
|
||||
* Frees memory at the given pointer, uses the appropriate deallocator as
|
||||
* specified in the context. Also cleans the memory using clear_internal_memory.
|
||||
* @param context argon2_context which specifies the deallocator
|
||||
* @param memory pointer to buffer to be freed
|
||||
* @param size the size in bytes for each element to be deallocated
|
||||
* @param num the number of elements to be deallocated
|
||||
*/
|
||||
void free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/* Function that securely cleans the memory. This ignores any flags set
|
||||
* regarding clearing memory. Usually one just calls clear_internal_memory.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void secure_wipe_memory(void *v, size_t n);
|
||||
|
||||
/* Function that securely clears the memory if FLAG_clear_internal_memory is
|
||||
* set. If the flag isn't set, this function does nothing.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void clear_internal_memory(void *v, size_t n);
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
* distribution and using a pseudo-random value as input
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Pointer to the current position
|
||||
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
|
||||
* @param same_lane Indicates if the block will be taken from the current lane.
|
||||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
/*
|
||||
* Function that validates all inputs against predefined restrictions and return
|
||||
* an error code
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>
|
||||
*/
|
||||
int validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
|
||||
* password and secret if needed
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param blockhash Buffer for pre-hashing digest
|
||||
* @param type Argon2 type
|
||||
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
|
||||
* allocated
|
||||
*/
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Function creates first 2 blocks per lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @param blockhash Pointer to the pre-hashing digest
|
||||
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
|
||||
*/
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
|
||||
* initialized
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param instance Current Argon2 instance
|
||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
* the memory.
|
||||
* @param context Pointer to current Argon2 context (use only the out parameters
|
||||
* from it)
|
||||
* @param instance Pointer to current instance of Argon2
|
||||
* @pre instance->state must point to necessary amount of memory
|
||||
* @pre context->out must point to outlen bytes of memory
|
||||
* @pre if context->free_cbk is not NULL, it should point to a function that
|
||||
* deallocates memory
|
||||
*/
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param context current context
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
/*
|
||||
* Function that fills the entire memory t_cost times based on the first two
|
||||
* blocks in each lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @return ARGON2_OK if successful, @context->state
|
||||
*/
|
||||
int fill_memory_blocks(argon2_instance_t *instance);
|
||||
|
||||
#endif
|
456
algo/argon2/argon2d/argon2d/encoding.c
Normal file
456
algo/argon2/argon2d/argon2d/encoding.c
Normal file
@@ -0,0 +1,456 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "encoding.h"
|
||||
#include "core.h"
|
||||
|
||||
/*
|
||||
* Example code for a decoder and encoder of "hash strings", with Argon2
|
||||
* parameters.
|
||||
*
|
||||
* This code comprises three sections:
|
||||
*
|
||||
* -- The first section contains generic Base64 encoding and decoding
|
||||
* functions. It is conceptually applicable to any hash function
|
||||
* implementation that uses Base64 to encode and decode parameters,
|
||||
* salts and outputs. It could be made into a library, provided that
|
||||
* the relevant functions are made public (non-static) and be given
|
||||
* reasonable names to avoid collisions with other functions.
|
||||
*
|
||||
* -- The second section is specific to Argon2. It encodes and decodes
|
||||
* the parameters, salts and outputs. It does not compute the hash
|
||||
* itself.
|
||||
*
|
||||
* The code was originally written by Thomas Pornin <pornin@bolet.org>,
|
||||
* to whom comments and remarks may be sent. It is released under what
|
||||
* should amount to Public Domain or its closest equivalent; the
|
||||
* following mantra is supposed to incarnate that fact with all the
|
||||
* proper legal rituals:
|
||||
*
|
||||
* ---------------------------------------------------------------------
|
||||
* This file is provided under the terms of Creative Commons CC0 1.0
|
||||
* Public Domain Dedication. To the extent possible under law, the
|
||||
* author (Thomas Pornin) has waived all copyright and related or
|
||||
* neighboring rights to this file. This work is published from: Canada.
|
||||
* ---------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (c) 2015 Thomas Pornin
|
||||
*/
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* Common code; could be shared between different hash functions.
|
||||
*
|
||||
* Note: the Base64 functions below assume that uppercase letters (resp.
|
||||
* lowercase letters) have consecutive numerical codes, that fit on 8
|
||||
* bits. All modern systems use ASCII-compatible charsets, where these
|
||||
* properties are true. If you are stuck with a dinosaur of a system
|
||||
* that still defaults to EBCDIC then you already have much bigger
|
||||
* interoperability issues to deal with.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Some macros for constant-time comparisons. These work over values in
|
||||
* the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true".
|
||||
*/
|
||||
#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF)
|
||||
#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF)
|
||||
#define GE(x, y) (GT(y, x) ^ 0xFF)
|
||||
#define LT(x, y) GT(y, x)
|
||||
#define LE(x, y) GE(y, x)
|
||||
|
||||
/*
|
||||
* Convert value x (0..63) to corresponding Base64 character.
|
||||
*/
|
||||
static int b64_byte_to_char(unsigned x) {
|
||||
return (LT(x, 26) & (x + 'A')) |
|
||||
(GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) |
|
||||
(GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') |
|
||||
(EQ(x, 63) & '/');
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert character c to the corresponding 6-bit value. If character c
|
||||
* is not a Base64 character, then 0xFF (255) is returned.
|
||||
*/
|
||||
static unsigned b64_char_to_byte(int c) {
|
||||
unsigned x;
|
||||
|
||||
x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) |
|
||||
(GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) |
|
||||
(GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) |
|
||||
(EQ(c, '/') & 63);
|
||||
return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF));
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert some bytes to Base64. 'dst_len' is the length (in characters)
|
||||
* of the output buffer 'dst'; if that buffer is not large enough to
|
||||
* receive the result (including the terminating 0), then (size_t)-1
|
||||
* is returned. Otherwise, the zero-terminated Base64 string is written
|
||||
* in the buffer, and the output length (counted WITHOUT the terminating
|
||||
* zero) is returned.
|
||||
*/
|
||||
static size_t to_base64(char *dst, size_t dst_len, const void *src,
|
||||
size_t src_len) {
|
||||
size_t olen;
|
||||
const unsigned char *buf;
|
||||
unsigned acc, acc_len;
|
||||
|
||||
olen = (src_len / 3) << 2;
|
||||
switch (src_len % 3) {
|
||||
case 2:
|
||||
olen++;
|
||||
/* fall through */
|
||||
case 1:
|
||||
olen += 2;
|
||||
break;
|
||||
}
|
||||
if (dst_len <= olen) {
|
||||
return (size_t)-1;
|
||||
}
|
||||
acc = 0;
|
||||
acc_len = 0;
|
||||
buf = (const unsigned char *)src;
|
||||
while (src_len-- > 0) {
|
||||
acc = (acc << 8) + (*buf++);
|
||||
acc_len += 8;
|
||||
while (acc_len >= 6) {
|
||||
acc_len -= 6;
|
||||
*dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F);
|
||||
}
|
||||
}
|
||||
if (acc_len > 0) {
|
||||
*dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F);
|
||||
}
|
||||
*dst++ = 0;
|
||||
return olen;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode Base64 chars into bytes. The '*dst_len' value must initially
|
||||
* contain the length of the output buffer '*dst'; when the decoding
|
||||
* ends, the actual number of decoded bytes is written back in
|
||||
* '*dst_len'.
|
||||
*
|
||||
* Decoding stops when a non-Base64 character is encountered, or when
|
||||
* the output buffer capacity is exceeded. If an error occurred (output
|
||||
* buffer is too small, invalid last characters leading to unprocessed
|
||||
* buffered bits), then NULL is returned; otherwise, the returned value
|
||||
* points to the first non-Base64 character in the source stream, which
|
||||
* may be the terminating zero.
|
||||
*/
|
||||
static const char *from_base64(void *dst, size_t *dst_len, const char *src) {
|
||||
size_t len;
|
||||
unsigned char *buf;
|
||||
unsigned acc, acc_len;
|
||||
|
||||
buf = (unsigned char *)dst;
|
||||
len = 0;
|
||||
acc = 0;
|
||||
acc_len = 0;
|
||||
for (;;) {
|
||||
unsigned d;
|
||||
|
||||
d = b64_char_to_byte(*src);
|
||||
if (d == 0xFF) {
|
||||
break;
|
||||
}
|
||||
src++;
|
||||
acc = (acc << 6) + d;
|
||||
acc_len += 6;
|
||||
if (acc_len >= 8) {
|
||||
acc_len -= 8;
|
||||
if ((len++) >= *dst_len) {
|
||||
return NULL;
|
||||
}
|
||||
*buf++ = (acc >> acc_len) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the input length is equal to 1 modulo 4 (which is
|
||||
* invalid), then there will remain 6 unprocessed bits;
|
||||
* otherwise, only 0, 2 or 4 bits are buffered. The buffered
|
||||
* bits must also all be zero.
|
||||
*/
|
||||
if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
*dst_len = len;
|
||||
return src;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode decimal integer from 'str'; the value is written in '*v'.
|
||||
* Returned value is a pointer to the next non-decimal character in the
|
||||
* string. If there is no digit at all, or the value encoding is not
|
||||
* minimal (extra leading zeros), or the value does not fit in an
|
||||
* 'unsigned long', then NULL is returned.
|
||||
*/
|
||||
static const char *decode_decimal(const char *str, unsigned long *v) {
|
||||
const char *orig;
|
||||
unsigned long acc;
|
||||
|
||||
acc = 0;
|
||||
for (orig = str;; str++) {
|
||||
int c;
|
||||
|
||||
c = *str;
|
||||
if (c < '0' || c > '9') {
|
||||
break;
|
||||
}
|
||||
c -= '0';
|
||||
if (acc > (ULONG_MAX / 10)) {
|
||||
return NULL;
|
||||
}
|
||||
acc *= 10;
|
||||
if ((unsigned long)c > (ULONG_MAX - acc)) {
|
||||
return NULL;
|
||||
}
|
||||
acc += (unsigned long)c;
|
||||
}
|
||||
if (str == orig || (*orig == '0' && str != (orig + 1))) {
|
||||
return NULL;
|
||||
}
|
||||
*v = acc;
|
||||
return str;
|
||||
}
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* Code specific to Argon2.
|
||||
*
|
||||
* The code below applies the following format:
|
||||
*
|
||||
* $argon2<T>[$v=<num>]$m=<num>,t=<num>,p=<num>$<bin>$<bin>
|
||||
*
|
||||
* where <T> is either 'd', 'id', or 'i', <num> is a decimal integer (positive,
|
||||
* fits in an 'unsigned long'), and <bin> is Base64-encoded data (no '=' padding
|
||||
* characters, no newline or whitespace).
|
||||
*
|
||||
* The last two binary chunks (encoded in Base64) are, in that order,
|
||||
* the salt and the output. Both are required. The binary salt length and the
|
||||
* output length must be in the allowed ranges defined in argon2.h.
|
||||
*
|
||||
* The ctx struct must contain buffers large enough to hold the salt and pwd
|
||||
* when it is fed into decode_string.
|
||||
*/
|
||||
|
||||
int decode_string(argon2_context *ctx, const char *str, argon2_type type) {
|
||||
|
||||
/* check for prefix */
|
||||
#define CC(prefix) \
|
||||
do { \
|
||||
size_t cc_len = strlen(prefix); \
|
||||
if (strncmp(str, prefix, cc_len) != 0) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
str += cc_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
/* optional prefix checking with supplied code */
|
||||
#define CC_opt(prefix, code) \
|
||||
do { \
|
||||
size_t cc_len = strlen(prefix); \
|
||||
if (strncmp(str, prefix, cc_len) == 0) { \
|
||||
str += cc_len; \
|
||||
{ code; } \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
/* Decoding prefix into decimal */
|
||||
#define DECIMAL(x) \
|
||||
do { \
|
||||
unsigned long dec_x; \
|
||||
str = decode_decimal(str, &dec_x); \
|
||||
if (str == NULL) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
(x) = dec_x; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
|
||||
/* Decoding prefix into uint32_t decimal */
|
||||
#define DECIMAL_U32(x) \
|
||||
do { \
|
||||
unsigned long dec_x; \
|
||||
str = decode_decimal(str, &dec_x); \
|
||||
if (str == NULL || dec_x > UINT32_MAX) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
(x) = (uint32_t)dec_x; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
|
||||
/* Decoding base64 into a binary buffer */
|
||||
#define BIN(buf, max_len, len) \
|
||||
do { \
|
||||
size_t bin_len = (max_len); \
|
||||
str = from_base64(buf, &bin_len, str); \
|
||||
if (str == NULL || bin_len > UINT32_MAX) { \
|
||||
return ARGON2_DECODING_FAIL; \
|
||||
} \
|
||||
(len) = (uint32_t)bin_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
size_t maxsaltlen = ctx->saltlen;
|
||||
size_t maxoutlen = ctx->outlen;
|
||||
int validation_result;
|
||||
const char* type_string;
|
||||
|
||||
/* We should start with the argon2_type we are using */
|
||||
type_string = argon2_type2string(type, 0);
|
||||
if (!type_string) {
|
||||
return ARGON2_INCORRECT_TYPE;
|
||||
}
|
||||
|
||||
CC("$");
|
||||
CC(type_string);
|
||||
|
||||
CC("$m=");
|
||||
DECIMAL_U32(ctx->m_cost);
|
||||
CC(",t=");
|
||||
DECIMAL_U32(ctx->t_cost);
|
||||
CC(",p=");
|
||||
DECIMAL_U32(ctx->lanes);
|
||||
ctx->threads = ctx->lanes;
|
||||
|
||||
CC("$");
|
||||
BIN(ctx->salt, maxsaltlen, ctx->saltlen);
|
||||
CC("$");
|
||||
BIN(ctx->out, maxoutlen, ctx->outlen);
|
||||
|
||||
/* The rest of the fields get the default values */
|
||||
ctx->secret = NULL;
|
||||
ctx->secretlen = 0;
|
||||
ctx->ad = NULL;
|
||||
ctx->adlen = 0;
|
||||
ctx->allocate_cbk = NULL;
|
||||
ctx->free_cbk = NULL;
|
||||
ctx->flags = ARGON2_DEFAULT_FLAGS;
|
||||
|
||||
/* On return, must have valid context */
|
||||
validation_result = validate_inputs(ctx);
|
||||
if (validation_result != ARGON2_OK) {
|
||||
return validation_result;
|
||||
}
|
||||
|
||||
/* Can't have any additional characters */
|
||||
if (*str == 0) {
|
||||
return ARGON2_OK;
|
||||
} else {
|
||||
return ARGON2_DECODING_FAIL;
|
||||
}
|
||||
#undef CC
|
||||
#undef CC_opt
|
||||
#undef DECIMAL
|
||||
#undef BIN
|
||||
}
|
||||
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
|
||||
argon2_type type) {
|
||||
#define SS(str) \
|
||||
do { \
|
||||
size_t pp_len = strlen(str); \
|
||||
if (pp_len >= dst_len) { \
|
||||
return ARGON2_ENCODING_FAIL; \
|
||||
} \
|
||||
memcpy(dst, str, pp_len + 1); \
|
||||
dst += pp_len; \
|
||||
dst_len -= pp_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define SX(x) \
|
||||
do { \
|
||||
char tmp[30]; \
|
||||
sprintf(tmp, "%lu", (unsigned long)(x)); \
|
||||
SS(tmp); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define SB(buf, len) \
|
||||
do { \
|
||||
size_t sb_len = to_base64(dst, dst_len, buf, len); \
|
||||
if (sb_len == (size_t)-1) { \
|
||||
return ARGON2_ENCODING_FAIL; \
|
||||
} \
|
||||
dst += sb_len; \
|
||||
dst_len -= sb_len; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
const char* type_string = argon2_type2string(type, 0);
|
||||
int validation_result = validate_inputs(ctx);
|
||||
|
||||
if (!type_string) {
|
||||
return ARGON2_ENCODING_FAIL;
|
||||
}
|
||||
|
||||
if (validation_result != ARGON2_OK) {
|
||||
return validation_result;
|
||||
}
|
||||
|
||||
|
||||
SS("$");
|
||||
SS(type_string);
|
||||
|
||||
SS("$m=");
|
||||
SX(ctx->m_cost);
|
||||
SS(",t=");
|
||||
SX(ctx->t_cost);
|
||||
SS(",p=");
|
||||
SX(ctx->lanes);
|
||||
|
||||
SS("$");
|
||||
SB(ctx->salt, ctx->saltlen);
|
||||
|
||||
SS("$");
|
||||
SB(ctx->out, ctx->outlen);
|
||||
return ARGON2_OK;
|
||||
|
||||
#undef SS
|
||||
#undef SX
|
||||
#undef SB
|
||||
}
|
||||
|
||||
size_t b64len(uint32_t len) {
|
||||
size_t olen = ((size_t)len / 3) << 2;
|
||||
|
||||
switch (len % 3) {
|
||||
case 2:
|
||||
olen++;
|
||||
/* fall through */
|
||||
case 1:
|
||||
olen += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
return olen;
|
||||
}
|
||||
|
||||
size_t numlen(uint32_t num) {
|
||||
size_t len = 1;
|
||||
while (num >= 10) {
|
||||
++len;
|
||||
num = num / 10;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
57
algo/argon2/argon2d/argon2d/encoding.h
Normal file
57
algo/argon2/argon2d/argon2d/encoding.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ENCODING_H
|
||||
#define ENCODING_H
|
||||
#include "argon2.h"
|
||||
|
||||
#define ARGON2_MAX_DECODED_LANES UINT32_C(255)
|
||||
#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8)
|
||||
#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12)
|
||||
|
||||
/*
|
||||
* encode an Argon2 hash string into the provided buffer. 'dst_len'
|
||||
* contains the size, in characters, of the 'dst' buffer; if 'dst_len'
|
||||
* is less than the number of required characters (including the
|
||||
* terminating 0), then this function returns ARGON2_ENCODING_ERROR.
|
||||
*
|
||||
* on success, ARGON2_OK is returned.
|
||||
*/
|
||||
int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Decodes an Argon2 hash string into the provided structure 'ctx'.
|
||||
* The only fields that must be set prior to this call are ctx.saltlen and
|
||||
* ctx.outlen (which must be the maximal salt and out length values that are
|
||||
* allowed), ctx.salt and ctx.out (which must be buffers of the specified
|
||||
* length), and ctx.pwd and ctx.pwdlen which must hold a valid password.
|
||||
*
|
||||
* Invalid input string causes an error. On success, the ctx is valid and all
|
||||
* fields have been initialized.
|
||||
*
|
||||
* Returned value is ARGON2_OK on success, other ARGON2_ codes on error.
|
||||
*/
|
||||
int decode_string(argon2_context *ctx, const char *str, argon2_type type);
|
||||
|
||||
/* Returns the length of the encoded byte stream with length len */
|
||||
size_t b64len(uint32_t len);
|
||||
|
||||
/* Returns the length of the encoded number num */
|
||||
size_t numlen(uint32_t num);
|
||||
|
||||
#endif
|
172
algo/argon2/argon2d/argon2d/opt.c
Normal file
172
algo/argon2/argon2d/argon2d/opt.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
#include "core.h"
|
||||
|
||||
#include "../blake2/blake2.h"
|
||||
#include "../blake2/blamka-round-opt.h"
|
||||
|
||||
/*
|
||||
* Function fills a new memory block and optionally XORs the old block over the new one.
|
||||
* Memory must be initialized.
|
||||
* @param state Pointer to the just produced block. Content will be updated(!)
|
||||
* @param ref_block Pointer to the reference block
|
||||
* @param next_block Pointer to the block to be XORed over. May coincide with @ref_block
|
||||
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
static void fill_block(__m128i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
|
||||
block_XY[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
state[8 * i + 6], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void next_addresses(block *address_block, block *input_block) {
|
||||
/*Temporary zero-initialized blocks*/
|
||||
__m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
__m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
|
||||
memset(zero_block, 0, sizeof(zero_block));
|
||||
memset(zero2_block, 0, sizeof(zero2_block));
|
||||
|
||||
/*Increasing index counter*/
|
||||
input_block->v[6]++;
|
||||
|
||||
/*First iteration of G*/
|
||||
fill_block(zero_block, input_block, address_block, 0);
|
||||
|
||||
/*Second iteration of G*/
|
||||
fill_block(zero2_block, address_block, address_block, 0);
|
||||
}
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m128i state[64];
|
||||
int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
|
||||
/* Don't forget to generate the first block of addresses: */
|
||||
if (data_independent_addressing) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length +
|
||||
position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
} else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
} else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
|
||||
}
|
||||
}
|
57
algo/argon2/argon2d/argon2d/thread.c
Normal file
57
algo/argon2/argon2d/argon2d/thread.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#if !defined(ARGON2_NO_THREADS)
|
||||
|
||||
#include "thread.h"
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
int argon2_thread_create(argon2_thread_handle_t *handle,
|
||||
argon2_thread_func_t func, void *args) {
|
||||
if (NULL == handle || func == NULL) {
|
||||
return -1;
|
||||
}
|
||||
#if defined(_WIN32)
|
||||
*handle = _beginthreadex(NULL, 0, func, args, 0, NULL);
|
||||
return *handle != 0 ? 0 : -1;
|
||||
#else
|
||||
return pthread_create(handle, NULL, func, args);
|
||||
#endif
|
||||
}
|
||||
|
||||
int argon2_thread_join(argon2_thread_handle_t handle) {
|
||||
#if defined(_WIN32)
|
||||
if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) {
|
||||
return CloseHandle((HANDLE)handle) != 0 ? 0 : -1;
|
||||
}
|
||||
return -1;
|
||||
#else
|
||||
return pthread_join(handle, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
void argon2_thread_exit(void) {
|
||||
#if defined(_WIN32)
|
||||
_endthreadex(0);
|
||||
#else
|
||||
pthread_exit(NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* ARGON2_NO_THREADS */
|
67
algo/argon2/argon2d/argon2d/thread.h
Normal file
67
algo/argon2/argon2d/argon2d/thread.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_THREAD_H
|
||||
#define ARGON2_THREAD_H
|
||||
|
||||
#if !defined(ARGON2_NO_THREADS)
|
||||
|
||||
/*
|
||||
Here we implement an abstraction layer for the simpĺe requirements
|
||||
of the Argon2 code. We only require 3 primitives---thread creation,
|
||||
joining, and termination---so full emulation of the pthreads API
|
||||
is unwarranted. Currently we wrap pthreads and Win32 threads.
|
||||
|
||||
The API defines 2 types: the function pointer type,
|
||||
argon2_thread_func_t,
|
||||
and the type of the thread handle---argon2_thread_handle_t.
|
||||
*/
|
||||
#if defined(_WIN32)
|
||||
#include <process.h>
|
||||
typedef unsigned(__stdcall *argon2_thread_func_t)(void *);
|
||||
typedef uintptr_t argon2_thread_handle_t;
|
||||
#else
|
||||
#include <pthread.h>
|
||||
typedef void *(*argon2_thread_func_t)(void *);
|
||||
typedef pthread_t argon2_thread_handle_t;
|
||||
#endif
|
||||
|
||||
/* Creates a thread
|
||||
* @param handle pointer to a thread handle, which is the output of this
|
||||
* function. Must not be NULL.
|
||||
* @param func A function pointer for the thread's entry point. Must not be
|
||||
* NULL.
|
||||
* @param args Pointer that is passed as an argument to @func. May be NULL.
|
||||
* @return 0 if @handle and @func are valid pointers and a thread is successfuly
|
||||
* created.
|
||||
*/
|
||||
int argon2_thread_create(argon2_thread_handle_t *handle,
|
||||
argon2_thread_func_t func, void *args);
|
||||
|
||||
/* Waits for a thread to terminate
|
||||
* @param handle Handle to a thread created with argon2_thread_create.
|
||||
* @return 0 if @handle is a valid handle, and joining completed successfully.
|
||||
*/
|
||||
int argon2_thread_join(argon2_thread_handle_t handle);
|
||||
|
||||
/* Terminate the current thread. Must be run inside a thread created by
|
||||
* argon2_thread_create.
|
||||
*/
|
||||
void argon2_thread_exit(void);
|
||||
|
||||
#endif /* ARGON2_NO_THREADS */
|
||||
#endif
|
156
algo/argon2/argon2d/blake2/blake2-impl.h
Normal file
156
algo/argon2/argon2d/blake2/blake2-impl.h
Normal file
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef PORTABLE_BLAKE2_IMPL_H
|
||||
#define PORTABLE_BLAKE2_IMPL_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define BLAKE2_INLINE __inline
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define BLAKE2_INLINE __inline__
|
||||
#else
|
||||
#define BLAKE2_INLINE
|
||||
#endif
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
/*
|
||||
Not an exhaustive list, but should cover the majority of modern platforms
|
||||
Additionally, the code will always be correct---this is only a performance
|
||||
tweak.
|
||||
*/
|
||||
#if (defined(__BYTE_ORDER__) && \
|
||||
(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
|
||||
defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
|
||||
defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
|
||||
defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
|
||||
defined(_M_ARM)
|
||||
#define NATIVE_LITTLE_ENDIAN
|
||||
#endif
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
static BLAKE2_INLINE uint32_t load32(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
uint32_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint32_t w = *p++;
|
||||
w |= (uint32_t)(*p++) << 8;
|
||||
w |= (uint32_t)(*p++) << 16;
|
||||
w |= (uint32_t)(*p++) << 24;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint64_t load64(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
uint64_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
w |= (uint64_t)(*p++) << 48;
|
||||
w |= (uint64_t)(*p++) << 56;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void store32(void *dst, uint32_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
memcpy(dst, &w, sizeof w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void store64(void *dst, uint64_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
memcpy(dst, &w, sizeof w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint64_t load48(const void *src) {
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
return w;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void store48(void *dst, uint64_t w) {
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (32 - c));
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (64 - c));
|
||||
}
|
||||
|
||||
void clear_internal_memory(void *v, size_t n);
|
||||
|
||||
#endif
|
91
algo/argon2/argon2d/blake2/blake2.h
Normal file
91
algo/argon2/argon2d/blake2/blake2.h
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef PORTABLE_BLAKE2_H
|
||||
#define PORTABLE_BLAKE2_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum blake2b_constant {
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2b_param {
|
||||
uint8_t digest_length; /* 1 */
|
||||
uint8_t key_length; /* 2 */
|
||||
uint8_t fanout; /* 3 */
|
||||
uint8_t depth; /* 4 */
|
||||
uint32_t leaf_length; /* 8 */
|
||||
uint64_t node_offset; /* 16 */
|
||||
uint8_t node_depth; /* 17 */
|
||||
uint8_t inner_length; /* 18 */
|
||||
uint8_t reserved[14]; /* 32 */
|
||||
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
|
||||
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
|
||||
} blake2b_param;
|
||||
#pragma pack(pop)
|
||||
|
||||
typedef struct __blake2b_state {
|
||||
uint64_t h[8];
|
||||
uint64_t t[2];
|
||||
uint64_t f[2];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
unsigned buflen;
|
||||
unsigned outlen;
|
||||
uint8_t last_node;
|
||||
} blake2b_state;
|
||||
|
||||
/* Ensure param structs have not been wrongly padded */
|
||||
/* Poor man's static_assert */
|
||||
enum {
|
||||
blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
|
||||
blake2_size_check_2 =
|
||||
1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
|
||||
};
|
||||
|
||||
/* Streaming API */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen);
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
390
algo/argon2/argon2d/blake2/blake2b.c
Normal file
390
algo/argon2/argon2d/blake2/blake2b.c
Normal file
@@ -0,0 +1,390 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
static const uint64_t blake2b_IV[8] = {
|
||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)};
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] = {
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) {
|
||||
S->f[1] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) {
|
||||
if (S->last_node) {
|
||||
blake2b_set_lastnode(S);
|
||||
}
|
||||
S->f[0] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S,
|
||||
uint64_t inc) {
|
||||
S->t[0] += inc;
|
||||
S->t[1] += (S->t[0] < inc);
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) {
|
||||
clear_internal_memory(S, sizeof(*S)); /* wipe */
|
||||
blake2b_set_lastblock(S); /* invalidate for further use */
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) {
|
||||
memset(S, 0, sizeof(*S));
|
||||
memcpy(S->h, blake2b_IV, sizeof(S->h));
|
||||
}
|
||||
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
|
||||
const unsigned char *p = (const unsigned char *)P;
|
||||
unsigned int i;
|
||||
|
||||
if (NULL == P || NULL == S) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_init0(S);
|
||||
/* IV XOR Parameter Block */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
|
||||
}
|
||||
S->outlen = P->digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sequential blake2b initialization */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for unkeyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = 0;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
return blake2b_init_param(S, &P);
|
||||
}
|
||||
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for keyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = (uint8_t)keylen;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
if (blake2b_init_param(S, &P) < 0) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset(block, 0, BLAKE2B_BLOCKBYTES);
|
||||
memcpy(block, key, keylen);
|
||||
blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
|
||||
/* Burn the key from stack */
|
||||
clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
||||
uint64_t m[16];
|
||||
uint64_t v[16];
|
||||
unsigned int i, r;
|
||||
|
||||
for (i = 0; i < 16; ++i) {
|
||||
m[i] = load64(block + i * sizeof(m[i]));
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
v[i] = S->h[i];
|
||||
}
|
||||
|
||||
v[8] = blake2b_IV[0];
|
||||
v[9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = blake2b_IV[4] ^ S->t[0];
|
||||
v[13] = blake2b_IV[5] ^ S->t[1];
|
||||
v[14] = blake2b_IV[6] ^ S->f[0];
|
||||
v[15] = blake2b_IV[7] ^ S->f[1];
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
for (r = 0; r < 12; ++r) {
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
|
||||
if (inlen == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sanity check */
|
||||
if (S == NULL || in == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
|
||||
/* Complete current block */
|
||||
size_t left = S->buflen;
|
||||
size_t fill = BLAKE2B_BLOCKBYTES - left;
|
||||
memcpy(&S->buf[left], pin, fill);
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, S->buf);
|
||||
S->buflen = 0;
|
||||
inlen -= fill;
|
||||
pin += fill;
|
||||
/* Avoid buffer copies when possible */
|
||||
while (inlen > BLAKE2B_BLOCKBYTES) {
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, pin);
|
||||
inlen -= BLAKE2B_BLOCKBYTES;
|
||||
pin += BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
}
|
||||
memcpy(&S->buf[S->buflen], pin, inlen);
|
||||
S->buflen += (unsigned int)inlen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
|
||||
unsigned int i;
|
||||
|
||||
/* Sanity checks */
|
||||
if (S == NULL || out == NULL || outlen < S->outlen) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_increment_counter(S, S->buflen);
|
||||
blake2b_set_lastblock(S);
|
||||
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
||||
blake2b_compress(S, S->buf);
|
||||
|
||||
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
||||
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
||||
}
|
||||
|
||||
memcpy(out, buffer, S->outlen);
|
||||
clear_internal_memory(buffer, sizeof(buffer));
|
||||
clear_internal_memory(S->buf, sizeof(S->buf));
|
||||
clear_internal_memory(S->h, sizeof(S->h));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen) {
|
||||
blake2b_state S;
|
||||
int ret = -1;
|
||||
|
||||
/* Verify parameters */
|
||||
if (NULL == in && inlen > 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
} else {
|
||||
if (blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (blake2b_update(&S, in, inlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
ret = blake2b_final(&S, out, outlen);
|
||||
|
||||
fail:
|
||||
clear_internal_memory(&S, sizeof(S));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
blake2b_state blake_state;
|
||||
uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
|
||||
int ret = -1;
|
||||
|
||||
if (outlen > UINT32_MAX) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Ensure little-endian byte order! */
|
||||
store32(outlen_bytes, (uint32_t)outlen);
|
||||
|
||||
#define TRY(statement) \
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
if (outlen <= BLAKE2B_OUTBYTES) {
|
||||
TRY(blake2b_init(&blake_state, outlen));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out, outlen));
|
||||
} else {
|
||||
uint32_t toproduce;
|
||||
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
||||
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
||||
TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
||||
|
||||
while (toproduce > BLAKE2B_OUTBYTES) {
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES, NULL, 0));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce -= BLAKE2B_OUTBYTES / 2;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
|
||||
0));
|
||||
memcpy(out, out_buffer, toproduce);
|
||||
}
|
||||
fail:
|
||||
clear_internal_memory(&blake_state, sizeof(blake_state));
|
||||
return ret;
|
||||
#undef TRY
|
||||
}
|
||||
/* Argon2 Team - End Code */
|
180
algo/argon2/argon2d/blake2/blamka-round-opt.h
Normal file
180
algo/argon2/argon2d/blake2/blamka-round-opt.h
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_OPT_H
|
||||
#define BLAKE_ROUND_MKA_OPT_H
|
||||
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
#if defined(__SSSE3__)
|
||||
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
|
||||
#endif
|
||||
|
||||
#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__XOP__)
|
||||
#if defined(__SSSE3__)
|
||||
#define r16 \
|
||||
(_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define r24 \
|
||||
(_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define _mm_roti_epi64(x, c) \
|
||||
(-(c) == 32) \
|
||||
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
|
||||
: (-(c) == 24) \
|
||||
? _mm_shuffle_epi8((x), r24) \
|
||||
: (-(c) == 16) \
|
||||
? _mm_shuffle_epi8((x), r16) \
|
||||
: (-(c) == 63) \
|
||||
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_add_epi64((x), (x))) \
|
||||
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_slli_epi64((x), 64 - (-(c))))
|
||||
#else /* defined(__SSE2__) */
|
||||
#define _mm_roti_epi64(r, c) \
|
||||
_mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
|
||||
#endif
|
||||
#else
|
||||
#endif
|
||||
|
||||
static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
const __m128i z = _mm_mul_epu32(x, y);
|
||||
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
|
||||
}
|
||||
|
||||
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -32); \
|
||||
D1 = _mm_roti_epi64(D1, -32); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -24); \
|
||||
B1 = _mm_roti_epi64(B1, -24); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -16); \
|
||||
D1 = _mm_roti_epi64(D1, -16); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -63); \
|
||||
B1 = _mm_roti_epi64(B1, -63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
t1 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
t1 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
#else /* SSE2 */
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = D0; \
|
||||
__m128i t1 = B0; \
|
||||
D0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = D0; \
|
||||
D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
|
||||
D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
|
||||
B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
|
||||
B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0, t1; \
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
t0 = B0; \
|
||||
t1 = D0; \
|
||||
B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
|
||||
B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
|
||||
D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
|
||||
D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
|
||||
} while ((void)0, 0)
|
||||
#endif
|
||||
|
||||
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
56
algo/argon2/argon2d/blake2/blamka-round-ref.h
Normal file
56
algo/argon2/argon2d/blake2/blamka-round-ref.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Argon2 reference source code package - reference C implementations
|
||||
*
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* You should have received a copy of both of these licenses along with this
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_H
|
||||
#define BLAKE_ROUND_MKA_H
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/*designed by the Lyra PHC team */
|
||||
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
const uint64_t m = UINT64_C(0xFFFFFFFF);
|
||||
const uint64_t xy = (x & m) * (y & m);
|
||||
return x + y + 2 * xy;
|
||||
}
|
||||
|
||||
#define G(a, b, c, d) \
|
||||
do { \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
|
||||
v12, v13, v14, v15) \
|
||||
do { \
|
||||
G(v0, v4, v8, v12); \
|
||||
G(v1, v5, v9, v13); \
|
||||
G(v2, v6, v10, v14); \
|
||||
G(v3, v7, v11, v15); \
|
||||
G(v0, v5, v10, v15); \
|
||||
G(v1, v6, v11, v12); \
|
||||
G(v2, v7, v8, v13); \
|
||||
G(v3, v4, v9, v14); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
@@ -30,7 +30,7 @@
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (__SSE4_2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -363,14 +363,14 @@ static const sph_u64 CB[16] = {
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( _mm_xor_si128( \
|
||||
_mm_set_epi32( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( _mm_xor_si128( \
|
||||
_mm_set_epi32( c0, c0, c0, c0 ), m1 ), b ), a ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32
|
||||
@@ -419,14 +419,14 @@ do { \
|
||||
do { \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set1_epi32( c1 ), m0 ), b ), a ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set1_epi32( c0 ), m1 ), b ), a ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_S_8WAY(r) do { \
|
||||
@@ -445,14 +445,14 @@ do { \
|
||||
#define GB_4WAY(m0, m1, c0, c1, a, b, c, d) do { \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 25 ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 25 ); \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c0, c0, c0, c0 ), m1 ), b ), a ); \
|
||||
d = mm256_rotr_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 11 ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 11 ); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
|
@@ -37,7 +37,7 @@
|
||||
#ifndef __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY__ 1
|
||||
|
||||
#ifdef __AVX__
|
||||
#ifdef __SSE4_2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -51,7 +51,7 @@ extern "C"{
|
||||
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
// With AVX only Blake-256 4 way is available.
|
||||
// With SSE4.2 only Blake-256 4 way is available.
|
||||
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
|
||||
|
||||
// Blake-256 4 way
|
||||
|
@@ -20,7 +20,7 @@ bool register_blake2s_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&blake2s_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define BLAKE2S_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
||||
@@ -92,13 +92,13 @@ int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
|
||||
#define G4W(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND4W(r) \
|
||||
@@ -210,14 +210,14 @@ int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
|
||||
do { \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
|
||||
m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
|
||||
m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND8W(r) \
|
||||
@@ -359,4 +359,4 @@ int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
|
@@ -14,7 +14,7 @@
|
||||
#ifndef __BLAKE2S_HASH_4WAY_H__
|
||||
#define __BLAKE2S_HASH_4WAY_H__ 1
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
@@ -107,6 +107,6 @@ int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
|
||||
#endif
|
||||
|
@@ -22,7 +22,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_blakecoin;
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define BLAKECOIN_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
@@ -18,7 +18,7 @@
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define DECRED_4WAY
|
||||
#endif
|
||||
|
||||
|
@@ -77,26 +77,26 @@ static const sph_u64 IV512[] = {
|
||||
#define ss0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 1), \
|
||||
_mm_slli_epi32( (x), 3) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 4), \
|
||||
mm_rotl_32( (x), 19) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 4), \
|
||||
mm_rol_32( (x), 19) ) )
|
||||
|
||||
#define ss1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 1), \
|
||||
_mm_slli_epi32( (x), 2) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 8), \
|
||||
mm_rotl_32( (x), 23) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 8), \
|
||||
mm_rol_32( (x), 23) ) )
|
||||
|
||||
#define ss2(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 2), \
|
||||
_mm_slli_epi32( (x), 1) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 12), \
|
||||
mm_rotl_32( (x), 25) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 12), \
|
||||
mm_rol_32( (x), 25) ) )
|
||||
|
||||
#define ss3(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_srli_epi32( (x), 2), \
|
||||
_mm_slli_epi32( (x), 2) ), \
|
||||
_mm_xor_si128( mm_rotl_32( (x), 15), \
|
||||
mm_rotl_32( (x), 29) ) )
|
||||
_mm_xor_si128( mm_rol_32( (x), 15), \
|
||||
mm_rol_32( (x), 29) ) )
|
||||
|
||||
#define ss4(x) \
|
||||
_mm_xor_si128( (x), _mm_srli_epi32( (x), 1 ) )
|
||||
@@ -104,16 +104,16 @@ static const sph_u64 IV512[] = {
|
||||
#define ss5(x) \
|
||||
_mm_xor_si128( (x), _mm_srli_epi32( (x), 2 ) )
|
||||
|
||||
#define rs1(x) mm_rotl_32( x, 3 )
|
||||
#define rs2(x) mm_rotl_32( x, 7 )
|
||||
#define rs3(x) mm_rotl_32( x, 13 )
|
||||
#define rs4(x) mm_rotl_32( x, 16 )
|
||||
#define rs5(x) mm_rotl_32( x, 19 )
|
||||
#define rs6(x) mm_rotl_32( x, 23 )
|
||||
#define rs7(x) mm_rotl_32( x, 27 )
|
||||
#define rs1(x) mm_rol_32( x, 3 )
|
||||
#define rs2(x) mm_rol_32( x, 7 )
|
||||
#define rs3(x) mm_rol_32( x, 13 )
|
||||
#define rs4(x) mm_rol_32( x, 16 )
|
||||
#define rs5(x) mm_rol_32( x, 19 )
|
||||
#define rs6(x) mm_rol_32( x, 23 )
|
||||
#define rs7(x) mm_rol_32( x, 27 )
|
||||
|
||||
#define rol_off_32( M, j, off ) \
|
||||
mm_rotl_32( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
mm_rol_32( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
|
||||
#define add_elt_s( M, H, j ) \
|
||||
@@ -178,26 +178,26 @@ static const sph_u64 IV512[] = {
|
||||
#define sb0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
|
||||
_mm256_slli_epi64( (x), 3) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 4), \
|
||||
mm256_rotl_64( (x), 37) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 4), \
|
||||
mm256_rol_64( (x), 37) ) )
|
||||
|
||||
#define sb1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
|
||||
_mm256_slli_epi64( (x), 2) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 13), \
|
||||
mm256_rotl_64( (x), 43) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 13), \
|
||||
mm256_rol_64( (x), 43) ) )
|
||||
|
||||
#define sb2(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
|
||||
_mm256_slli_epi64( (x), 1) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 19), \
|
||||
mm256_rotl_64( (x), 53) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 19), \
|
||||
mm256_rol_64( (x), 53) ) )
|
||||
|
||||
#define sb3(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
|
||||
_mm256_slli_epi64( (x), 2) ), \
|
||||
_mm256_xor_si256( mm256_rotl_64( (x), 28), \
|
||||
mm256_rotl_64( (x), 59) ) )
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 28), \
|
||||
mm256_rol_64( (x), 59) ) )
|
||||
|
||||
#define sb4(x) \
|
||||
_mm256_xor_si256( (x), _mm256_srli_epi64( (x), 1 ) )
|
||||
@@ -205,17 +205,17 @@ static const sph_u64 IV512[] = {
|
||||
#define sb5(x) \
|
||||
_mm256_xor_si256( (x), _mm256_srli_epi64( (x), 2 ) )
|
||||
|
||||
#define rb1(x) mm256_rotl_64( x, 5 )
|
||||
#define rb2(x) mm256_rotl_64( x, 11 )
|
||||
#define rb3(x) mm256_rotl_64( x, 27 )
|
||||
#define rb4(x) mm256_rotl_64( x, 32 )
|
||||
#define rb5(x) mm256_rotl_64( x, 37 )
|
||||
#define rb6(x) mm256_rotl_64( x, 43 )
|
||||
#define rb7(x) mm256_rotl_64( x, 53 )
|
||||
#define rb1(x) mm256_rol_64( x, 5 )
|
||||
#define rb2(x) mm256_rol_64( x, 11 )
|
||||
#define rb3(x) mm256_rol_64( x, 27 )
|
||||
#define rb4(x) mm256_rol_64( x, 32 )
|
||||
#define rb5(x) mm256_rol_64( x, 37 )
|
||||
#define rb6(x) mm256_rol_64( x, 43 )
|
||||
#define rb7(x) mm256_rol_64( x, 53 )
|
||||
|
||||
#define rol_off_64( M, j, off ) \
|
||||
mm256_rotl_64( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
mm256_rol_64( M[ ( (j) + (off) ) & 0xF ] , \
|
||||
( ( (j) + (off) ) & 0xF ) + 1 )
|
||||
|
||||
#define add_elt_b( M, H, j ) \
|
||||
_mm256_xor_si256( \
|
||||
@@ -526,42 +526,42 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
|
||||
_mm_slli_epi32( qt[23], 2 ) ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( xl, qt[31] ), qt[ 7] ));
|
||||
dH[ 8] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[4], 9 ),
|
||||
mm_rol_32( dH[4], 9 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[24] ), M[ 8] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 8 ),
|
||||
_mm_xor_si128( qt[23], qt[ 8] ) ) );
|
||||
dH[ 9] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[5], 10 ),
|
||||
mm_rol_32( dH[5], 10 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[25] ), M[ 9] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 6 ),
|
||||
_mm_xor_si128( qt[16], qt[ 9] ) ) );
|
||||
dH[10] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[6], 11 ),
|
||||
mm_rol_32( dH[6], 11 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[26] ), M[10] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 6 ),
|
||||
_mm_xor_si128( qt[17], qt[10] ) ) );
|
||||
dH[11] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[7], 12 ),
|
||||
mm_rol_32( dH[7], 12 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[27] ), M[11] )),
|
||||
_mm_xor_si128( _mm_slli_epi32( xl, 4 ),
|
||||
_mm_xor_si128( qt[18], qt[11] ) ) );
|
||||
dH[12] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[0], 13 ),
|
||||
mm_rol_32( dH[0], 13 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[28] ), M[12] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 3 ),
|
||||
_mm_xor_si128( qt[19], qt[12] ) ) );
|
||||
dH[13] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[1], 14 ),
|
||||
mm_rol_32( dH[1], 14 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[29] ), M[13] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 4 ),
|
||||
_mm_xor_si128( qt[20], qt[13] ) ) );
|
||||
dH[14] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[2], 15 ),
|
||||
mm_rol_32( dH[2], 15 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[30] ), M[14] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 7 ),
|
||||
_mm_xor_si128( qt[21], qt[14] ) ) );
|
||||
dH[15] = _mm_add_epi32( _mm_add_epi32(
|
||||
mm_rotl_32( dH[3], 16 ),
|
||||
mm_rol_32( dH[3], 16 ),
|
||||
_mm_xor_si128( _mm_xor_si128( xh, qt[31] ), M[15] )),
|
||||
_mm_xor_si128( _mm_srli_epi32( xl, 2 ),
|
||||
_mm_xor_si128( qt[22], qt[15] ) ) );
|
||||
@@ -819,42 +819,42 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
_mm256_slli_epi64( qt[23], 2 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[31] ), qt[ 7] ));
|
||||
dH[ 8] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[4], 9 ),
|
||||
mm256_rol_64( dH[4], 9 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[24] ), M[ 8] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 8 ),
|
||||
_mm256_xor_si256( qt[23], qt[ 8] ) ) );
|
||||
dH[ 9] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[5], 10 ),
|
||||
mm256_rol_64( dH[5], 10 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[25] ), M[ 9] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 6 ),
|
||||
_mm256_xor_si256( qt[16], qt[ 9] ) ) );
|
||||
dH[10] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[6], 11 ),
|
||||
mm256_rol_64( dH[6], 11 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[26] ), M[10] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 6 ),
|
||||
_mm256_xor_si256( qt[17], qt[10] ) ) );
|
||||
dH[11] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[7], 12 ),
|
||||
mm256_rol_64( dH[7], 12 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[27] ), M[11] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 4 ),
|
||||
_mm256_xor_si256( qt[18], qt[11] ) ) );
|
||||
dH[12] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[0], 13 ),
|
||||
mm256_rol_64( dH[0], 13 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[28] ), M[12] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 3 ),
|
||||
_mm256_xor_si256( qt[19], qt[12] ) ) );
|
||||
dH[13] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[1], 14 ),
|
||||
mm256_rol_64( dH[1], 14 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[29] ), M[13] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 4 ),
|
||||
_mm256_xor_si256( qt[20], qt[13] ) ) );
|
||||
dH[14] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[2], 15 ),
|
||||
mm256_rol_64( dH[2], 15 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[30] ), M[14] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 7 ),
|
||||
_mm256_xor_si256( qt[21], qt[14] ) ) );
|
||||
dH[15] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rotl_64( dH[3], 16 ),
|
||||
mm256_rol_64( dH[3], 16 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[31] ), M[15] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 2 ),
|
||||
_mm256_xor_si256( qt[22], qt[15] ) ) );
|
||||
|
1251
algo/bmw/bmw.test
1251
algo/bmw/bmw.test
File diff suppressed because it is too large
Load Diff
@@ -22,7 +22,7 @@ static void transform( cubehashParam *sp )
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
__m256i x0, x1, x2, x3, y0, y1;
|
||||
register __m256i x0, x1, x2, x3, y0, y1;
|
||||
|
||||
x0 = _mm256_load_si256( (__m256i*)sp->x );
|
||||
x1 = _mm256_load_si256( (__m256i*)sp->x + 1 );
|
||||
@@ -33,20 +33,19 @@ static void transform( cubehashParam *sp )
|
||||
{
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
y0 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( x1, 7 ),
|
||||
_mm256_srli_epi32( x1, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
|
||||
_mm256_srli_epi32( y1, 25 ) );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0x4e );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0x4e );
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = _mm256_permute2f128_si256( x0, x0, 1 );
|
||||
y1 = _mm256_permute2f128_si256( x1, x1, 1 );
|
||||
y0 = _mm256_permute4x64_epi64( x0, 0x4e );
|
||||
y1 = _mm256_permute4x64_epi64( x1, 0x4e );
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
|
@@ -601,18 +601,18 @@ do { \
|
||||
|
||||
#define L( a, b, c, d ) \
|
||||
do { \
|
||||
a = mm256_rotl_32( a, 13 ); \
|
||||
c = mm256_rotl_32( c, 3 ); \
|
||||
a = mm256_rol_32( a, 13 ); \
|
||||
c = mm256_rol_32( c, 3 ); \
|
||||
b = _mm256_xor_si256( b, _mm256_xor_si256( a, c ) ); \
|
||||
d = _mm256_xor_si256( d, _mm256_xor_si256( c, \
|
||||
_mm256_slli_epi32( a, 3 ) ) ); \
|
||||
b = mm256_rotl_32( b, 1 ); \
|
||||
d = mm256_rotl_32( d, 7 ); \
|
||||
b = mm256_rol_32( b, 1 ); \
|
||||
d = mm256_rol_32( d, 7 ); \
|
||||
a = _mm256_xor_si256( a, _mm256_xor_si256( b, d ) ); \
|
||||
c = _mm256_xor_si256( c, _mm256_xor_si256( d, \
|
||||
_mm256_slli_epi32( b, 7 ) ) ); \
|
||||
a = mm256_rotl_32( a, 5 ); \
|
||||
c = mm256_rotl_32( c, 22 ); \
|
||||
a = mm256_rol_32( a, 5 ); \
|
||||
c = mm256_rol_32( c, 22 ); \
|
||||
} while (0)
|
||||
|
||||
#define DECL_STATE_BIG \
|
||||
|
@@ -38,7 +38,7 @@
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
|
@@ -40,7 +40,9 @@
|
||||
#include <string.h>
|
||||
#include "haval-hash-4way.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
// won't compile with sse4.2
|
||||
//#if defined (__SSE4_2__)
|
||||
#if defined(__AVX__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -134,8 +136,8 @@ extern "C"{
|
||||
#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) \
|
||||
do { \
|
||||
__m128i t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
|
||||
x7 = _mm_add_epi32( _mm_add_epi32( mm_rotr_32( t, 7 ), \
|
||||
mm_rotr_32( x7, 11 ) ), \
|
||||
x7 = _mm_add_epi32( _mm_add_epi32( mm_ror_32( t, 7 ), \
|
||||
mm_ror_32( x7, 11 ) ), \
|
||||
_mm_add_epi32( w, _mm_set1_epi32( c ) ) ); \
|
||||
} while (0)
|
||||
|
||||
|
@@ -60,7 +60,7 @@ static const sph_u64 RC[] = {
|
||||
#define AND64(d, a, b) (d = _mm256_and_si256(a,b))
|
||||
#define OR64(d, a, b) (d = _mm256_or_si256(a,b))
|
||||
#define NOT64(d, s) (d = _mm256_xor_si256(s,m256_neg1))
|
||||
#define ROL64(d, v, n) (d = mm256_rotl_64(v, n))
|
||||
#define ROL64(d, v, n) (d = mm256_rol_64(v, n))
|
||||
#define XOR64_IOTA XOR64
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
||||
|
@@ -13,7 +13,7 @@ bool register_allium_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0xFFFFLL;
|
||||
return true;
|
||||
|
@@ -386,7 +386,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
||||
|
||||
#if defined(__AVX2__)
|
||||
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
||||
#elif defined(__AVX__)
|
||||
#elif defined(__SSE4_2__)
|
||||
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
||||
#else
|
||||
memset( wholeMatrix, 0, i );
|
||||
|
@@ -17,7 +17,7 @@ bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2h;
|
||||
gate->hash = (void*)&lyra2h_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2h_set_target;
|
||||
return true;
|
||||
|
@@ -132,7 +132,7 @@ void lyra2re_set_target ( struct work* work, double job_diff )
|
||||
bool register_lyra2re_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_lyra2re_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_lyra2re;
|
||||
gate->hash = (void*)&lyra2re_hash;
|
||||
gate->get_max64 = (void*)&lyra2re_get_max64;
|
||||
|
@@ -31,7 +31,7 @@ bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
gate->set_target = (void*)&lyra2rev2_set_target;
|
||||
return true;
|
||||
|
@@ -21,7 +21,7 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_lyra2z;
|
||||
gate->hash = (void*)&lyra2z_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2z_set_target;
|
||||
return true;
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
@@ -69,7 +69,7 @@ bool lyra2z330_thread_init()
|
||||
|
||||
bool register_lyra2z330_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE42_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z330;
|
||||
gate->hash = (void*)&lyra2z330_hash;
|
||||
|
@@ -51,7 +51,7 @@ inline void initState( uint64_t State[/*16*/] )
|
||||
state[3] = _mm256_set_epi64x( blake2b_IV[7], blake2b_IV[6],
|
||||
blake2b_IV[5], blake2b_IV[4] );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
|
||||
@@ -137,7 +137,7 @@ inline void squeeze( uint64_t *State, byte *Out, unsigned int len )
|
||||
//Squeezes remaining bytes
|
||||
memcpy_256( out, state, ( len_m256i % BLOCK_LEN_M256I ) );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
const int len_m128i = len / 16;
|
||||
const int fullBlocks = len_m128i / BLOCK_LEN_M128I;
|
||||
@@ -186,16 +186,26 @@ inline void absorbBlock( uint64_t *State, const uint64_t *In )
|
||||
{
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i* in = (__m256i*)In;
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i *in = (__m256i*)In;
|
||||
|
||||
state[0] = _mm256_xor_si256( state[0], in[0] );
|
||||
state[1] = _mm256_xor_si256( state[1], in[1] );
|
||||
state[2] = _mm256_xor_si256( state[2], in[2] );
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
LYRA_12_ROUNDS_AVX2( state[0], state[1], state[2], state[3] );
|
||||
state0 = _mm256_xor_si256( state0, in[0] );
|
||||
state1 = _mm256_xor_si256( state1, in[1] );
|
||||
state2 = _mm256_xor_si256( state2, in[2] );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
LYRA_12_ROUNDS_AVX2( state0, state1, state2, state3 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i* in = (__m128i*)In;
|
||||
@@ -245,15 +255,25 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In )
|
||||
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i* in = (__m256i*)In;
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i *in = (__m256i*)In;
|
||||
|
||||
state[0] = _mm256_xor_si256( state[0], in[0] );
|
||||
state[1] = _mm256_xor_si256( state[1], in[1] );
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
LYRA_12_ROUNDS_AVX2( state[0], state[1], state[2], state[3] );
|
||||
state0 = _mm256_xor_si256( state0, in[0] );
|
||||
state1 = _mm256_xor_si256( state1, in[1] );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
LYRA_12_ROUNDS_AVX2( state0, state1, state2, state3 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i* in = (__m128i*)In;
|
||||
@@ -301,14 +321,14 @@ inline void reducedSqueezeRow0( uint64_t* State, uint64_t* rowOut,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* out = (__m256i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( out - i, _MM_HINT_T0 );
|
||||
@@ -330,13 +350,12 @@ inline void reducedSqueezeRow0( uint64_t* State, uint64_t* rowOut,
|
||||
LYRA_ROUND_AVX2( state0, state1, state2, state3 );
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i state0 = _mm_load_si128( state );
|
||||
@@ -429,15 +448,15 @@ inline void reducedDuplexRow1( uint64_t *State, uint64_t *rowIn,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* in = (__m256i*)rowIn;
|
||||
__m256i* out = (__m256i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( in + i, _MM_HINT_T0 );
|
||||
@@ -470,12 +489,12 @@ inline void reducedDuplexRow1( uint64_t *State, uint64_t *rowIn,
|
||||
out -= BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i state0 = _mm_load_si128( state );
|
||||
@@ -608,17 +627,17 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* in = (__m256i*)rowIn;
|
||||
__m256i* inout = (__m256i*)rowInOut;
|
||||
__m256i* out = (__m256i*)rowOut + ( (nCols-1) * BLOCK_LEN_M256I );
|
||||
__m256i t0, t1, t2;
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( in + i, _MM_HINT_T0 );
|
||||
@@ -670,12 +689,12 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
||||
out -= BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined (__AVX__)
|
||||
#elif defined (__SSE4_2__)
|
||||
|
||||
__m128i* in = (__m128i*)rowIn;
|
||||
__m128i* inout = (__m128i*)rowInOut;
|
||||
@@ -842,17 +861,17 @@ inline void reducedDuplexRow( uint64_t *State, uint64_t *rowIn,
|
||||
|
||||
#if defined __AVX2__
|
||||
|
||||
__m256i* state = (__m256i*)State;
|
||||
__m256i state0 = _mm256_load_si256( state );
|
||||
__m256i state1 = _mm256_load_si256( &state[1] );
|
||||
__m256i state2 = _mm256_load_si256( &state[2] );
|
||||
__m256i state3 = _mm256_load_si256( &state[3] );
|
||||
|
||||
register __m256i state0, state1, state2, state3;
|
||||
__m256i* in = (__m256i*)rowIn;
|
||||
__m256i* inout = (__m256i*)rowInOut;
|
||||
__m256i* out = (__m256i*)rowOut;
|
||||
__m256i t0, t1, t2;
|
||||
|
||||
state0 = _mm256_load_si256( (__m256i*)State );
|
||||
state1 = _mm256_load_si256( (__m256i*)State + 1 );
|
||||
state2 = _mm256_load_si256( (__m256i*)State + 2 );
|
||||
state3 = _mm256_load_si256( (__m256i*)State + 3 );
|
||||
|
||||
for ( i = 0; i < 9; i += 3)
|
||||
{
|
||||
_mm_prefetch( in + i, _MM_HINT_T0 );
|
||||
@@ -906,12 +925,12 @@ inline void reducedDuplexRow( uint64_t *State, uint64_t *rowIn,
|
||||
inout += BLOCK_LEN_M256I;
|
||||
}
|
||||
|
||||
_mm256_store_si256( state, state0 );
|
||||
_mm256_store_si256( &state[1], state1 );
|
||||
_mm256_store_si256( &state[2], state2 );
|
||||
_mm256_store_si256( &state[3], state3 );
|
||||
_mm256_store_si256( (__m256i*)State, state0 );
|
||||
_mm256_store_si256( (__m256i*)State + 1, state1 );
|
||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||
|
||||
#elif defined __AVX__
|
||||
#elif defined(__SSE4_2__)
|
||||
|
||||
__m128i* state = (__m128i*)State;
|
||||
__m128i* in = (__m128i*)rowIn;
|
||||
|
@@ -87,8 +87,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
|
||||
#else
|
||||
// only available with avx
|
||||
#elif defined(__SSE4_2__)
|
||||
|
||||
// process 2 columns in parallel
|
||||
// returns void, all args updated
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "ripemd-hash-4way.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -42,10 +42,10 @@ static const uint32_t IV[5] =
|
||||
|
||||
#define RR(a, b, c, d, e, f, s, r, k) \
|
||||
do{ \
|
||||
a = _mm_add_epi32( mm_rotl_32( _mm_add_epi32( _mm_add_epi32( \
|
||||
a = _mm_add_epi32( mm_rol_32( _mm_add_epi32( _mm_add_epi32( \
|
||||
_mm_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||
_mm_set1_epi32( k ) ), s ), e ); \
|
||||
c = mm_rotl_32( c, 10 );\
|
||||
c = mm_rol_32( c, 10 );\
|
||||
} while (0)
|
||||
|
||||
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
||||
@@ -341,10 +341,10 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
|
||||
|
||||
#define RR_8W(a, b, c, d, e, f, s, r, k) \
|
||||
do{ \
|
||||
a = _mm256_add_epi32( mm256_rotl_32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
a = _mm256_add_epi32( mm256_rol_32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
_mm256_add_epi32( a, f( b ,c, d ) ), r ), \
|
||||
_mm256_set1_epi32( k ) ), s ), e ); \
|
||||
c = mm256_rotl_32( c, 10 );\
|
||||
c = mm256_rol_32( c, 10 );\
|
||||
} while (0)
|
||||
|
||||
#define ROUND1_8W(a, b, c, d, e, f, s, r, k) \
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
@@ -34,5 +34,5 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
#endif // RIPEMD_HASH_4WAY_H__
|
||||
|
@@ -30,7 +30,7 @@
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -98,19 +98,19 @@ static const sph_u32 K256[64] = {
|
||||
|
||||
#define BSG2_0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 2), mm_rotr_32(x, 13) ), mm_rotr_32( x, 22) )
|
||||
mm_ror_32(x, 2), mm_ror_32(x, 13) ), mm_ror_32( x, 22) )
|
||||
|
||||
#define BSG2_1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 6), mm_rotr_32(x, 11) ), mm_rotr_32( x, 25) )
|
||||
mm_ror_32(x, 6), mm_ror_32(x, 11) ), mm_ror_32( x, 25) )
|
||||
|
||||
#define SSG2_0(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 7), mm_rotr_32(x, 18) ), _mm_srli_epi32(x, 3) )
|
||||
mm_ror_32(x, 7), mm_ror_32(x, 18) ), _mm_srli_epi32(x, 3) )
|
||||
|
||||
#define SSG2_1(x) \
|
||||
_mm_xor_si128( _mm_xor_si128( \
|
||||
mm_rotr_32(x, 17), mm_rotr_32(x, 19) ), _mm_srli_epi32(x, 10) )
|
||||
mm_ror_32(x, 17), mm_ror_32(x, 19) ), _mm_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
@@ -311,19 +311,19 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
|
||||
#define BSG2_0x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 2), mm256_rotr_32(x, 13) ), mm256_rotr_32( x, 22) )
|
||||
mm256_ror_32(x, 2), mm256_ror_32(x, 13) ), mm256_ror_32( x, 22) )
|
||||
|
||||
#define BSG2_1x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 6), mm256_rotr_32(x, 11) ), mm256_rotr_32( x, 25) )
|
||||
mm256_ror_32(x, 6), mm256_ror_32(x, 11) ), mm256_ror_32( x, 25) )
|
||||
|
||||
#define SSG2_0x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 7), mm256_rotr_32(x, 18) ), _mm256_srli_epi32(x, 3) )
|
||||
mm256_ror_32(x, 7), mm256_ror_32(x, 18) ), _mm256_srli_epi32(x, 3) )
|
||||
|
||||
#define SSG2_1x(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_32(x, 17), mm256_rotr_32(x, 19) ), _mm256_srli_epi32(x, 10) )
|
||||
mm256_ror_32(x, 17), mm256_ror_32(x, 19) ), _mm256_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA2x_MEXP( a, b, c, d ) \
|
||||
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
@@ -644,19 +644,19 @@ static const sph_u64 K512[80] = {
|
||||
|
||||
#define BSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 28), mm256_rotr_64(x, 34) ), mm256_rotr_64(x, 39) )
|
||||
mm256_ror_64(x, 28), mm256_ror_64(x, 34) ), mm256_ror_64(x, 39) )
|
||||
|
||||
#define BSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 14), mm256_rotr_64(x, 18) ), mm256_rotr_64(x, 41) )
|
||||
mm256_ror_64(x, 14), mm256_ror_64(x, 18) ), mm256_ror_64(x, 41) )
|
||||
|
||||
#define SSG5_0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 1), mm256_rotr_64(x, 8) ), _mm256_srli_epi64(x, 7) )
|
||||
mm256_ror_64(x, 1), mm256_ror_64(x, 8) ), _mm256_srli_epi64(x, 7) )
|
||||
|
||||
#define SSG5_1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( \
|
||||
mm256_rotr_64(x, 19), mm256_rotr_64(x, 61) ), _mm256_srli_epi64(x, 6) )
|
||||
mm256_ror_64(x, 19), mm256_ror_64(x, 61) ), _mm256_srli_epi64(x, 6) )
|
||||
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
@@ -781,4 +781,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // __AVX__
|
||||
#endif // __SSE4_2__
|
||||
|
@@ -44,7 +44,7 @@
|
||||
#include "sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
//#define SPH_SIZE_sha256 256
|
||||
|
||||
|
198
algo/sha/sha256t-4way.c
Normal file
198
algo/sha/sha256t-4way.c
Normal file
@@ -0,0 +1,198 @@
|
||||
#include "sha256t-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "sha2-hash-4way.h"
|
||||
//#include <openssl/sha.h>
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256t_8way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
sha256_8way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx8, sizeof ctx );
|
||||
|
||||
sha256_8way( &ctx, input + (64<<3), 16 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_sha256t_8way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &edata[k], pdata[k] );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
pdata[19] = n;
|
||||
|
||||
sha256t_8way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( ( !( ( hash+(i<<3) )[7] & mask ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#elif defined(SHA256T_4WAY)
|
||||
|
||||
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256t_4way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx4, sizeof ctx );
|
||||
|
||||
sha256_4way( &ctx, input + (64<<2), 16 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
mm_deinterleave_4x32( output, output+ 32, output+ 64, output+ 96,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_sha256t_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &edata[k], pdata[k] );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
sha256_4way_init( &sha256_ctx4 );
|
||||
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
pdata[19] = n;
|
||||
|
||||
sha256t_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( ( hash+(i<<3) )[7] & mask ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
26
algo/sha/sha256t-gate.c
Normal file
26
algo/sha/sha256t-gate.c
Normal file
@@ -0,0 +1,26 @@
|
||||
#include "sha256t-gate.h"
|
||||
|
||||
bool register_sha256t_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(SHA256T_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||
gate->hash = (void*)&sha256t_8way_hash;
|
||||
#elif defined(SHA256T_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||
gate->hash = (void*)&sha256t_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
/*
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_Init( &sha256t_ctx );
|
||||
#else
|
||||
sph_sha256_init( &sha256t_ctx );
|
||||
#endif
|
||||
*/
|
||||
#endif
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
}
|
||||
|
36
algo/sha/sha256t-gate.h
Normal file
36
algo/sha/sha256t-gate.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef __SHA256T_GATE_H__
|
||||
#define __SHA256T_GATE_H__ 1
|
||||
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
#define SHA256T_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#define SHA256T_8WAY
|
||||
#endif
|
||||
|
||||
bool register_blake2s_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
void sha256t_8way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#elif defined (SHA256T_4WAY)
|
||||
|
||||
void sha256t_4way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#else
|
||||
|
||||
void sha256t_hash( void *output, const void *input );
|
||||
int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include "sha256t-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -7,25 +6,26 @@
|
||||
#include "sph_sha2.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#if !defined(SHA256T_4WAY)
|
||||
|
||||
#ifndef USE_SPH_SHA
|
||||
static SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
||||
static __thread SHA256_CTX sha256t_mid __attribute__ ((aligned (64)));
|
||||
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
||||
#else
|
||||
static sph_sha256_context sha256t_ctx __attribute__ ((aligned (64)));
|
||||
static __thread sph_sha256_context sha256t_mid __attribute__ ((aligned (64)));
|
||||
static __thread sph_sha256_context sha256t_ctx __attribute__ ((aligned (64)));
|
||||
#endif
|
||||
|
||||
void sha256t_midstate( const void* input )
|
||||
{
|
||||
memcpy( &sha256t_mid, &sha256t_ctx, sizeof sha256t_mid );
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_Update( &sha256t_mid, input, 64 );
|
||||
SHA256_Init( &sha256t_ctx );
|
||||
SHA256_Update( &sha256t_ctx, input, 64 );
|
||||
#else
|
||||
sph_sha256( &sha256t_mid, input, 64 );
|
||||
sph_sha256_init( &sha256t_ctx );
|
||||
sph_sha256( &sha256t_ctx, input, 64 );
|
||||
#endif
|
||||
}
|
||||
|
||||
void sha256t_hash(void* output, const void* input, uint32_t len)
|
||||
void sha256t_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(64) hashA[16];
|
||||
const int midlen = 64; // bytes
|
||||
@@ -33,16 +33,16 @@ void sha256t_hash(void* output, const void* input, uint32_t len)
|
||||
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx_sha256, &sha256t_mid, sizeof sha256t_mid );
|
||||
memcpy( &ctx_sha256, &sha256t_ctx, sizeof sha256t_ctx );
|
||||
|
||||
SHA256_Update( &ctx_sha256, input + midlen, tail );
|
||||
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 );
|
||||
|
||||
memcpy( &ctx_sha256, &sha256t_ctx, sizeof sha256t_ctx );
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashA, 32 );
|
||||
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 );
|
||||
|
||||
memcpy( &ctx_sha256, &sha256t_ctx, sizeof sha256t_ctx );
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashA, 32 );
|
||||
SHA256_Final( (unsigned char*)hashA, &ctx_sha256 );
|
||||
#else
|
||||
@@ -52,11 +52,11 @@ void sha256t_hash(void* output, const void* input, uint32_t len)
|
||||
sph_sha256( &ctx_sha256, input + midlen, tail );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
memcpy( &ctx_sha256, &sha256t_ctx, sizeof sha256t_ctx );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hashA, 32 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
memcpy( &ctx_sha256, &sha256t_ctx, sizeof sha256t_ctx );
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256( &ctx_sha256, hashA, 32 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
#endif
|
||||
@@ -68,8 +68,6 @@ int scanhash_sha256t(int thr_id, struct work *work,
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t len = 80;
|
||||
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
@@ -113,7 +111,7 @@ int scanhash_sha256t(int thr_id, struct work *work,
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sha256t_hash(hash64, endiandata, len);
|
||||
sha256t_hash( hash64, endiandata );
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
@@ -139,23 +137,4 @@ int scanhash_sha256t(int thr_id, struct work *work,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void sha256t_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_sha256t_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifndef USE_SPH_SHA
|
||||
SHA256_Init( &sha256t_ctx );
|
||||
#else
|
||||
sph_sha256_init( &sha256t_ctx );
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
// gate->set_target = (void*)&sha256t_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
}
|
||||
|
@@ -271,9 +271,9 @@ do { \
|
||||
xa0 = _mm_xor_si128( xm, _mm_xor_si128( xb1, _mm_xor_si128( \
|
||||
_mm_andnot_si128( xb3, xb2 ), \
|
||||
_mm_mullo_epi32( _mm_xor_si128( xa0, _mm_xor_si128( xc, \
|
||||
_mm_mullo_epi32( mm_rotl_32( xa1, 15 ), _mm_set1_epi32(5UL) ) \
|
||||
_mm_mullo_epi32( mm_rol_32( xa1, 15 ), _mm_set1_epi32(5UL) ) \
|
||||
) ), _mm_set1_epi32(3UL) ) ) ) ); \
|
||||
xb0 = mm_not( _mm_xor_si128( xa0, mm_rotl_32( xb0, 1 ) ) ); \
|
||||
xb0 = mm_not( _mm_xor_si128( xa0, mm_rol_32( xb0, 1 ) ) ); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_STEP_0 do { \
|
||||
@@ -335,22 +335,22 @@ do { \
|
||||
|
||||
#define APPLY_P \
|
||||
do { \
|
||||
B0 = mm_rotr_32( B0, 15 ); \
|
||||
B1 = mm_rotr_32( B1, 15 ); \
|
||||
B2 = mm_rotr_32( B2, 15 ); \
|
||||
B3 = mm_rotr_32( B3, 15 ); \
|
||||
B4 = mm_rotr_32( B4, 15 ); \
|
||||
B5 = mm_rotr_32( B5, 15 ); \
|
||||
B6 = mm_rotr_32( B6, 15 ); \
|
||||
B7 = mm_rotr_32( B7, 15 ); \
|
||||
B8 = mm_rotr_32( B8, 15 ); \
|
||||
B9 = mm_rotr_32( B9, 15 ); \
|
||||
BA = mm_rotr_32( BA, 15 ); \
|
||||
BB = mm_rotr_32( BB, 15 ); \
|
||||
BC = mm_rotr_32( BC, 15 ); \
|
||||
BD = mm_rotr_32( BD, 15 ); \
|
||||
BE = mm_rotr_32( BE, 15 ); \
|
||||
BF = mm_rotr_32( BF, 15 ); \
|
||||
B0 = mm_ror_32( B0, 15 ); \
|
||||
B1 = mm_ror_32( B1, 15 ); \
|
||||
B2 = mm_ror_32( B2, 15 ); \
|
||||
B3 = mm_ror_32( B3, 15 ); \
|
||||
B4 = mm_ror_32( B4, 15 ); \
|
||||
B5 = mm_ror_32( B5, 15 ); \
|
||||
B6 = mm_ror_32( B6, 15 ); \
|
||||
B7 = mm_ror_32( B7, 15 ); \
|
||||
B8 = mm_ror_32( B8, 15 ); \
|
||||
B9 = mm_ror_32( B9, 15 ); \
|
||||
BA = mm_ror_32( BA, 15 ); \
|
||||
BB = mm_ror_32( BB, 15 ); \
|
||||
BC = mm_ror_32( BC, 15 ); \
|
||||
BD = mm_ror_32( BD, 15 ); \
|
||||
BE = mm_ror_32( BE, 15 ); \
|
||||
BF = mm_ror_32( BF, 15 ); \
|
||||
PERM_STEP_0; \
|
||||
PERM_STEP_1; \
|
||||
PERM_STEP_2; \
|
||||
|
@@ -63,11 +63,11 @@ static const sph_u32 IV512[] = {
|
||||
// and return the rotated high 128 bits.
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
#define mm_rotr256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 )
|
||||
#define mm_ror256hi_1x32( hi, lo ) _mm_alignr_epi8( lo, hi, 4 )
|
||||
|
||||
#else // SSE2
|
||||
|
||||
#define mm_rotr256hi_1x32( hi, lo ) \
|
||||
#define mm_ror256hi_1x32( hi, lo ) \
|
||||
_mm_or_si128( _mm_srli_si128( hi, 4 ), \
|
||||
_mm_slli_si128( lo, 12 ) )
|
||||
|
||||
@@ -182,36 +182,36 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
|
||||
// round 2, 6, 10
|
||||
|
||||
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
|
||||
k00 = _mm_xor_si128( k00, mm_ror256hi_1x32( k12, k13 ) );
|
||||
x = _mm_xor_si128( p3, k00 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
||||
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
|
||||
k01 = _mm_xor_si128( k01, mm_ror256hi_1x32( k13, k00 ) );
|
||||
x = _mm_xor_si128( x, k01 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
||||
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
|
||||
k02 = _mm_xor_si128( k02, mm_ror256hi_1x32( k00, k01 ) );
|
||||
x = _mm_xor_si128( x, k02 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
||||
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
|
||||
k03 = _mm_xor_si128( k03, mm_ror256hi_1x32( k01, k02 ) );
|
||||
x = _mm_xor_si128( x, k03 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
||||
p2 = _mm_xor_si128( p2, x );
|
||||
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
|
||||
k10 = _mm_xor_si128( k10, mm_ror256hi_1x32( k02, k03 ) );
|
||||
x = _mm_xor_si128( p1, k10 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
||||
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
|
||||
k11 = _mm_xor_si128( k11, mm_ror256hi_1x32( k03, k10 ) );
|
||||
x = _mm_xor_si128( x, k11 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
||||
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
|
||||
k12 = _mm_xor_si128( k12, mm_ror256hi_1x32( k10, k11 ) );
|
||||
x = _mm_xor_si128( x, k12 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
||||
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
|
||||
k13 = _mm_xor_si128( k13, mm_ror256hi_1x32( k11, k12 ) );
|
||||
x = _mm_xor_si128( x, k13 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
p0 = _mm_xor_si128( p0, x );
|
||||
@@ -264,36 +264,36 @@ c512( sph_shavite_big_context *sc, const void *msg )
|
||||
|
||||
// round 4, 8, 12
|
||||
|
||||
k00 = _mm_xor_si128( k00, mm_rotr256hi_1x32( k12, k13 ) );
|
||||
k00 = _mm_xor_si128( k00, mm_ror256hi_1x32( k12, k13 ) );
|
||||
|
||||
x = _mm_xor_si128( p1, k00 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k01 = _mm_xor_si128( k01, mm_rotr256hi_1x32( k13, k00 ) );
|
||||
k01 = _mm_xor_si128( k01, mm_ror256hi_1x32( k13, k00 ) );
|
||||
|
||||
x = _mm_xor_si128( x, k01 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k02 = _mm_xor_si128( k02, mm_rotr256hi_1x32( k00, k01 ) );
|
||||
k02 = _mm_xor_si128( k02, mm_ror256hi_1x32( k00, k01 ) );
|
||||
|
||||
x = _mm_xor_si128( x, k02 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k03 = _mm_xor_si128( k03, mm_rotr256hi_1x32( k01, k02 ) );
|
||||
k03 = _mm_xor_si128( k03, mm_ror256hi_1x32( k01, k02 ) );
|
||||
|
||||
x = _mm_xor_si128( x, k03 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
p0 = _mm_xor_si128( p0, x );
|
||||
k10 = _mm_xor_si128( k10, mm_rotr256hi_1x32( k02, k03 ) );
|
||||
k10 = _mm_xor_si128( k10, mm_ror256hi_1x32( k02, k03 ) );
|
||||
|
||||
x = _mm_xor_si128( p3, k10 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k11 = _mm_xor_si128( k11, mm_rotr256hi_1x32( k03, k10 ) );
|
||||
k11 = _mm_xor_si128( k11, mm_ror256hi_1x32( k03, k10 ) );
|
||||
|
||||
x = _mm_xor_si128( x, k11 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k12 = _mm_xor_si128( k12, mm_rotr256hi_1x32( k10, k11 ) );
|
||||
k12 = _mm_xor_si128( k12, mm_ror256hi_1x32( k10, k11 ) );
|
||||
|
||||
x = _mm_xor_si128( x, k12 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
k13 = _mm_xor_si128( k13, mm_rotr256hi_1x32( k11, k12 ) );
|
||||
k13 = _mm_xor_si128( k13, mm_ror256hi_1x32( k11, k12 ) );
|
||||
|
||||
x = _mm_xor_si128( x, k13 );
|
||||
x = _mm_aesenc_si128( x, m128_zero );
|
||||
|
@@ -576,14 +576,14 @@ do { \
|
||||
do { \
|
||||
TTl = Fl( a,b,c,fun ); \
|
||||
TTh = Fh( a,b,c,fun ); \
|
||||
a##l = mm256_rotl_32( a##l, r ); \
|
||||
a##h = mm256_rotl_32( a##h, r ); \
|
||||
a##l = mm256_rol_32( a##l, r ); \
|
||||
a##h = mm256_rol_32( a##h, r ); \
|
||||
w##l = _mm256_add_epi32( w##l, d##l ); \
|
||||
w##h = _mm256_add_epi32( w##h, d##h ); \
|
||||
TTl = _mm256_add_epi32( TTl, w##l ); \
|
||||
TTh = _mm256_add_epi32( TTh, w##h ); \
|
||||
TTl = mm256_rotl_32( TTl, s ); \
|
||||
TTh = mm256_rotl_32( TTh, s ); \
|
||||
TTl = mm256_rol_32( TTl, s ); \
|
||||
TTh = mm256_rol_32( TTh, s ); \
|
||||
PERM( z,d,a ); \
|
||||
} while(0)
|
||||
|
||||
|
@@ -307,7 +307,7 @@ do { \
|
||||
#define TFBIG_MIX_4WAY(x0, x1, rc) \
|
||||
do { \
|
||||
x0 = _mm256_add_epi64( x0, x1 ); \
|
||||
x1 = _mm256_xor_si256( mm256_rotl_64( x1, rc ), x0 ); \
|
||||
x1 = _mm256_xor_si256( mm256_rol_64( x1, rc ), x0 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
@@ -50,7 +50,7 @@
|
||||
#include <string.h>
|
||||
#include "sm3-hash-4way.h"
|
||||
|
||||
#ifdef __AVX__
|
||||
#ifdef __SSE4_2__
|
||||
|
||||
void sm3_4way_init( sm3_4way_ctx_t *ctx )
|
||||
{
|
||||
@@ -135,10 +135,10 @@ void sm3_4way_close( void *cc, void *dst )
|
||||
hash[i] = mm_bswap_32( ctx->digest[i] );
|
||||
}
|
||||
|
||||
#define P0(x) _mm_xor_si128( x, _mm_xor_si128( mm_rotl_32( x, 9 ), \
|
||||
mm_rotl_32( x, 17 ) ) )
|
||||
#define P1(x) _mm_xor_si128( x, _mm_xor_si128( mm_rotl_32( x, 15 ), \
|
||||
mm_rotl_32( x, 23 ) ) )
|
||||
#define P0(x) _mm_xor_si128( x, _mm_xor_si128( mm_rol_32( x, 9 ), \
|
||||
mm_rol_32( x, 17 ) ) )
|
||||
#define P1(x) _mm_xor_si128( x, _mm_xor_si128( mm_rol_32( x, 15 ), \
|
||||
mm_rol_32( x, 23 ) ) )
|
||||
|
||||
#define FF0(x,y,z) _mm_xor_si128( x, _mm_xor_si128( y, z ) )
|
||||
#define FF1(x,y,z) _mm_or_si128( _mm_or_si128( _mm_and_si128( x, y ), \
|
||||
@@ -170,8 +170,8 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
for ( j = 16; j < 68; j++ )
|
||||
W[j] = _mm_xor_si128( P1( _mm_xor_si128( _mm_xor_si128( W[ j-16 ],
|
||||
W[ j-9 ] ),
|
||||
mm_rotl_32( W[ j-3 ], 15 ) ) ),
|
||||
_mm_xor_si128( mm_rotl_32( W[ j-13 ], 7 ),
|
||||
mm_rol_32( W[ j-3 ], 15 ) ) ),
|
||||
_mm_xor_si128( mm_rol_32( W[ j-13 ], 7 ),
|
||||
W[ j-6 ] ) );
|
||||
|
||||
for( j = 0; j < 64; j++ )
|
||||
@@ -180,19 +180,19 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
T = _mm_set1_epi32( 0x79CC4519UL );
|
||||
for( j =0; j < 16; j++ )
|
||||
{
|
||||
SS1 = mm_rotl_32( _mm_add_epi32( _mm_add_epi32( mm_rotl_32( A, 12 ), E ),
|
||||
mm_rotl_32( T, j ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm_rotl_32( A, 12 ) );
|
||||
SS1 = mm_rol_32( _mm_add_epi32( _mm_add_epi32( mm_rol_32( A, 12 ), E ),
|
||||
mm_rol_32( T, j ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm_rol_32( A, 12 ) );
|
||||
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ),
|
||||
SS2 ), W1[j] );
|
||||
TT2 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( GG0( E, F, G ), H ),
|
||||
SS1 ), W[j] );
|
||||
D = C;
|
||||
C = mm_rotl_32( B, 9 );
|
||||
C = mm_rol_32( B, 9 );
|
||||
B = A;
|
||||
A = TT1;
|
||||
H = G;
|
||||
G = mm_rotl_32( F, 19 );
|
||||
G = mm_rol_32( F, 19 );
|
||||
F = E;
|
||||
E = P0( TT2 );
|
||||
}
|
||||
@@ -200,19 +200,19 @@ void sm3_4way_compress( __m128i *digest, __m128i *block )
|
||||
T = _mm_set1_epi32( 0x7A879D8AUL );
|
||||
for( j =16; j < 64; j++ )
|
||||
{
|
||||
SS1 = mm_rotl_32( _mm_add_epi32( _mm_add_epi32( mm_rotl_32( A, 12 ), E ),
|
||||
mm_rotl_32( T, j&31 ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm_rotl_32( A, 12 ) );
|
||||
SS1 = mm_rol_32( _mm_add_epi32( _mm_add_epi32( mm_rol_32( A, 12 ), E ),
|
||||
mm_rol_32( T, j&31 ) ), 7 );
|
||||
SS2 = _mm_xor_si128( SS1, mm_rol_32( A, 12 ) );
|
||||
TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ),
|
||||
SS2 ), W1[j] );
|
||||
TT2 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( GG1( E, F, G ), H ),
|
||||
SS1 ), W[j] );
|
||||
D = C;
|
||||
C = mm_rotl_32( B, 9 );
|
||||
C = mm_rol_32( B, 9 );
|
||||
B = A;
|
||||
A = TT1;
|
||||
H = G;
|
||||
G = mm_rotl_32( F, 19 );
|
||||
G = mm_rol_32( F, 19 );
|
||||
F = E;
|
||||
E = P0( TT2 );
|
||||
}
|
||||
|
@@ -3412,7 +3412,7 @@ do { \
|
||||
static inline __m256i
|
||||
table_skew( __m256i val, int num )
|
||||
{
|
||||
return mm256_rotl_64( val, 8*num );
|
||||
return mm256_rol_64( val, 8*num );
|
||||
}
|
||||
|
||||
#define ROUND_ELT( table, in, i0, i1, i2, i3, i4, i5, i6, i7 ) \
|
||||
|
284
avxdefs.h
284
avxdefs.h
@@ -50,11 +50,10 @@
|
||||
//
|
||||
// Macros vs inline functions.
|
||||
//
|
||||
// Use macros for statement functions.
|
||||
// Use macros when updating multiple arguments.
|
||||
// Use inline functions when multiple statements or local variables are used.
|
||||
|
||||
//TODO rename rotr/rotl to ror/rol to match AVX512 Intel names.
|
||||
// Macros are used for statement functions.
|
||||
// Macros are used when updating multiple arguments.
|
||||
// Inline functions are used when multiple statements or local variables are
|
||||
// needed.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <immintrin.h>
|
||||
@@ -217,15 +216,6 @@ static inline void memset_128( __m128i *dst, const __m128i a, int n )
|
||||
static inline void memcpy_128( __m128i *dst, const __m128i *src, int n )
|
||||
{ for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; }
|
||||
|
||||
/* broken
|
||||
// Compare data in memory, return true if different
|
||||
static inline bool memcmp_128( __m128i src1, __m128i src2, int n )
|
||||
{ for ( int i = 0; i < n; i++ )
|
||||
if ( src1[i] != src2[i] ) return true;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
|
||||
// A couple of 64 bit scalar functions
|
||||
// n = bytes/8
|
||||
|
||||
@@ -244,34 +234,28 @@ static inline void memset_64( uint64_t *dst, uint64_t a, int n )
|
||||
|
||||
// Bitfield extraction/insertion.
|
||||
// Return a vector with n bits extracted and right justified from each
|
||||
// element of v starting at bit i.
|
||||
// element of v starting at bit i, v[ i+n..i ] >> i
|
||||
#define mm_bfextract_64( v, i, n ) \
|
||||
_mm_srli_epi64( _mm_slli_epi64( v, 64 - i - n ), 64 - n )
|
||||
_mm_srli_epi64( _mm_slli_epi64( v, 64 - ((i)+(n)) ), 64 - (n) )
|
||||
|
||||
#define mm_bfextract_32( v, i, n ) \
|
||||
_mm_srli_epi32( _mm_slli_epi32( v, 32 - i - n ), 32 - n )
|
||||
_mm_srli_epi32( _mm_slli_epi32( v, 32 - ((i)+(n)) ), 32 - (n) )
|
||||
|
||||
#define mm_bfextract_16( v, i, n ) \
|
||||
_mm_srli_epi16( _mm_slli_epi16( v, 16 - i - n ), 16 - n )
|
||||
_mm_srli_epi16( _mm_slli_epi16( v, 16 - ((i)+(n)) ), 16 - (n) )
|
||||
|
||||
// Return v with n bits from a inserted starting at bit i.
|
||||
#define mm_bfinsert_64( v, a, i, n ) \
|
||||
_mm_or_si128( \
|
||||
_mm_and_si128( v, \
|
||||
_mm_srli_epi64( _mm_slli_epi64( m128_neg1, 64-n ), 64-i ) ), \
|
||||
_mm_slli_epi64( a, i) )
|
||||
_mm_or_si128( _mm_and_si128( v, _mm_srli_epi64( _mm_slli_epi64( \
|
||||
m128_neg1, 64-(n) ), 64-(i) ) ), _mm_slli_epi64( a, i ) )
|
||||
|
||||
#define mm_bfinsert_32( v, a, i, n ) \
|
||||
_mm_or_si128( \
|
||||
_mm_and_si128( v, \
|
||||
_mm_srli_epi32( _mm_slli_epi32( m128_neg1, 32-n ), 32-i ) ), \
|
||||
_mm_slli_epi32( a, i) )
|
||||
_mm_or_si128( _mm_and_si128( v, _mm_srli_epi32( _mm_slli_epi32( \
|
||||
m128_neg1, 32-(n) ), 32-(i) ) ), _mm_slli_epi32( a, i ) )
|
||||
|
||||
#define mm_bfinsert_16( v, a, i, n ) \
|
||||
_mm_or_si128( \
|
||||
_mm_and_si128( v, \
|
||||
_mm_srli_epi16( _mm_slli_epi16( m128_neg1, 16-n ), 16-i ) ), \
|
||||
_mm_slli_epi16( a, i) )
|
||||
_mm_or_si128( _mm_and_si128( v, _mm_srli_epi16( _mm_slli_epi16( \
|
||||
m128_neg1, 16-(n) ), 16-(i) ) ), _mm_slli_epi16( a, i) )
|
||||
|
||||
// Return vector with bit i of each element in v set/cleared
|
||||
#define mm_bitset_64( v, i ) \
|
||||
@@ -311,36 +295,23 @@ static inline void memset_64( uint64_t *dst, uint64_t a, int n )
|
||||
// Never implemented by Intel and since removed from Zen by AMD.
|
||||
|
||||
// Rotate bits in vector elements
|
||||
//TODO convert to macros and rename
|
||||
#define mm_ror_64( v, c ) \
|
||||
_mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) )
|
||||
static inline __m128i mm_rotr_64( __m128i v, int c )
|
||||
{ return _mm_or_si128( _mm_srli_epi64( v, c ), _mm_slli_epi64( v, 64-(c) ) ); }
|
||||
|
||||
#define mm_rol_64( v, c ) \
|
||||
_mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) )
|
||||
//static inline __m128i mm_rotl_64( __m128i v, int c )
|
||||
//{ return _mm_or_si128( _mm_slli_epi64( v, c ), _mm_srli_epi64( v, 64-(c) ) ); }
|
||||
|
||||
#define mm_ror_32( v, c ) \
|
||||
_mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) )
|
||||
static inline __m128i mm_rotr_32( __m128i v, int c )
|
||||
{ return _mm_or_si128( _mm_srli_epi32( v, c ), _mm_slli_epi32( v, 32-(c) ) ); }
|
||||
|
||||
#define mm_rol_32( v, c ) \
|
||||
_mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
|
||||
static inline __m128i mm_rotl_32( __m128i v, int c )
|
||||
{ return _mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) ); }
|
||||
|
||||
#define mm_ror_16( v, c ) \
|
||||
_mm_or_si128( _mm_srli_epi16( v, c ), _mm_slli_epi16( v, 16-(c) ) )
|
||||
//static inline __m128i mm_rotr_16( __m128i v, int c )
|
||||
//{ return _mm_or_si128( _mm_srli_epi16( v, c ), _mm_slli_epi16( v, 16-(c) ) ); }
|
||||
|
||||
#define mm_rol_16( v, c ) \
|
||||
_mm_or_si128( _mm_slli_epi16( v, c ), _mm_srli_epi16( v, 16-(c) ) )
|
||||
//static inline __m128i mm_rotl_16( __m128i v, int c )
|
||||
//{ return _mm_or_si128( _mm_slli_epi16( v, c ), _mm_srli_epi16( v, 16-(c) ) ); }
|
||||
|
||||
//
|
||||
// Rotate elements in vector
|
||||
@@ -351,17 +322,17 @@ static inline __m128i mm_rotl_32( __m128i v, int c )
|
||||
#define mm_rol_1x32( v ) _mm_shuffle_epi32( v, 0x93 )
|
||||
|
||||
#define mm_ror_1x16( v, c ) \
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 1, 0, 15, 14, 13, 12, 11, 10 \
|
||||
9, 8, 7, 6, 5, 4, 3, 2 ) )
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 1, 0,15,14,13,12,11,10 \
|
||||
9, 8, 7, 6, 5, 4, 3, 2 ) )
|
||||
#define mm_rol_1x16( v, c ) \
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 13, 12, 11, 10, 9, 8, 7, 6, \
|
||||
5, 4, 3, 2, 1, 0, 15, 14 ) )
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 13,12,11,10, 9, 8, 7, 6, \
|
||||
5, 4, 3, 2, 1, 0,15,14 ) )
|
||||
#define mm_ror_1x8( v, c ) \
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 0, 15, 14, 13, 12, 11, 10, 9, \
|
||||
8, 7, 6, 5, 4, 3, 2, 1 ) )
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 0,15,14,13,12,11,10, 9, \
|
||||
8, 7, 6, 5, 4, 3, 2, 1 ) )
|
||||
#define mm_rol_1x8( v, c ) \
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 14, 13, 12, 11, 10, 9, 8, 7, \
|
||||
6, 5, 4, 3, 2, 1, 0, 15 ) )
|
||||
_mm_shuffle_epi8( v, _mm_set_epi8( 14,13,12,11,10, 9, 8, 7, \
|
||||
6, 5, 4, 3, 2, 1, 0,15 ) )
|
||||
|
||||
// Less efficient shift but more versatile. Use only for odd number rotations.
|
||||
// Use shuffle above when possible.
|
||||
@@ -393,7 +364,6 @@ static inline __m128i mm_rotl_32( __m128i v, int c )
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
|
||||
|
||||
// No comparable rol.
|
||||
#define mm_ror256_1x64( v1, v2 ) \
|
||||
do { \
|
||||
@@ -740,15 +710,6 @@ static inline void memset_256( __m256i *dst, const __m256i a, int n )
|
||||
static inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
|
||||
{ for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; }
|
||||
|
||||
/* broken
|
||||
// Compare data in memory, return true if different
|
||||
static inline bool memcmp_256( __m256i src1, __m256i src2, int n )
|
||||
{
|
||||
for ( int i = 0; i < n; i++ )
|
||||
if ( src1[i] != src2[i] ) return true;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
|
||||
//
|
||||
// Bit operations
|
||||
@@ -768,25 +729,19 @@ static inline bool memcmp_256( __m256i src1, __m256i src2, int n )
|
||||
// Return v with bits [i..i+n] of each element replaced with the corresponding
|
||||
// bits from a.
|
||||
#define mm256_bfinsert_64( v, a, i, n ) \
|
||||
_mm256_or_si256( \
|
||||
_mm256_and_si256( v, \
|
||||
_mm256_srli_epi64( \
|
||||
_mm256_slli_epi64( m256_neg1, 64-n ), 64-i ) ), \
|
||||
_mm256_slli_epi64( a, i) )
|
||||
_mm256_or_si256( _mm256_and_si256( v, _mm256_srli_epi64( \
|
||||
_mm256_slli_epi64( m256_neg1, 64-(n) ), 64-(i) ) ), \
|
||||
_mm256_slli_epi64( a, i) )
|
||||
|
||||
#define mm256_bfinsert_32( v, a, i, n ) \
|
||||
_mm256_or_si256( \
|
||||
_mm256_and_si256( v, \
|
||||
_mm256_srli_epi32( \
|
||||
_mm256_slli_epi32( m256_neg1, 32-n ), 32-i ) ), \
|
||||
_mm256_slli_epi32( a, i) )
|
||||
_mm256_or_si256( _mm256_and_si256( v, _mm256_srli_epi32( \
|
||||
_mm256_slli_epi32( m256_neg1, 32-(n) ), 32-(i) ) ), \
|
||||
_mm256_slli_epi32( a, i) )
|
||||
|
||||
#define mm256_bfinsert_16( v, a, i, n ) \
|
||||
_mm256_or_si256( \
|
||||
_mm256_and_si256( v, \
|
||||
_mm256_srli_epi16( \
|
||||
_mm256_slli_epi16( m256_neg1, 16-n ), 16-i ) ), \
|
||||
_mm256_slli_epi16( a, i) )
|
||||
_mm256_or_si256( _mm256_and_si256( v, _mm256_srli_epi16( \
|
||||
_mm256_slli_epi16( m256_neg1, 16-(n) ), 16-(i) ) ), \
|
||||
_mm256_slli_epi16( a, i) )
|
||||
|
||||
// return bit n in position, all other bits cleared
|
||||
#define mm256_bitextract_64 ( x, n ) \
|
||||
@@ -829,50 +784,29 @@ static inline bool memcmp_256( __m256i src1, __m256i src2, int n )
|
||||
|
||||
//
|
||||
// Rotate each element of v by c bits
|
||||
//TODO convert to macros and rename
|
||||
#define mm256_ror_64( v, c ) \
|
||||
_mm256_or_si256( _mm256_srli_epi64( v, c ), \
|
||||
_mm256_slli_epi64( v, 64-(c) ) )
|
||||
static inline __m256i mm256_rotr_64( __m256i v, int c )
|
||||
{
|
||||
return _mm256_or_si256( _mm256_srli_epi64( v, c ),
|
||||
_mm256_slli_epi64( v, 64-(c) ) );
|
||||
}
|
||||
|
||||
#define mm256_rol_64( v, c ) \
|
||||
_mm256_or_si256( _mm256_slli_epi64( v, c ), \
|
||||
_mm256_srli_epi64( v, 64-(c) ) )
|
||||
static inline __m256i mm256_rotl_64( __m256i v, int c )
|
||||
{
|
||||
return _mm256_or_si256( _mm256_slli_epi64( v, c ),
|
||||
_mm256_srli_epi64( v, 64-(c) ) );
|
||||
}
|
||||
|
||||
#define mm256_ror_32( v, c ) \
|
||||
_mm256_or_si256( _mm256_srli_epi32( v, c ), \
|
||||
_mm256_slli_epi32( v, 32-(c) ) )
|
||||
static inline __m256i mm256_rotr_32( __m256i v, int c )
|
||||
{
|
||||
return _mm256_or_si256( _mm256_srli_epi32( v, c ),
|
||||
_mm256_slli_epi32( v, 32-(c) ) );
|
||||
}
|
||||
|
||||
#define mm256_rol_32( v, c ) \
|
||||
_mm256_or_si256( _mm256_slli_epi32( v, c ), \
|
||||
_mm256_srli_epi32( v, 32-(c) ) )
|
||||
static inline __m256i mm256_rotl_32( __m256i v, int c )
|
||||
{
|
||||
return _mm256_or_si256( _mm256_slli_epi32( v, c ),
|
||||
_mm256_srli_epi32( v, 32-(c) ) );
|
||||
}
|
||||
|
||||
#define mm256_ror_16( v, c ) \
|
||||
_mm256_or_si256( _mm256_srli_epi16( v, c ), \
|
||||
_mm256_slli_epi16( v, 16-(c)) )
|
||||
_mm256_slli_epi16( v, 16-(c) )
|
||||
|
||||
#define mm256_rol_16( v, c ) \
|
||||
_mm256_or_si256( _mm256_slli_epi16( v, c ), \
|
||||
_mm256_srli_epi16( v, 16-(c)) )
|
||||
_mm256_srli_epi16( v, 16-(c) )
|
||||
|
||||
// Rotate bits in each element of v by amount in corresponding element of
|
||||
// index vector c
|
||||
@@ -906,7 +840,7 @@ static inline __m256i mm256_rotl_32( __m256i v, int c )
|
||||
// AVX2 has no full vector permute for elements less than 32 bits.
|
||||
|
||||
// Swap 128 bit elements in 256 bit vector.
|
||||
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
|
||||
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
|
||||
|
||||
// Rotate 256 bit vector by one 64 bit element
|
||||
#define mm256_ror256_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
|
||||
@@ -929,7 +863,7 @@ static inline __m256i mm256_rotl_32( __m256i v, int c )
|
||||
// Rotate elements within lanes of 256 bit vector.
|
||||
|
||||
// Swap 64 bit elements in each 128 bit lane.
|
||||
#define mm256_swap128_64( v ) _mm256_shuffle_epi32( v, 0x4e )
|
||||
#define mm256_swap128_64( v ) _mm256_shuffle_epi32( v, 0x4e )
|
||||
|
||||
// Rotate each 128 bit lane by one 32 bit element.
|
||||
#define mm256_ror128_1x32( v ) _mm256_shuffle_epi32( v, 0x39 )
|
||||
@@ -944,7 +878,7 @@ static inline __m256i mm256_rotl_32( __m256i v, int c )
|
||||
_mm256_bsrli_epi128( v, 16-(c) ) )
|
||||
|
||||
// Swap 32 bit elements in each 64 bit lane
|
||||
#define mm256_swap64_32( v ) _mm256_shuffle_epi32( v, 0xb1 )
|
||||
#define mm256_swap64_32( v ) _mm256_shuffle_epi32( v, 0xb1 )
|
||||
|
||||
|
||||
//
|
||||
@@ -1050,7 +984,7 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI__)
|
||||
|
||||
// Experimental, not tested.
|
||||
|
||||
@@ -1120,115 +1054,115 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
// Rotate elements in 512 bit vector.
|
||||
|
||||
#define mm512_swap_256( v ) \
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 3,2,1,0, 7,6,5,4 )
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 3,2,1,0, 7,6,5,4 )
|
||||
|
||||
#define mm512_ror_1x128( v ) \
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 1,0, 7,6, 5,4, 3,2 )
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 1,0, 7,6, 5,4, 3,2 )
|
||||
#define mm512_rol_1x128( v ) \
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 5,4, 3,2, 1,0, 7,6 )
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 5,4, 3,2, 1,0, 7,6 )
|
||||
|
||||
#define mm512_ror_1x64( v ) \
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 0, 7, 6, 5, 4, 3, 2, 1 )
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 0,7,6,5,4,3,2,1 )
|
||||
#define mm512_rol_1x64( v ) \
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 6, 5, 4, 3, 2, 1, 0, 7 )
|
||||
_mm512_permutexvar_epi64( v, _mm512_set_epi64x( 6,5,4,3,2,1,0,7 )
|
||||
|
||||
#define mm512_ror_1x32( v ) \
|
||||
_mm512_permutexvar_epi32( v, _mm512_set_epi32 \
|
||||
( 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4 , 3, 2, 1 )
|
||||
_mm512_permutexvar_epi32( v, _mm512_set_epi32( \
|
||||
0,15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol_1x32( v ) \
|
||||
_mm512_permutexvar_epi32( v, _mm512_set_epi32 \
|
||||
( 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15 )
|
||||
_mm512_permutexvar_epi32( v, _mm512_set_epi32( \
|
||||
14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15 )
|
||||
|
||||
#define mm512_ror_1x16( v ) \
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16 \
|
||||
( 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \
|
||||
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17, \
|
||||
16,15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol_1x16( v ) \
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16 \
|
||||
( 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
|
||||
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31 )
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15, \
|
||||
14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,31 )
|
||||
|
||||
#define mm512_ror_1x8( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8 \
|
||||
( 0, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, \
|
||||
48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
|
||||
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \
|
||||
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
0,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49, \
|
||||
48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33, \
|
||||
32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17, \
|
||||
16,15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol_1x8( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8 \
|
||||
( 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, \
|
||||
46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, \
|
||||
30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
|
||||
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 63 )
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47, \
|
||||
46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31, \
|
||||
30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15, \
|
||||
14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,63 )
|
||||
|
||||
|
||||
//
|
||||
// Rotate elements within 256 bit lanes of 512 bit vector.
|
||||
|
||||
#define mm512_swap256_128( v ) _mm512_permutex_epi64( v, 0x4e )
|
||||
#define mm512_swap256_128( v ) _mm512_permutex_epi64( v, 0x4e )
|
||||
|
||||
#define mm512_ror256_1x64( v ) _mm512_permutex_epi64( v, 0x39 )
|
||||
#define mm512_rol256_1x64( v ) _mm512_permutex_epi64( v, 0x93 )
|
||||
|
||||
#define mm512_ror256_1x32( v ) \
|
||||
_mm512_permutexvar_epi32( v, _mm512_set_epi32( \
|
||||
8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1 )
|
||||
8,15,14,13,12,11,10, 9, 0, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol256_1x32( v ) \
|
||||
_mm512_permutexvar_epi32( v, _mm512_set_epi32( \
|
||||
14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7 )
|
||||
14,13,12,11,10, 9, 8,15, 6, 5, 4, 3, 2, 1, 0, 7 )
|
||||
|
||||
#define mm512_ror256_1x16( v ) \
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
16, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \
|
||||
0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
16,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17, \
|
||||
0,15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol256_1x16( v ) \
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 31, \
|
||||
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15 )
|
||||
30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,31, \
|
||||
14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,15 )
|
||||
|
||||
#define mm512_ror256_1x8( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8 \
|
||||
( 32, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, \
|
||||
48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
|
||||
0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \
|
||||
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49, \
|
||||
48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33, \
|
||||
0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17, \
|
||||
16,15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol256_1x8( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8 \
|
||||
( 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, \
|
||||
46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 63, \
|
||||
30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
|
||||
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31 )
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47, \
|
||||
46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63, \
|
||||
30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15, \
|
||||
14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,31 )
|
||||
|
||||
|
||||
//
|
||||
// Rotate elements in 128 bit lanes of 512 bit vector.
|
||||
// Rotate elements within 128 bit lanes of 512 bit vector.
|
||||
|
||||
#define mm512_swap128_64( v ) _mm512_permutex_epi64( v, 0xb1 )
|
||||
#define mm512_swap128_64( v ) _mm512_permutex_epi64( v, 0xb1 )
|
||||
|
||||
#define mm512_ror128_1x32( v ) _mm512_shuffle_epi32( v, 0x39 )
|
||||
#define mm512_rol128_1x32( v ) _mm512_shuffle_epi32( v, 0x93 )
|
||||
|
||||
#define mm512_ror128_1x16( v ) \
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
24, 31, 30, 29, 28, 27, 26, 25, 16, 23, 22, 21, 20, 19, 18, 17, \
|
||||
8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1 )
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
24,31,30,29,28,27,26,25, 16,23,22,21,20,19,18,17, \
|
||||
8,15,14,13,12,11,10, 9, 0, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol128_1x16( v ) \
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
30, 29, 28, 27, 26, 25, 24, 31, 22, 21, 20, 19, 18, 17, 16, 23, \
|
||||
14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7 )
|
||||
_mm512_permutexvar_epi16( v, _mm512_set_epi16( \
|
||||
30,29,28,27,26,25,24,31, 22,21,20,19,18,17,16,23, \
|
||||
14,13,12,11,10, 9, 8,15, 6, 5, 4, 3, 2, 1, 0, 7 )
|
||||
|
||||
#define mm512_ror128_1x8( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8 \
|
||||
( 48, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, \
|
||||
32, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
|
||||
16, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \
|
||||
0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
48,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49, \
|
||||
32,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33, \
|
||||
16,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17, \
|
||||
0,15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1 )
|
||||
#define mm512_rol128_1x8( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8 \
|
||||
( 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 63, \
|
||||
46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 47, \
|
||||
30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 31, \
|
||||
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15 )
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,63, \
|
||||
46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,47, \
|
||||
30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,31, \
|
||||
14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,15 )
|
||||
|
||||
// Rotate 128 bit lanes by c bytes.
|
||||
#define mm512_ror128_x8( v, c ) \
|
||||
@@ -1247,24 +1181,24 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
|
||||
#define mm512_bswap_64( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
56, 57, 58, 59, 60, 61, 62, 63, 48, 49, 50, 51, 52, 53, 54, 55, \
|
||||
40, 41, 42, 43, 44, 45, 46, 47, 32, 33, 34, 35, 36, 37, 38, 39, \
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 16, 17, 18, 19, 20, 21, 22, 23, \
|
||||
8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, )
|
||||
56,57,58,59,60,61,62,63, 48,49,50,51,52,53,54,55, \
|
||||
40,41,42,43,44,45,46,47, 32,33,34,35,36,37,38,39, \
|
||||
24,25,26,27,28,29,30,31, 16,17,18,19,20,21,22,23, \
|
||||
8, 9,10,11,12,13,14,15, 0, 1, 2, 3, 4, 5, 6, 7, )
|
||||
|
||||
#define mm512_bswap_32( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
60,61,62,63, 56,57,58,59, 52,53,54,55, 48,49,50,51, \
|
||||
44,45,46,47, 40,41,42,43, 36,37,38,39, 32,33,34,35, \
|
||||
28,29,30,31, 24,25,26,27, 20,21,22,23, 16,17,18,19, \
|
||||
12,13,14,15, 8, 9,10,11, 4, 5, 6, 7, 0, 1, 2, 3 )
|
||||
60,61,62,63, 56,57,58,59, 52,53,54,55, 48,49,50,51, \
|
||||
44,45,46,47, 40,41,42,43, 36,37,38,39, 32,33,34,35, \
|
||||
28,29,30,31, 24,25,26,27, 20,21,22,23, 16,17,18,19, \
|
||||
12,13,14,15, 8, 9,10,11, 4, 5, 6, 7, 0, 1, 2, 3 )
|
||||
|
||||
#define mm512_bswap_16( v ) \
|
||||
_mm512_permutexvar_epi8( v, _mm512_set_epi8( \
|
||||
62,63, 60,61, 58,59, 56,57, 54,55, 52,53, 50,51, 48,49, \
|
||||
46,47, 44,45, 42,43, 40,41, 38,39, 36,37, 34,35, 32,33, \
|
||||
30,31, 28,29, 26,27, 24,25, 22,23, 20,21, 18,19, 16,17, \
|
||||
14,15, 12,13, 10,11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 )
|
||||
62,63, 60,61, 58,59, 56,57, 54,55, 52,53, 50,51, 48,49, \
|
||||
46,47, 44,45, 42,43, 40,41, 38,39, 36,37, 34,35, 32,33, \
|
||||
30,31, 28,29, 26,27, 24,25, 22,23, 20,21, 18,19, 16,17, \
|
||||
14,15, 12,13, 10,11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 )
|
||||
|
||||
|
||||
#endif // AVX512F
|
||||
|
@@ -46,6 +46,16 @@ rm -f config.status
|
||||
CFLAGS="-O3 -march=core2 -Wall" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-ssse3.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-ssse3
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -msse2 -Wall" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-sse2.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-sse2
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user