mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.8.6.1
This commit is contained in:
@@ -7,6 +7,11 @@ All of the code is believed to be open and free. If anyone has a
|
||||
claim to any of it post your case in the cpuminer-opt Bitcoin Talk forum
|
||||
or by email.
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
https://bitcointalk.org/index.php?topic=1326803.0
|
||||
|
||||
mailto://jayddee246@gmail.com
|
||||
|
@@ -4,6 +4,11 @@ for Linux and Windows can be found in RELEASE_NOTES.
|
||||
cpuminer is a console program that is executed from a DOS command prompt.
|
||||
There is no GUI and no mouse support.
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
Choose the exe that best matches you CPU's features or use trial and
|
||||
error to find the fastest one that doesn't crash. Pay attention to
|
||||
the features listed at cpuminer startup to ensure you are mining at
|
||||
|
@@ -13,11 +13,11 @@ Security warning
|
||||
----------------
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are miners. The source
|
||||
code is open for anyone to inspect. If you don't trust the software, don't use
|
||||
it.
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
The cryptographic code has been taken from trusted sources but has been
|
||||
The cryptographic hashing code has been taken from trusted sources but has been
|
||||
modified for speed at the expense of accepted security practices. This
|
||||
code should not be imported into applications where secure cryptography is
|
||||
required.
|
||||
@@ -160,6 +160,11 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.6.1
|
||||
|
||||
Faster argon2d* AVX2.
|
||||
Untested AVX-512 for argon2d*, YMMV.
|
||||
|
||||
v3.8.6
|
||||
|
||||
Fixed argon2 regression in v3.8.5.
|
||||
|
@@ -79,7 +79,7 @@ int64_t argon2_get_max64 ()
|
||||
|
||||
bool register_argon2_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_argon2;
|
||||
gate->hash = (void*)&argon2hash;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
|
@@ -70,7 +70,7 @@ bool register_argon2d_crds_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_argon2d_crds;
|
||||
gate->hash = (void*)&argon2d_crds_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
}
|
||||
|
||||
// Dynamic
|
||||
@@ -138,6 +138,6 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_argon2d_dyn;
|
||||
gate->hash = (void*)&argon2d_dyn_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
}
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -19,10 +19,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include "argon2.h"
|
||||
#include "encoding.h"
|
||||
#include "core.h"
|
||||
@@ -31,6 +27,10 @@ const char *argon2_type2string(argon2_type type, int uppercase) {
|
||||
switch (type) {
|
||||
case Argon2_d:
|
||||
return uppercase ? "Argon2d" : "argon2d";
|
||||
case Argon2_i:
|
||||
return uppercase ? "Argon2i" : "argon2i";
|
||||
case Argon2_id:
|
||||
return uppercase ? "Argon2id" : "argon2id";
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@@ -46,7 +46,7 @@ int argon2_ctx(argon2_context *context, argon2_type type) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (Argon2_d != type) {
|
||||
if (Argon2_d != type && Argon2_i != type && Argon2_id != type) {
|
||||
return ARGON2_INCORRECT_TYPE;
|
||||
}
|
||||
|
||||
@@ -62,18 +62,18 @@ int argon2_ctx(argon2_context *context, argon2_type type) {
|
||||
/* Ensure that all segments have equal length */
|
||||
memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.version = context->version;
|
||||
instance.memory = NULL;
|
||||
instance.passes = context->t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context->lanes;
|
||||
instance.limit = 1;
|
||||
instance.threads = context->threads;
|
||||
instance.type = type;
|
||||
|
||||
if (instance.threads > instance.limit) {
|
||||
instance.threads = instance.limit;
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
@@ -101,7 +101,8 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt, const size_t saltlen,
|
||||
void *hash, const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type){
|
||||
const size_t encodedlen, argon2_type type,
|
||||
const uint32_t version){
|
||||
|
||||
argon2_context context;
|
||||
int result;
|
||||
@@ -145,6 +146,7 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = version;
|
||||
|
||||
result = argon2_ctx(&context, type);
|
||||
|
||||
@@ -174,6 +176,26 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_i,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
}
|
||||
|
||||
int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER);
|
||||
}
|
||||
|
||||
int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
@@ -181,7 +203,8 @@ int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_d);
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_d,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
}
|
||||
|
||||
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
@@ -190,7 +213,27 @@ int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_d);
|
||||
hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER);
|
||||
}
|
||||
|
||||
int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_id,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
}
|
||||
|
||||
int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_id,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
}
|
||||
|
||||
static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) {
|
||||
@@ -269,15 +312,33 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_i);
|
||||
}
|
||||
|
||||
int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_id);
|
||||
}
|
||||
|
||||
int argon2d_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2i_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_i);
|
||||
}
|
||||
|
||||
int argon2id_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_id);
|
||||
}
|
||||
|
||||
int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type) {
|
||||
int ret = argon2_ctx(context, type);
|
||||
@@ -296,6 +357,14 @@ int argon2d_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2i_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_i);
|
||||
}
|
||||
|
||||
int argon2id_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_id);
|
||||
}
|
||||
|
||||
const char *argon2_error_message(int error_code) {
|
||||
switch (error_code) {
|
||||
case ARGON2_OK:
|
||||
@@ -379,302 +448,5 @@ size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
|
||||
uint32_t saltlen, uint32_t hashlen, argon2_type type) {
|
||||
return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) +
|
||||
numlen(t_cost) + numlen(m_cost) + numlen(parallelism) +
|
||||
b64len(saltlen) + b64len(hashlen);
|
||||
b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + 1;
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <pthread.h>
|
||||
#include <x86intrin.h>
|
||||
#include "../blake2/blake2.h"
|
||||
|
||||
typedef struct _Argon2d_Block
|
||||
{
|
||||
union
|
||||
{
|
||||
uint64_t data[1024 / 8] __attribute__((aligned(32)));
|
||||
__m128i dqwords[1024 / 16] __attribute__((aligned(32)));
|
||||
__m256i qqwords[1024 / 32] __attribute__((aligned(32)));
|
||||
};
|
||||
} Argon2d_Block;
|
||||
|
||||
typedef struct _Argon2ThreadData
|
||||
{
|
||||
Argon2d_Block *Matrix;
|
||||
uint32_t slice;
|
||||
uint32_t lane;
|
||||
} Argon2ThreadData;
|
||||
|
||||
#define SEGMENT_LENGTH (250U / (4U * 4U)) // memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
|
||||
#define LANE_LENGTH (SEGMENT_LENGTH * 4U) // segment_length * ARGON2_SYNC_POINTS;
|
||||
#define CONCURRENT_THREADS 4
|
||||
|
||||
static const uint64_t blake2b_IV[8] =
|
||||
{
|
||||
0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL,
|
||||
0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL,
|
||||
0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL,
|
||||
0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL
|
||||
};
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] =
|
||||
{
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = ROTL64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 40); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = ROTL64(d ^ a, 48); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
void CompressBlock(uint64_t *h, const uint64_t *m, uint64_t t, uint64_t f)
|
||||
{
|
||||
uint64_t v[16];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 8; ++i) v[i] = h[i];
|
||||
|
||||
for(i = 8; i < 16; ++i) v[i] = blake2b_IV[i - 8];
|
||||
|
||||
v[12] ^= t;
|
||||
v[14] ^= f;
|
||||
|
||||
int r;
|
||||
for(r = 0; r < 12; ++r)
|
||||
{
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i) h[i] ^= v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
void Argon2dInitHash(void *HashOut, void *Input)
|
||||
{
|
||||
blake2b_state BlakeHash;
|
||||
uint32_t InBuf[64]; // Is only 50 uint32_t, but need more space for Blake2B
|
||||
|
||||
memset(InBuf, 0x00, 200);
|
||||
|
||||
InBuf[0] = 4UL; // Lanes
|
||||
InBuf[1] = 32UL; // Output Length
|
||||
InBuf[2] = 250UL; // Memory Cost
|
||||
InBuf[3] = 1UL; // Time Cost
|
||||
InBuf[4] = 16UL; // Argon2 Version Number
|
||||
InBuf[5] = 0UL; // Type
|
||||
InBuf[6] = 80UL; // Password Length
|
||||
|
||||
memcpy(InBuf + 7, Input, 80); // Password
|
||||
|
||||
InBuf[27] = 80UL; // Salt Length
|
||||
|
||||
memcpy(InBuf + 28, Input, 80); // Salt
|
||||
|
||||
InBuf[48] = 0UL; // Secret Length
|
||||
InBuf[49] = 0UL; // Associated Data Length
|
||||
|
||||
int i;
|
||||
for(i = 50; i < 64; ++i) InBuf[i] = 0UL;
|
||||
|
||||
uint64_t H[8];
|
||||
|
||||
for(i = 0; i < 8; ++i) H[i] = blake2b_IV[i];
|
||||
|
||||
H[0] ^= 0x0000000001010040;
|
||||
|
||||
CompressBlock(H, (uint64_t *)InBuf, 128ULL, 0ULL);
|
||||
CompressBlock(H, (uint64_t *)(InBuf + 32), 200ULL, 0xFFFFFFFFFFFFFFFFULL);
|
||||
|
||||
memcpy(HashOut, H, 64U);
|
||||
}
|
||||
|
||||
void Argon2dFillFirstBlocks(Argon2d_Block *Matrix, void *InitHash)
|
||||
{
|
||||
uint32_t lane;
|
||||
for(lane = 0; lane < 4; ++lane)
|
||||
{
|
||||
((uint32_t *)InitHash)[16] = 0;
|
||||
((uint32_t *)InitHash)[17] = lane;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH].data, 1024, InitHash, 72);
|
||||
((uint32_t *)InitHash)[16] |= 1;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH + 1].data, 1024, InitHash, 72);
|
||||
}
|
||||
}
|
||||
|
||||
#include "../blake2/blamka-round-opt.h"
|
||||
|
||||
void Argon2dFillSingleBlock(Argon2d_Block *State, Argon2d_Block *RefBlock, Argon2d_Block *NextBlock)
|
||||
{
|
||||
__m256i XY[32];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
XY[i] = State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], RefBlock->qqwords[i]);
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * i + 0], State->dqwords[8 * i + 1], State->dqwords[8 * i + 2], State->dqwords[8 * i + 3],
|
||||
State->dqwords[8 * i + 4], State->dqwords[8 * i + 5], State->dqwords[8 * i + 6], State->dqwords[8 * i + 7]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * 0 + i], State->dqwords[8 * 1 + i], State->dqwords[8 * 2 + i], State->dqwords[8 * 3 + i],
|
||||
State->dqwords[8 * 4 + i], State->dqwords[8 * 5 + i], State->dqwords[8 * 6 + i], State->dqwords[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 32; ++i)
|
||||
{
|
||||
State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], XY[i]);
|
||||
_mm256_store_si256(NextBlock->qqwords + i, State->qqwords[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void FillSegment(Argon2d_Block *Matrix, uint32_t slice, uint32_t lane)
|
||||
{
|
||||
uint32_t startidx, prevoff, curoff;
|
||||
Argon2d_Block State;
|
||||
|
||||
startidx = (!slice) ? 2 : 0;
|
||||
curoff = lane * LANE_LENGTH + slice * SEGMENT_LENGTH + startidx;
|
||||
|
||||
//if(!(curoff % LANE_LENGTH)) prevoff = curoff + LANE_LENGTH - 1;
|
||||
//else prevoff = curoff - 1;
|
||||
|
||||
prevoff = (!(curoff % LANE_LENGTH)) ? curoff + LANE_LENGTH - 1 : curoff - 1;
|
||||
|
||||
memcpy(State.data, (Matrix + prevoff)->data, 1024);
|
||||
|
||||
int i;
|
||||
for(i = startidx; i < SEGMENT_LENGTH; ++i, ++curoff, ++prevoff)
|
||||
{
|
||||
if((curoff % LANE_LENGTH) == 1) prevoff = curoff - 1;
|
||||
|
||||
uint64_t pseudorand = Matrix[prevoff].data[0];
|
||||
uint64_t reflane = (!slice) ? lane : (pseudorand >> 32) & 3; // mod lanes
|
||||
|
||||
uint32_t index = i;
|
||||
bool samelane = reflane == lane;
|
||||
pseudorand &= 0xFFFFFFFFULL;
|
||||
uint32_t refareasize = ((reflane == lane) ? slice * SEGMENT_LENGTH + index - 1 : slice * SEGMENT_LENGTH + ((!index) ? -1 : 0));
|
||||
|
||||
|
||||
if(!slice) refareasize = index - 1;
|
||||
|
||||
uint64_t relativepos = (pseudorand & 0xFFFFFFFFULL);
|
||||
relativepos = relativepos * relativepos >> 32;
|
||||
relativepos = refareasize - 1 - (refareasize * relativepos >> 32);
|
||||
|
||||
uint32_t startpos = 0;
|
||||
|
||||
uint32_t abspos = (startpos + relativepos) % LANE_LENGTH;
|
||||
|
||||
uint32_t refidx = abspos;
|
||||
|
||||
Argon2dFillSingleBlock(&State, Matrix + (LANE_LENGTH * reflane + refidx), Matrix + curoff);
|
||||
}
|
||||
}
|
||||
|
||||
void *ThreadedSegmentFill(void *ThrData)
|
||||
{
|
||||
Argon2ThreadData *Data = (Argon2ThreadData *)ThrData;
|
||||
|
||||
FillSegment(Data->Matrix, Data->slice, Data->lane);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
void Argon2dFillAllBlocks(Argon2d_Block *Matrix)
|
||||
{
|
||||
pthread_t ThrHandles[CONCURRENT_THREADS];
|
||||
Argon2ThreadData ThrData[CONCURRENT_THREADS];
|
||||
|
||||
int s;
|
||||
for(s = 0; s < 4; ++s)
|
||||
{
|
||||
// WARNING: Assumes CONCURRENT_THREADS == lanes == 4
|
||||
int l;
|
||||
for(l = 0; l < 4; ++l)
|
||||
{
|
||||
FillSegment(Matrix, s, l);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Argon2dFinalizeHash(void *OutputHash, Argon2d_Block *Matrix)
|
||||
{
|
||||
int l;
|
||||
for(l = 1; l < 4; ++l)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
Matrix[LANE_LENGTH - 1].qqwords[i] = _mm256_xor_si256(Matrix[LANE_LENGTH - 1].qqwords[i], Matrix[LANE_LENGTH * l + (LANE_LENGTH - 1)].qqwords[i]);
|
||||
}
|
||||
|
||||
blake2b_long(OutputHash, 32, Matrix[LANE_LENGTH - 1].data, 1024);
|
||||
}
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr)
|
||||
{
|
||||
uint8_t tmp[72];
|
||||
|
||||
Argon2dInitHash(tmp, (uint8_t *)BlkHdr);
|
||||
|
||||
Argon2dFillFirstBlocks(Matrix, tmp);
|
||||
|
||||
Argon2dFillAllBlocks(Matrix);
|
||||
|
||||
Argon2dFinalizeHash((uint8_t *)Output, Matrix);
|
||||
}
|
||||
|
||||
void WolfArgon2dAllocateCtx(void **Matrix)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)_aligned_malloc(32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#else
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)malloc(sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
posix_memalign(Matrix, 32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#endif
|
||||
}
|
||||
|
||||
void WolfArgon2dFreeCtx(void *Matrix)
|
||||
{
|
||||
free(Matrix);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -29,10 +29,13 @@ extern "C" {
|
||||
/* Symbols visibility control */
|
||||
#ifdef A2_VISCTL
|
||||
#define ARGON2_PUBLIC __attribute__((visibility("default")))
|
||||
#define ARGON2_LOCAL __attribute__ ((visibility ("hidden")))
|
||||
#elif _MSC_VER
|
||||
#define ARGON2_PUBLIC __declspec(dllexport)
|
||||
#define ARGON2_LOCAL
|
||||
#else
|
||||
#define ARGON2_PUBLIC
|
||||
#define ARGON2_LOCAL
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -206,6 +209,8 @@ typedef struct Argon2_Context {
|
||||
uint32_t lanes; /* number of lanes */
|
||||
uint32_t threads; /* maximum number of threads */
|
||||
|
||||
uint32_t version; /* version number */
|
||||
|
||||
allocate_fptr allocate_cbk; /* pointer to memory allocator */
|
||||
deallocate_fptr free_cbk; /* pointer to memory deallocator */
|
||||
|
||||
@@ -214,9 +219,18 @@ typedef struct Argon2_Context {
|
||||
|
||||
/* Argon2 primitive type */
|
||||
typedef enum Argon2_type {
|
||||
Argon2_d = 0
|
||||
Argon2_d = 0,
|
||||
Argon2_i = 1,
|
||||
Argon2_id = 2
|
||||
} argon2_type;
|
||||
|
||||
/* Version of the algorithm */
|
||||
typedef enum Argon2_version {
|
||||
ARGON2_VERSION_10 = 0x10,
|
||||
ARGON2_VERSION_13 = 0x13,
|
||||
ARGON2_VERSION_NUMBER = ARGON2_VERSION_10
|
||||
} argon2_version;
|
||||
|
||||
/*
|
||||
* Function that gives the string representation of an argon2_type.
|
||||
* @param type The argon2_type that we want the string for
|
||||
@@ -233,8 +247,30 @@ ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase);
|
||||
ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
|
||||
|
||||
/**
|
||||
* Hashes a password with Argon2i, producing a raw hash by allocating memory at
|
||||
* @hash
|
||||
* Hashes a password with Argon2i, producing an encoded hash
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Sets memory usage to m_cost kibibytes
|
||||
* @param parallelism Number of threads and compute lanes
|
||||
* @param pwd Pointer to password
|
||||
* @param pwdlen Password size in bytes
|
||||
* @param salt Pointer to salt
|
||||
* @param saltlen Salt size in bytes
|
||||
* @param hashlen Desired length of the hash in bytes
|
||||
* @param encoded Buffer where to write the encoded hash
|
||||
* @param encodedlen Size of the buffer (thus max size of the encoded hash)
|
||||
* @pre Different parallelism levels will give different results
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
|
||||
/**
|
||||
* Hashes a password with Argon2i, producing a raw hash at @hash
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Sets memory usage to m_cost kibibytes
|
||||
* @param parallelism Number of threads and compute lanes
|
||||
@@ -247,7 +283,7 @@ ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
|
||||
* @pre Different parallelism levels will give different results
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
@@ -261,13 +297,35 @@ ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type);
|
||||
const size_t encodedlen, argon2_type type,
|
||||
const uint32_t version);
|
||||
|
||||
/**
|
||||
* Verifies a password against an encoded string
|
||||
@@ -276,9 +334,15 @@ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
* @param pwd Pointer to password
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen, argon2_type type);
|
||||
@@ -293,6 +357,27 @@ ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Argon2i: Version of Argon2 that picks memory blocks
|
||||
* independent on the password and salt. Good for side-channels,
|
||||
* but worse w.r.t. tradeoff attacks if only one pass is used.
|
||||
*****
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Argon2id: Version of Argon2 where the first half-pass over memory is
|
||||
* password-independent, the rest are password-dependent (on the password and
|
||||
* salt). OK against side channels (they reduce to 1/2-pass Argon2i), and
|
||||
* better with w.r.t. tradeoff attacks (similar to Argon2d).
|
||||
*****
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2id_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2d hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
@@ -302,6 +387,25 @@ ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2i hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @param hash The password hash to verify. The length of the hash is
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2id hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @param hash The password hash to verify. The length of the hash is
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context,
|
||||
const char *hash);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type);
|
||||
@@ -326,18 +430,6 @@ ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost,
|
||||
uint32_t parallelism, uint32_t saltlen,
|
||||
uint32_t hashlen, argon2_type type);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr);
|
||||
void WolfArgon2dAllocateCtx(void **Matrix);
|
||||
void WolfArgon2dFreeCtx(void *Matrix);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -25,7 +25,6 @@
|
||||
#endif
|
||||
#define VC_GE_2005(version) (version >= 1400)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -35,6 +34,10 @@
|
||||
#include "../blake2/blake2.h"
|
||||
#include "../blake2/blake2-impl.h"
|
||||
|
||||
#ifdef GENKAT
|
||||
#include "genkat.h"
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_attribute(optnone)
|
||||
#define NOT_OPTIMIZED __attribute__((optnone))
|
||||
@@ -131,7 +134,7 @@ void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
|
||||
}
|
||||
|
||||
/* Memory clear flag defaults to true. */
|
||||
int FLAG_clear_internal_memory = 1;
|
||||
int FLAG_clear_internal_memory = 0;
|
||||
void clear_internal_memory(void *v, size_t n) {
|
||||
if (FLAG_clear_internal_memory && v) {
|
||||
secure_wipe_memory(v, n);
|
||||
@@ -163,6 +166,10 @@ void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
print_tag(context->out, context->outlen);
|
||||
#endif
|
||||
|
||||
free_memory(context, (uint8_t *)instance->memory,
|
||||
instance->memory_blocks, sizeof(block));
|
||||
}
|
||||
@@ -249,6 +256,9 @@ static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
||||
fill_segment(instance, position);
|
||||
}
|
||||
}
|
||||
#ifdef GENKAT
|
||||
internal_kat(instance, r); /* Print all memory blocks */
|
||||
#endif
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
@@ -331,6 +341,10 @@ static int fill_memory_blocks_mt(argon2_instance_t *instance) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
internal_kat(instance, r); /* Print all memory blocks */
|
||||
#endif
|
||||
}
|
||||
|
||||
fail:
|
||||
@@ -530,7 +544,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
store32(&value, context->t_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, ARGON2_VERSION_NUMBER);
|
||||
store32(&value, ARGON2_VERSION_NUMBER);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, (uint32_t)type);
|
||||
@@ -538,7 +552,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
|
||||
store32(&value, context->pwdlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
|
||||
if (context->pwd != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
context->pwdlen);
|
||||
@@ -548,7 +562,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
context->pwdlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
store32(&value, context->saltlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
@@ -602,11 +616,14 @@ int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
/* Hashing all inputs */
|
||||
initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
|
||||
clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
#ifdef GENKAT
|
||||
initial_kat(blockhash, context, instance->type);
|
||||
#endif
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
fill_first_blocks(blockhash, instance);
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -25,12 +25,12 @@
|
||||
/**********************Argon2 internal constants*******************************/
|
||||
|
||||
enum argon2_core_constants {
|
||||
/* Version of the algorithm */
|
||||
ARGON2_VERSION_NUMBER = 0x10,
|
||||
/* Memory block size in bytes */
|
||||
ARGON2_BLOCK_SIZE = 1024,
|
||||
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
|
||||
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
|
||||
ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
|
||||
ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
|
||||
|
||||
/* Number of pseudo-random values generated by one call to Blake in Argon2i
|
||||
to
|
||||
@@ -76,7 +76,6 @@ typedef struct Argon2_instance_t {
|
||||
uint32_t segment_length;
|
||||
uint32_t lane_length;
|
||||
uint32_t lanes;
|
||||
uint32_t limit;
|
||||
uint32_t threads;
|
||||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -326,6 +326,10 @@ int decode_string(argon2_context *ctx, const char *str, argon2_type type) {
|
||||
CC("$");
|
||||
CC(type_string);
|
||||
|
||||
/* Reading the version number if the default is suppressed */
|
||||
ctx->version = ARGON2_VERSION_10;
|
||||
CC_opt("$v=", DECIMAL_U32(ctx->version));
|
||||
|
||||
CC("$m=");
|
||||
DECIMAL_U32(ctx->m_cost);
|
||||
CC(",t=");
|
||||
@@ -411,6 +415,9 @@ int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
|
||||
SS("$");
|
||||
SS(type_string);
|
||||
|
||||
SS("$v=");
|
||||
SX(ctx->version);
|
||||
|
||||
SS("$m=");
|
||||
SX(ctx->m_cost);
|
||||
SS(",t=");
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -34,6 +34,117 @@
|
||||
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
|
||||
static void fill_block(__m512i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm512_xor_si512(
|
||||
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
|
||||
block_XY[i] = _mm512_xor_si512(
|
||||
state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm512_xor_si512(
|
||||
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
BLAKE2_ROUND_1( state[ 0], state[ 1], state[ 2], state[ 3],
|
||||
state[ 4], state[ 5], state[ 6], state[ 7] );
|
||||
BLAKE2_ROUND_1( state[ 8], state[ 9], state[10], state[11],
|
||||
state[12], state[13], state[14], state[15] );
|
||||
|
||||
BLAKE2_ROUND_2( state[ 0], state[ 2], state[ 4], state[ 6],
|
||||
state[ 8], state[10], state[12], state[14] );
|
||||
BLAKE2_ROUND_2( state[ 1], state[ 3], state[ 5], state[ 7],
|
||||
state[ 9], state[11], state[13], state[15] );
|
||||
|
||||
/*
|
||||
for (i = 0; i < 2; ++i) {
|
||||
BLAKE2_ROUND_1(
|
||||
state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
|
||||
state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
BLAKE2_ROUND_2(
|
||||
state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
|
||||
state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
|
||||
}
|
||||
*/
|
||||
|
||||
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm512_xor_si512(state[i], block_XY[i]);
|
||||
_mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(__AVX2__)
|
||||
|
||||
static void fill_block(__m256i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
|
||||
block_XY[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
BLAKE2_ROUND_1( state[ 0], state[ 4], state[ 1], state[ 5],
|
||||
state[ 2], state[ 6], state[ 3], state[ 7] );
|
||||
BLAKE2_ROUND_1( state[ 8], state[12], state[ 9], state[13],
|
||||
state[10], state[14], state[11], state[15] );
|
||||
BLAKE2_ROUND_1( state[16], state[20], state[17], state[21],
|
||||
state[18], state[22], state[19], state[23] );
|
||||
BLAKE2_ROUND_1( state[24], state[28], state[25], state[29],
|
||||
state[26], state[30], state[27], state[31] );
|
||||
|
||||
BLAKE2_ROUND_2( state[ 0], state[ 4], state[ 8], state[12],
|
||||
state[16], state[20], state[24], state[28] );
|
||||
BLAKE2_ROUND_2( state[ 1], state[ 5], state[ 9], state[13],
|
||||
state[17], state[21], state[25], state[29] );
|
||||
BLAKE2_ROUND_2( state[ 2], state[ 6], state[10], state[14],
|
||||
state[18], state[22], state[26], state[30] );
|
||||
BLAKE2_ROUND_2( state[ 3], state[ 7], state[11], state[15],
|
||||
state[19], state[23], state[27], state[31] );
|
||||
|
||||
/*
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
|
||||
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
|
||||
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
|
||||
}
|
||||
*/
|
||||
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
|
||||
_mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#else // SSE2
|
||||
|
||||
static void fill_block(__m128i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
@@ -53,6 +164,41 @@ static void fill_block(__m128i *state, const block *ref_block,
|
||||
}
|
||||
}
|
||||
|
||||
BLAKE2_ROUND( state[ 0], state[ 1], state[ 2], state[ 3],
|
||||
state[ 4], state[ 5], state[ 6], state[ 7] );
|
||||
BLAKE2_ROUND( state[ 8], state[ 9], state[10], state[11],
|
||||
state[12], state[13], state[14], state[15] );
|
||||
BLAKE2_ROUND( state[16], state[17], state[18], state[19],
|
||||
state[20], state[21], state[22], state[23] );
|
||||
BLAKE2_ROUND( state[24], state[25], state[26], state[27],
|
||||
state[28], state[29], state[30], state[31] );
|
||||
BLAKE2_ROUND( state[32], state[33], state[34], state[35],
|
||||
state[36], state[37], state[38], state[39] );
|
||||
BLAKE2_ROUND( state[40], state[41], state[42], state[43],
|
||||
state[44], state[45], state[46], state[47] );
|
||||
BLAKE2_ROUND( state[48], state[49], state[50], state[51],
|
||||
state[52], state[53], state[54], state[55] );
|
||||
BLAKE2_ROUND( state[56], state[57], state[58], state[59],
|
||||
state[60], state[61], state[62], state[63] );
|
||||
|
||||
BLAKE2_ROUND( state[ 0], state[ 8], state[16], state[24],
|
||||
state[32], state[40], state[48], state[56] );
|
||||
BLAKE2_ROUND( state[ 1], state[ 9], state[17], state[25],
|
||||
state[33], state[41], state[49], state[57] );
|
||||
BLAKE2_ROUND( state[ 2], state[10], state[18], state[26],
|
||||
state[34], state[42], state[50], state[58] );
|
||||
BLAKE2_ROUND( state[ 3], state[11], state[19], state[27],
|
||||
state[35], state[43], state[51], state[59] );
|
||||
BLAKE2_ROUND( state[ 4], state[12], state[20], state[28],
|
||||
state[36], state[44], state[52], state[60] );
|
||||
BLAKE2_ROUND( state[ 5], state[13], state[21], state[29],
|
||||
state[37], state[45], state[53], state[61] );
|
||||
BLAKE2_ROUND( state[ 6], state[14], state[22], state[30],
|
||||
state[38], state[46], state[54], state[62] );
|
||||
BLAKE2_ROUND( state[ 7], state[15], state[23], state[31],
|
||||
state[39], state[47], state[55], state[63] );
|
||||
|
||||
/*
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
@@ -64,17 +210,28 @@ static void fill_block(__m128i *state, const block *ref_block,
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}
|
||||
|
||||
*/
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
static void next_addresses(block *address_block, block *input_block) {
|
||||
/*Temporary zero-initialized blocks*/
|
||||
#if defined(__AVX512F__)
|
||||
__m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
__m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
#elif defined(__AVX2__)
|
||||
__m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
|
||||
__m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
|
||||
#else
|
||||
__m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
__m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
#endif
|
||||
|
||||
memset(zero_block, 0, sizeof(zero_block));
|
||||
memset(zero2_block, 0, sizeof(zero2_block));
|
||||
@@ -88,30 +245,53 @@ static void next_addresses(block *address_block, block *input_block) {
|
||||
/*Second iteration of G*/
|
||||
fill_block(zero2_block, address_block, address_block, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
// block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m128i state[64];
|
||||
int data_independent_addressing;
|
||||
#if defined(__AVX512F__)
|
||||
__m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
#elif defined(__AVX2__)
|
||||
__m256i state[ARGON2_HWORDS_IN_BLOCK];
|
||||
#else
|
||||
__m128i state[ARGON2_OWORDS_IN_BLOCK];
|
||||
#endif
|
||||
// int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// data_independent_addressing =
|
||||
// (instance->type == Argon2_i) ||
|
||||
// (instance->type == Argon2_id && (position.pass == 0) &&
|
||||
// (position.slice < ARGON2_SYNC_POINTS / 2));
|
||||
|
||||
// if (data_independent_addressing) {
|
||||
// init_block_value(&input_block, 0);
|
||||
|
||||
// input_block.v[0] = position.pass;
|
||||
// input_block.v[1] = position.lane;
|
||||
// input_block.v[2] = position.slice;
|
||||
// input_block.v[3] = instance->memory_blocks;
|
||||
// input_block.v[4] = instance->passes;
|
||||
// input_block.v[5] = instance->type;
|
||||
// }
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
|
||||
/* Don't forget to generate the first block of addresses: */
|
||||
if (data_independent_addressing) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
// if (data_independent_addressing) {
|
||||
// next_addresses(&address_block, &input_block);
|
||||
// }
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
@@ -137,14 +317,14 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
} else {
|
||||
// if (data_independent_addressing) {
|
||||
// if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
// next_addresses(&address_block, &input_block);
|
||||
// }
|
||||
// pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
// } else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
// }
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
@@ -165,8 +345,15 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
|
||||
// if (ARGON2_VERSION_10 == instance->version) {
|
||||
// /* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
// fill_block(state, ref_block, curr_block, 0);
|
||||
// } else {
|
||||
// if(0 == position.pass) {
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
// } else {
|
||||
// fill_block(state, ref_block, curr_block, 1);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -46,7 +46,7 @@ typedef pthread_t argon2_thread_handle_t;
|
||||
* @param func A function pointer for the thread's entry point. Must not be
|
||||
* NULL.
|
||||
* @param args Pointer that is passed as an argument to @func. May be NULL.
|
||||
* @return 0 if @handle and @func are valid pointers and a thread is successfuly
|
||||
* @return 0 if @handle and @func are valid pointers and a thread is successfully
|
||||
* created.
|
||||
*/
|
||||
int argon2_thread_create(argon2_thread_handle_t *handle,
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -153,4 +153,4 @@ static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
|
||||
|
||||
void clear_internal_memory(void *v, size_t n);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -78,7 +78,7 @@ int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen);
|
||||
const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
@@ -88,4 +88,4 @@ int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -387,4 +387,4 @@ fail:
|
||||
return ret;
|
||||
#undef TRY
|
||||
}
|
||||
/* Argon2 Team - End Code */
|
||||
/* Argon2 Team - End Code */
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -29,6 +29,8 @@
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX512F__)
|
||||
#if !defined(__AVX2__)
|
||||
#if !defined(__XOP__)
|
||||
#if defined(__SSSE3__)
|
||||
#define r16 \
|
||||
@@ -176,5 +178,294 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
#else /* __AVX2__ */
|
||||
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
|
||||
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
||||
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
|
||||
|
||||
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr32(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
\
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr24(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr32(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
\
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr24(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr16(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr63(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr16(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr63(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while(0);
|
||||
|
||||
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#endif /* __AVX2__ */
|
||||
|
||||
#else /* __AVX512F__ */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#define ror64(x, n) _mm512_ror_epi64((x), (n))
|
||||
|
||||
static __m512i muladd(__m512i x, __m512i y)
|
||||
{
|
||||
__m512i z = _mm512_mul_epu32(x, y);
|
||||
return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
|
||||
}
|
||||
|
||||
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = muladd(A0, B0); \
|
||||
A1 = muladd(A1, B1); \
|
||||
\
|
||||
D0 = _mm512_xor_si512(D0, A0); \
|
||||
D1 = _mm512_xor_si512(D1, A1); \
|
||||
\
|
||||
D0 = ror64(D0, 32); \
|
||||
D1 = ror64(D1, 32); \
|
||||
\
|
||||
C0 = muladd(C0, D0); \
|
||||
C1 = muladd(C1, D1); \
|
||||
\
|
||||
B0 = _mm512_xor_si512(B0, C0); \
|
||||
B1 = _mm512_xor_si512(B1, C1); \
|
||||
\
|
||||
B0 = ror64(B0, 24); \
|
||||
B1 = ror64(B1, 24); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = muladd(A0, B0); \
|
||||
A1 = muladd(A1, B1); \
|
||||
\
|
||||
D0 = _mm512_xor_si512(D0, A0); \
|
||||
D1 = _mm512_xor_si512(D1, A1); \
|
||||
\
|
||||
D0 = ror64(D0, 16); \
|
||||
D1 = ror64(D1, 16); \
|
||||
\
|
||||
C0 = muladd(C0, D0); \
|
||||
C1 = muladd(C1, D1); \
|
||||
\
|
||||
B0 = _mm512_xor_si512(B0, C0); \
|
||||
B1 = _mm512_xor_si512(B1, C1); \
|
||||
\
|
||||
B0 = ror64(B0, 63); \
|
||||
B1 = ror64(B1, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
\
|
||||
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
\
|
||||
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
\
|
||||
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
\
|
||||
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define SWAP_HALVES(A0, A1) \
|
||||
do { \
|
||||
__m512i t0, t1; \
|
||||
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
|
||||
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
|
||||
A0 = t0; \
|
||||
A1 = t1; \
|
||||
} while((void)0, 0)
|
||||
|
||||
#define SWAP_QUARTERS(A0, A1) \
|
||||
do { \
|
||||
SWAP_HALVES(A0, A1); \
|
||||
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
|
||||
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
|
||||
} while((void)0, 0)
|
||||
|
||||
#define UNSWAP_QUARTERS(A0, A1) \
|
||||
do { \
|
||||
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
|
||||
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
|
||||
SWAP_HALVES(A0, A1); \
|
||||
} while((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
|
||||
do { \
|
||||
SWAP_HALVES(A0, B0); \
|
||||
SWAP_HALVES(C0, D0); \
|
||||
SWAP_HALVES(A1, B1); \
|
||||
SWAP_HALVES(C1, D1); \
|
||||
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
SWAP_HALVES(A0, B0); \
|
||||
SWAP_HALVES(C0, D0); \
|
||||
SWAP_HALVES(A1, B1); \
|
||||
SWAP_HALVES(C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
SWAP_QUARTERS(A0, A1); \
|
||||
SWAP_QUARTERS(B0, B1); \
|
||||
SWAP_QUARTERS(C0, C1); \
|
||||
SWAP_QUARTERS(D0, D1); \
|
||||
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
UNSWAP_QUARTERS(A0, A1); \
|
||||
UNSWAP_QUARTERS(B0, B1); \
|
||||
UNSWAP_QUARTERS(C0, C1); \
|
||||
UNSWAP_QUARTERS(D0, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif /* __AVX512F__ */
|
||||
#endif /* BLAKE_ROUND_MKA_OPT_H */
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -21,7 +21,7 @@
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/*designed by the Lyra PHC team */
|
||||
/* designed by the Lyra PHC team */
|
||||
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
const uint64_t m = UINT64_C(0xFFFFFFFF);
|
||||
const uint64_t xy = (x & m) * (y & m);
|
||||
@@ -53,4 +53,4 @@ static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
G(v3, v4, v9, v14); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -1034,7 +1034,7 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
//
|
||||
// Pseudo constants.
|
||||
|
||||
#define m512_zero _mm512_setzero_si512()
|
||||
#define m512_zero _mm512_setzero_si512()
|
||||
#define m512_one_512 _mm512_set_epi64x( 0ULL, 0ULL, 0ULL, 0ULL, \
|
||||
0ULL, 0ULL, 0ULL, 1ULL )
|
||||
#define m512_one_256 _mm512_set4_epi64x( 0ULL, 0ULL, 0ULL, 1ULL )
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.6.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.6.1.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.8.6'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.6'
|
||||
PACKAGE_VERSION='3.8.6.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.6.1'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.8.6 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.8.6.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1392,7 +1392,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.6:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.6.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1497,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.8.6
|
||||
cpuminer-opt configure 3.8.6.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.8.6, which was
|
||||
It was created by cpuminer-opt $as_me 3.8.6.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2981,7 +2981,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.8.6'
|
||||
VERSION='3.8.6.1'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.8.6, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.8.6.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6743,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.8.6
|
||||
cpuminer-opt config.status 3.8.6.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.8.6])
|
||||
AC_INIT([cpuminer-opt], [3.8.6.1])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
Reference in New Issue
Block a user