This commit is contained in:
Jay D Dee
2018-04-06 11:42:01 -04:00
parent dd5e552357
commit c7efa50aad
22 changed files with 773 additions and 393 deletions

View File

@@ -79,7 +79,7 @@ int64_t argon2_get_max64 ()
bool register_argon2_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_argon2;
gate->hash = (void*)&argon2hash;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;

View File

@@ -70,7 +70,7 @@ bool register_argon2d_crds_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_argon2d_crds;
gate->hash = (void*)&argon2d_crds_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT;
}
// Dynamic
@@ -138,6 +138,6 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_argon2d_dyn;
gate->hash = (void*)&argon2d_dyn_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT;
}

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -19,10 +19,6 @@
#include <stdlib.h>
#include <stdio.h>
#ifdef _WIN32
#include <malloc.h>
#endif
#include "argon2.h"
#include "encoding.h"
#include "core.h"
@@ -31,6 +27,10 @@ const char *argon2_type2string(argon2_type type, int uppercase) {
switch (type) {
case Argon2_d:
return uppercase ? "Argon2d" : "argon2d";
case Argon2_i:
return uppercase ? "Argon2i" : "argon2i";
case Argon2_id:
return uppercase ? "Argon2id" : "argon2id";
}
return NULL;
@@ -46,7 +46,7 @@ int argon2_ctx(argon2_context *context, argon2_type type) {
return result;
}
if (Argon2_d != type) {
if (Argon2_d != type && Argon2_i != type && Argon2_id != type) {
return ARGON2_INCORRECT_TYPE;
}
@@ -62,18 +62,18 @@ int argon2_ctx(argon2_context *context, argon2_type type) {
/* Ensure that all segments have equal length */
memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS);
instance.version = context->version;
instance.memory = NULL;
instance.passes = context->t_cost;
instance.memory_blocks = memory_blocks;
instance.segment_length = segment_length;
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
instance.lanes = context->lanes;
instance.limit = 1;
instance.threads = context->threads;
instance.type = type;
if (instance.threads > instance.limit) {
instance.threads = instance.limit;
if (instance.threads > instance.lanes) {
instance.threads = instance.lanes;
}
/* 3. Initialization: Hashing inputs, allocating memory, filling first
@@ -101,7 +101,8 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt, const size_t saltlen,
void *hash, const size_t hashlen, char *encoded,
const size_t encodedlen, argon2_type type){
const size_t encodedlen, argon2_type type,
const uint32_t version){
argon2_context context;
int result;
@@ -145,6 +146,7 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = ARGON2_DEFAULT_FLAGS;
context.version = version;
result = argon2_ctx(&context, type);
@@ -174,6 +176,26 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
return ARGON2_OK;
}
int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, const size_t hashlen,
char *encoded, const size_t encodedlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
NULL, hashlen, encoded, encodedlen, Argon2_i,
ARGON2_VERSION_NUMBER);
}
int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash, const size_t hashlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER);
}
int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
@@ -181,7 +203,8 @@ int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
char *encoded, const size_t encodedlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
NULL, hashlen, encoded, encodedlen, Argon2_d);
NULL, hashlen, encoded, encodedlen, Argon2_d,
ARGON2_VERSION_NUMBER);
}
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
@@ -190,7 +213,27 @@ int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const size_t saltlen, void *hash, const size_t hashlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
hash, hashlen, NULL, 0, Argon2_d);
hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER);
}
int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, const size_t hashlen,
char *encoded, const size_t encodedlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
NULL, hashlen, encoded, encodedlen, Argon2_id,
ARGON2_VERSION_NUMBER);
}
int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash, const size_t hashlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
hash, hashlen, NULL, 0, Argon2_id,
ARGON2_VERSION_NUMBER);
}
static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) {
@@ -269,15 +312,33 @@ fail:
return ret;
}
int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
return argon2_verify(encoded, pwd, pwdlen, Argon2_i);
}
int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
return argon2_verify(encoded, pwd, pwdlen, Argon2_d);
}
int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
return argon2_verify(encoded, pwd, pwdlen, Argon2_id);
}
int argon2d_ctx(argon2_context *context) {
return argon2_ctx(context, Argon2_d);
}
int argon2i_ctx(argon2_context *context) {
return argon2_ctx(context, Argon2_i);
}
int argon2id_ctx(argon2_context *context) {
return argon2_ctx(context, Argon2_id);
}
int argon2_verify_ctx(argon2_context *context, const char *hash,
argon2_type type) {
int ret = argon2_ctx(context, type);
@@ -296,6 +357,14 @@ int argon2d_verify_ctx(argon2_context *context, const char *hash) {
return argon2_verify_ctx(context, hash, Argon2_d);
}
int argon2i_verify_ctx(argon2_context *context, const char *hash) {
return argon2_verify_ctx(context, hash, Argon2_i);
}
int argon2id_verify_ctx(argon2_context *context, const char *hash) {
return argon2_verify_ctx(context, hash, Argon2_id);
}
const char *argon2_error_message(int error_code) {
switch (error_code) {
case ARGON2_OK:
@@ -379,302 +448,5 @@ size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
uint32_t saltlen, uint32_t hashlen, argon2_type type) {
return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) +
numlen(t_cost) + numlen(m_cost) + numlen(parallelism) +
b64len(saltlen) + b64len(hashlen);
b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + 1;
}
#ifdef __AVX2__
///////////////////////////
// Wolf's Additions
///////////////////////////
#include <stdbool.h>
#include <pthread.h>
#include <x86intrin.h>
#include "../blake2/blake2.h"
typedef struct _Argon2d_Block
{
union
{
uint64_t data[1024 / 8] __attribute__((aligned(32)));
__m128i dqwords[1024 / 16] __attribute__((aligned(32)));
__m256i qqwords[1024 / 32] __attribute__((aligned(32)));
};
} Argon2d_Block;
typedef struct _Argon2ThreadData
{
Argon2d_Block *Matrix;
uint32_t slice;
uint32_t lane;
} Argon2ThreadData;
#define SEGMENT_LENGTH (250U / (4U * 4U)) // memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
#define LANE_LENGTH (SEGMENT_LENGTH * 4U) // segment_length * ARGON2_SYNC_POINTS;
#define CONCURRENT_THREADS 4
static const uint64_t blake2b_IV[8] =
{
0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL,
0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL,
0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL,
0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL
};
static const unsigned int blake2b_sigma[12][16] =
{
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
};
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
#define G(r, i, a, b, c, d) \
do { \
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
d = ROTL64(d ^ a, 32); \
c = c + d; \
b = ROTL64(b ^ c, 40); \
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
d = ROTL64(d ^ a, 48); \
c = c + d; \
b = ROTL64(b ^ c, 1); \
} while ((void)0, 0)
#define ROUND(r) \
do { \
G(r, 0, v[0], v[4], v[8], v[12]); \
G(r, 1, v[1], v[5], v[9], v[13]); \
G(r, 2, v[2], v[6], v[10], v[14]); \
G(r, 3, v[3], v[7], v[11], v[15]); \
G(r, 4, v[0], v[5], v[10], v[15]); \
G(r, 5, v[1], v[6], v[11], v[12]); \
G(r, 6, v[2], v[7], v[8], v[13]); \
G(r, 7, v[3], v[4], v[9], v[14]); \
} while ((void)0, 0)
void CompressBlock(uint64_t *h, const uint64_t *m, uint64_t t, uint64_t f)
{
uint64_t v[16];
int i;
for(i = 0; i < 8; ++i) v[i] = h[i];
for(i = 8; i < 16; ++i) v[i] = blake2b_IV[i - 8];
v[12] ^= t;
v[14] ^= f;
int r;
for(r = 0; r < 12; ++r)
{
ROUND(r);
}
for(i = 0; i < 8; ++i) h[i] ^= v[i] ^ v[i + 8];
}
void Argon2dInitHash(void *HashOut, void *Input)
{
blake2b_state BlakeHash;
uint32_t InBuf[64]; // Is only 50 uint32_t, but need more space for Blake2B
memset(InBuf, 0x00, 200);
InBuf[0] = 4UL; // Lanes
InBuf[1] = 32UL; // Output Length
InBuf[2] = 250UL; // Memory Cost
InBuf[3] = 1UL; // Time Cost
InBuf[4] = 16UL; // Argon2 Version Number
InBuf[5] = 0UL; // Type
InBuf[6] = 80UL; // Password Length
memcpy(InBuf + 7, Input, 80); // Password
InBuf[27] = 80UL; // Salt Length
memcpy(InBuf + 28, Input, 80); // Salt
InBuf[48] = 0UL; // Secret Length
InBuf[49] = 0UL; // Associated Data Length
int i;
for(i = 50; i < 64; ++i) InBuf[i] = 0UL;
uint64_t H[8];
for(i = 0; i < 8; ++i) H[i] = blake2b_IV[i];
H[0] ^= 0x0000000001010040;
CompressBlock(H, (uint64_t *)InBuf, 128ULL, 0ULL);
CompressBlock(H, (uint64_t *)(InBuf + 32), 200ULL, 0xFFFFFFFFFFFFFFFFULL);
memcpy(HashOut, H, 64U);
}
void Argon2dFillFirstBlocks(Argon2d_Block *Matrix, void *InitHash)
{
uint32_t lane;
for(lane = 0; lane < 4; ++lane)
{
((uint32_t *)InitHash)[16] = 0;
((uint32_t *)InitHash)[17] = lane;
blake2b_long(Matrix[lane * LANE_LENGTH].data, 1024, InitHash, 72);
((uint32_t *)InitHash)[16] |= 1;
blake2b_long(Matrix[lane * LANE_LENGTH + 1].data, 1024, InitHash, 72);
}
}
#include "../blake2/blamka-round-opt.h"
void Argon2dFillSingleBlock(Argon2d_Block *State, Argon2d_Block *RefBlock, Argon2d_Block *NextBlock)
{
__m256i XY[32];
int i;
for(i = 0; i < 32; ++i)
XY[i] = State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], RefBlock->qqwords[i]);
for(i = 0; i < 8; ++i)
{
BLAKE2_ROUND( State->dqwords[8 * i + 0], State->dqwords[8 * i + 1], State->dqwords[8 * i + 2], State->dqwords[8 * i + 3],
State->dqwords[8 * i + 4], State->dqwords[8 * i + 5], State->dqwords[8 * i + 6], State->dqwords[8 * i + 7]);
}
for(i = 0; i < 8; ++i)
{
BLAKE2_ROUND( State->dqwords[8 * 0 + i], State->dqwords[8 * 1 + i], State->dqwords[8 * 2 + i], State->dqwords[8 * 3 + i],
State->dqwords[8 * 4 + i], State->dqwords[8 * 5 + i], State->dqwords[8 * 6 + i], State->dqwords[8 * 7 + i]);
}
for(i = 0; i < 32; ++i)
{
State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], XY[i]);
_mm256_store_si256(NextBlock->qqwords + i, State->qqwords[i]);
}
}
void FillSegment(Argon2d_Block *Matrix, uint32_t slice, uint32_t lane)
{
uint32_t startidx, prevoff, curoff;
Argon2d_Block State;
startidx = (!slice) ? 2 : 0;
curoff = lane * LANE_LENGTH + slice * SEGMENT_LENGTH + startidx;
//if(!(curoff % LANE_LENGTH)) prevoff = curoff + LANE_LENGTH - 1;
//else prevoff = curoff - 1;
prevoff = (!(curoff % LANE_LENGTH)) ? curoff + LANE_LENGTH - 1 : curoff - 1;
memcpy(State.data, (Matrix + prevoff)->data, 1024);
int i;
for(i = startidx; i < SEGMENT_LENGTH; ++i, ++curoff, ++prevoff)
{
if((curoff % LANE_LENGTH) == 1) prevoff = curoff - 1;
uint64_t pseudorand = Matrix[prevoff].data[0];
uint64_t reflane = (!slice) ? lane : (pseudorand >> 32) & 3; // mod lanes
uint32_t index = i;
bool samelane = reflane == lane;
pseudorand &= 0xFFFFFFFFULL;
uint32_t refareasize = ((reflane == lane) ? slice * SEGMENT_LENGTH + index - 1 : slice * SEGMENT_LENGTH + ((!index) ? -1 : 0));
if(!slice) refareasize = index - 1;
uint64_t relativepos = (pseudorand & 0xFFFFFFFFULL);
relativepos = relativepos * relativepos >> 32;
relativepos = refareasize - 1 - (refareasize * relativepos >> 32);
uint32_t startpos = 0;
uint32_t abspos = (startpos + relativepos) % LANE_LENGTH;
uint32_t refidx = abspos;
Argon2dFillSingleBlock(&State, Matrix + (LANE_LENGTH * reflane + refidx), Matrix + curoff);
}
}
void *ThreadedSegmentFill(void *ThrData)
{
Argon2ThreadData *Data = (Argon2ThreadData *)ThrData;
FillSegment(Data->Matrix, Data->slice, Data->lane);
return(NULL);
}
void Argon2dFillAllBlocks(Argon2d_Block *Matrix)
{
pthread_t ThrHandles[CONCURRENT_THREADS];
Argon2ThreadData ThrData[CONCURRENT_THREADS];
int s;
for(s = 0; s < 4; ++s)
{
// WARNING: Assumes CONCURRENT_THREADS == lanes == 4
int l;
for(l = 0; l < 4; ++l)
{
FillSegment(Matrix, s, l);
}
}
}
void Argon2dFinalizeHash(void *OutputHash, Argon2d_Block *Matrix)
{
int l;
for(l = 1; l < 4; ++l)
{
int i;
for(i = 0; i < 32; ++i)
Matrix[LANE_LENGTH - 1].qqwords[i] = _mm256_xor_si256(Matrix[LANE_LENGTH - 1].qqwords[i], Matrix[LANE_LENGTH * l + (LANE_LENGTH - 1)].qqwords[i]);
}
blake2b_long(OutputHash, 32, Matrix[LANE_LENGTH - 1].data, 1024);
}
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr)
{
uint8_t tmp[72];
Argon2dInitHash(tmp, (uint8_t *)BlkHdr);
Argon2dFillFirstBlocks(Matrix, tmp);
Argon2dFillAllBlocks(Matrix);
Argon2dFinalizeHash((uint8_t *)Output, Matrix);
}
void WolfArgon2dAllocateCtx(void **Matrix)
{
#ifdef _WIN32
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)_aligned_malloc(32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
#else
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)malloc(sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
posix_memalign(Matrix, 32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
#endif
}
void WolfArgon2dFreeCtx(void *Matrix)
{
free(Matrix);
}
#endif

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -29,10 +29,13 @@ extern "C" {
/* Symbols visibility control */
#ifdef A2_VISCTL
#define ARGON2_PUBLIC __attribute__((visibility("default")))
#define ARGON2_LOCAL __attribute__ ((visibility ("hidden")))
#elif _MSC_VER
#define ARGON2_PUBLIC __declspec(dllexport)
#define ARGON2_LOCAL
#else
#define ARGON2_PUBLIC
#define ARGON2_LOCAL
#endif
/*
@@ -206,6 +209,8 @@ typedef struct Argon2_Context {
uint32_t lanes; /* number of lanes */
uint32_t threads; /* maximum number of threads */
uint32_t version; /* version number */
allocate_fptr allocate_cbk; /* pointer to memory allocator */
deallocate_fptr free_cbk; /* pointer to memory deallocator */
@@ -214,9 +219,18 @@ typedef struct Argon2_Context {
/* Argon2 primitive type */
typedef enum Argon2_type {
Argon2_d = 0
Argon2_d = 0,
Argon2_i = 1,
Argon2_id = 2
} argon2_type;
/* Version of the algorithm */
typedef enum Argon2_version {
ARGON2_VERSION_10 = 0x10,
ARGON2_VERSION_13 = 0x13,
ARGON2_VERSION_NUMBER = ARGON2_VERSION_10
} argon2_version;
/*
* Function that gives the string representation of an argon2_type.
* @param type The argon2_type that we want the string for
@@ -233,8 +247,30 @@ ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase);
ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
/**
* Hashes a password with Argon2i, producing a raw hash by allocating memory at
* @hash
* Hashes a password with Argon2i, producing an encoded hash
* @param t_cost Number of iterations
* @param m_cost Sets memory usage to m_cost kibibytes
* @param parallelism Number of threads and compute lanes
* @param pwd Pointer to password
* @param pwdlen Password size in bytes
* @param salt Pointer to salt
* @param saltlen Salt size in bytes
* @param hashlen Desired length of the hash in bytes
* @param encoded Buffer where to write the encoded hash
* @param encodedlen Size of the buffer (thus max size of the encoded hash)
* @pre Different parallelism levels will give different results
* @pre Returns ARGON2_OK if successful
*/
ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism,
const void *pwd, const size_t pwdlen,
const void *salt, const size_t saltlen,
const size_t hashlen, char *encoded,
const size_t encodedlen);
/**
* Hashes a password with Argon2i, producing a raw hash at @hash
* @param t_cost Number of iterations
* @param m_cost Sets memory usage to m_cost kibibytes
* @param parallelism Number of threads and compute lanes
@@ -247,7 +283,7 @@ ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
* @pre Different parallelism levels will give different results
* @pre Returns ARGON2_OK if successful
*/
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
@@ -261,13 +297,35 @@ ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
const size_t hashlen, char *encoded,
const size_t encodedlen);
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen);
ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism,
const void *pwd, const size_t pwdlen,
const void *salt, const size_t saltlen,
const size_t hashlen, char *encoded,
const size_t encodedlen);
ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen);
/* generic function underlying the above ones */
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen, char *encoded,
const size_t encodedlen, argon2_type type);
const size_t encodedlen, argon2_type type,
const uint32_t version);
/**
* Verifies a password against an encoded string
@@ -276,9 +334,15 @@ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
* @param pwd Pointer to password
* @pre Returns ARGON2_OK if successful
*/
ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd,
const size_t pwdlen);
ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd,
const size_t pwdlen);
ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd,
const size_t pwdlen);
/* generic function underlying the above ones */
ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
const size_t pwdlen, argon2_type type);
@@ -293,6 +357,27 @@ ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
*/
ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
/**
* Argon2i: Version of Argon2 that picks memory blocks
* independent on the password and salt. Good for side-channels,
* but worse w.r.t. tradeoff attacks if only one pass is used.
*****
* @param context Pointer to current Argon2 context
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2i_ctx(argon2_context *context);
/**
* Argon2id: Version of Argon2 where the first half-pass over memory is
* password-independent, the rest are password-dependent (on the password and
* salt). OK against side channels (they reduce to 1/2-pass Argon2i), and
* better with w.r.t. tradeoff attacks (similar to Argon2d).
*****
* @param context Pointer to current Argon2 context
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2id_ctx(argon2_context *context);
/**
* Verify if a given password is correct for Argon2d hashing
* @param context Pointer to current Argon2 context
@@ -302,6 +387,25 @@ ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
*/
ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash);
/**
* Verify if a given password is correct for Argon2i hashing
* @param context Pointer to current Argon2 context
* @param hash The password hash to verify. The length of the hash is
* specified by the context outlen member
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash);
/**
* Verify if a given password is correct for Argon2id hashing
* @param context Pointer to current Argon2 context
* @param hash The password hash to verify. The length of the hash is
* specified by the context outlen member
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context,
const char *hash);
/* generic function underlying the above ones */
ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash,
argon2_type type);
@@ -326,18 +430,6 @@ ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost,
uint32_t parallelism, uint32_t saltlen,
uint32_t hashlen, argon2_type type);
#ifdef __AVX2__
///////////////////////////
// Wolf's Additions
///////////////////////////
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr);
void WolfArgon2dAllocateCtx(void **Matrix);
void WolfArgon2dFreeCtx(void *Matrix);
#endif
#if defined(__cplusplus)
}
#endif

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -25,7 +25,6 @@
#endif
#define VC_GE_2005(version) (version >= 1400)
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -35,6 +34,10 @@
#include "../blake2/blake2.h"
#include "../blake2/blake2-impl.h"
#ifdef GENKAT
#include "genkat.h"
#endif
#if defined(__clang__)
#if __has_attribute(optnone)
#define NOT_OPTIMIZED __attribute__((optnone))
@@ -131,7 +134,7 @@ void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
}
/* Memory clear flag defaults to true. */
int FLAG_clear_internal_memory = 1;
int FLAG_clear_internal_memory = 0;
void clear_internal_memory(void *v, size_t n) {
if (FLAG_clear_internal_memory && v) {
secure_wipe_memory(v, n);
@@ -163,6 +166,10 @@ void finalize(const argon2_context *context, argon2_instance_t *instance) {
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
}
#ifdef GENKAT
print_tag(context->out, context->outlen);
#endif
free_memory(context, (uint8_t *)instance->memory,
instance->memory_blocks, sizeof(block));
}
@@ -249,6 +256,9 @@ static int fill_memory_blocks_st(argon2_instance_t *instance) {
fill_segment(instance, position);
}
}
#ifdef GENKAT
internal_kat(instance, r); /* Print all memory blocks */
#endif
}
return ARGON2_OK;
}
@@ -331,6 +341,10 @@ static int fill_memory_blocks_mt(argon2_instance_t *instance) {
}
}
}
#ifdef GENKAT
internal_kat(instance, r); /* Print all memory blocks */
#endif
}
fail:
@@ -530,7 +544,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
store32(&value, context->t_cost);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, ARGON2_VERSION_NUMBER);
store32(&value, ARGON2_VERSION_NUMBER);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, (uint32_t)type);
@@ -538,7 +552,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
store32(&value, context->pwdlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->pwd != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
context->pwdlen);
@@ -548,7 +562,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
context->pwdlen = 0;
}
}
store32(&value, context->saltlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
@@ -602,11 +616,14 @@ int initialize(argon2_instance_t *instance, argon2_context *context) {
/* Hashing all inputs */
initial_hash(blockhash, context, instance->type);
/* Zeroing 8 extra bytes */
clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
ARGON2_PREHASH_SEED_LENGTH -
ARGON2_PREHASH_DIGEST_LENGTH);
#ifdef GENKAT
initial_kat(blockhash, context, instance->type);
#endif
/* 3. Creating first blocks, we always have at least two blocks in a slice
*/
fill_first_blocks(blockhash, instance);

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -25,12 +25,12 @@
/**********************Argon2 internal constants*******************************/
enum argon2_core_constants {
/* Version of the algorithm */
ARGON2_VERSION_NUMBER = 0x10,
/* Memory block size in bytes */
ARGON2_BLOCK_SIZE = 1024,
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
/* Number of pseudo-random values generated by one call to Blake in Argon2i
to
@@ -76,7 +76,6 @@ typedef struct Argon2_instance_t {
uint32_t segment_length;
uint32_t lane_length;
uint32_t lanes;
uint32_t limit;
uint32_t threads;
argon2_type type;
int print_internals; /* whether to print the memory blocks */

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -326,6 +326,10 @@ int decode_string(argon2_context *ctx, const char *str, argon2_type type) {
CC("$");
CC(type_string);
/* Reading the version number if the default is suppressed */
ctx->version = ARGON2_VERSION_10;
CC_opt("$v=", DECIMAL_U32(ctx->version));
CC("$m=");
DECIMAL_U32(ctx->m_cost);
CC(",t=");
@@ -411,6 +415,9 @@ int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
SS("$");
SS(type_string);
SS("$v=");
SX(ctx->version);
SS("$m=");
SX(ctx->m_cost);
SS(",t=");

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -34,6 +34,117 @@
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
* @pre all block pointers must be valid
*/
#if defined(__AVX512F__)
static void fill_block(__m512i *state, const block *ref_block,
block *next_block, int with_xor) {
__m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
state[i] = _mm512_xor_si512(
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
block_XY[i] = _mm512_xor_si512(
state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
}
} else {
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
block_XY[i] = state[i] = _mm512_xor_si512(
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
}
}
BLAKE2_ROUND_1( state[ 0], state[ 1], state[ 2], state[ 3],
state[ 4], state[ 5], state[ 6], state[ 7] );
BLAKE2_ROUND_1( state[ 8], state[ 9], state[10], state[11],
state[12], state[13], state[14], state[15] );
BLAKE2_ROUND_2( state[ 0], state[ 2], state[ 4], state[ 6],
state[ 8], state[10], state[12], state[14] );
BLAKE2_ROUND_2( state[ 1], state[ 3], state[ 5], state[ 7],
state[ 9], state[11], state[13], state[15] );
/*
for (i = 0; i < 2; ++i) {
BLAKE2_ROUND_1(
state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
}
for (i = 0; i < 2; ++i) {
BLAKE2_ROUND_2(
state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
}
*/
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
state[i] = _mm512_xor_si512(state[i], block_XY[i]);
_mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
}
}
#elif defined(__AVX2__)
static void fill_block(__m256i *state, const block *ref_block,
block *next_block, int with_xor) {
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
state[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
block_XY[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
}
} else {
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
block_XY[i] = state[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
}
}
BLAKE2_ROUND_1( state[ 0], state[ 4], state[ 1], state[ 5],
state[ 2], state[ 6], state[ 3], state[ 7] );
BLAKE2_ROUND_1( state[ 8], state[12], state[ 9], state[13],
state[10], state[14], state[11], state[15] );
BLAKE2_ROUND_1( state[16], state[20], state[17], state[21],
state[18], state[22], state[19], state[23] );
BLAKE2_ROUND_1( state[24], state[28], state[25], state[29],
state[26], state[30], state[27], state[31] );
BLAKE2_ROUND_2( state[ 0], state[ 4], state[ 8], state[12],
state[16], state[20], state[24], state[28] );
BLAKE2_ROUND_2( state[ 1], state[ 5], state[ 9], state[13],
state[17], state[21], state[25], state[29] );
BLAKE2_ROUND_2( state[ 2], state[ 6], state[10], state[14],
state[18], state[22], state[26], state[30] );
BLAKE2_ROUND_2( state[ 3], state[ 7], state[11], state[15],
state[19], state[23], state[27], state[31] );
/*
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
}
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
}
*/
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
_mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
}
}
#else // SSE2
static void fill_block(__m128i *state, const block *ref_block,
block *next_block, int with_xor) {
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
@@ -53,6 +164,41 @@ static void fill_block(__m128i *state, const block *ref_block,
}
}
BLAKE2_ROUND( state[ 0], state[ 1], state[ 2], state[ 3],
state[ 4], state[ 5], state[ 6], state[ 7] );
BLAKE2_ROUND( state[ 8], state[ 9], state[10], state[11],
state[12], state[13], state[14], state[15] );
BLAKE2_ROUND( state[16], state[17], state[18], state[19],
state[20], state[21], state[22], state[23] );
BLAKE2_ROUND( state[24], state[25], state[26], state[27],
state[28], state[29], state[30], state[31] );
BLAKE2_ROUND( state[32], state[33], state[34], state[35],
state[36], state[37], state[38], state[39] );
BLAKE2_ROUND( state[40], state[41], state[42], state[43],
state[44], state[45], state[46], state[47] );
BLAKE2_ROUND( state[48], state[49], state[50], state[51],
state[52], state[53], state[54], state[55] );
BLAKE2_ROUND( state[56], state[57], state[58], state[59],
state[60], state[61], state[62], state[63] );
BLAKE2_ROUND( state[ 0], state[ 8], state[16], state[24],
state[32], state[40], state[48], state[56] );
BLAKE2_ROUND( state[ 1], state[ 9], state[17], state[25],
state[33], state[41], state[49], state[57] );
BLAKE2_ROUND( state[ 2], state[10], state[18], state[26],
state[34], state[42], state[50], state[58] );
BLAKE2_ROUND( state[ 3], state[11], state[19], state[27],
state[35], state[43], state[51], state[59] );
BLAKE2_ROUND( state[ 4], state[12], state[20], state[28],
state[36], state[44], state[52], state[60] );
BLAKE2_ROUND( state[ 5], state[13], state[21], state[29],
state[37], state[45], state[53], state[61] );
BLAKE2_ROUND( state[ 6], state[14], state[22], state[30],
state[38], state[46], state[54], state[62] );
BLAKE2_ROUND( state[ 7], state[15], state[23], state[31],
state[39], state[47], state[55], state[63] );
/*
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
@@ -64,17 +210,28 @@ static void fill_block(__m128i *state, const block *ref_block,
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
state[8 * 6 + i], state[8 * 7 + i]);
}
*/
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
state[i] = _mm_xor_si128(state[i], block_XY[i]);
_mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
}
}
#endif
#if 0
static void next_addresses(block *address_block, block *input_block) {
/*Temporary zero-initialized blocks*/
#if defined(__AVX512F__)
__m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK];
__m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
__m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
__m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
#else
__m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
__m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
#endif
memset(zero_block, 0, sizeof(zero_block));
memset(zero2_block, 0, sizeof(zero2_block));
@@ -88,30 +245,53 @@ static void next_addresses(block *address_block, block *input_block) {
/*Second iteration of G*/
fill_block(zero2_block, address_block, address_block, 0);
}
#endif
void fill_segment(const argon2_instance_t *instance,
argon2_position_t position) {
block *ref_block = NULL, *curr_block = NULL;
block address_block, input_block;
// block address_block, input_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
__m128i state[64];
int data_independent_addressing;
#if defined(__AVX512F__)
__m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
__m256i state[ARGON2_HWORDS_IN_BLOCK];
#else
__m128i state[ARGON2_OWORDS_IN_BLOCK];
#endif
// int data_independent_addressing;
if (instance == NULL) {
return;
}
// data_independent_addressing =
// (instance->type == Argon2_i) ||
// (instance->type == Argon2_id && (position.pass == 0) &&
// (position.slice < ARGON2_SYNC_POINTS / 2));
// if (data_independent_addressing) {
// init_block_value(&input_block, 0);
// input_block.v[0] = position.pass;
// input_block.v[1] = position.lane;
// input_block.v[2] = position.slice;
// input_block.v[3] = instance->memory_blocks;
// input_block.v[4] = instance->passes;
// input_block.v[5] = instance->type;
// }
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block);
}
// if (data_independent_addressing) {
// next_addresses(&address_block, &input_block);
// }
}
/* Offset of the current block */
@@ -137,14 +317,14 @@ void fill_segment(const argon2_instance_t *instance,
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
// if (data_independent_addressing) {
// if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
// next_addresses(&address_block, &input_block);
// }
// pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
// } else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
// }
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
@@ -165,8 +345,15 @@ void fill_segment(const argon2_instance_t *instance,
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
fill_block(state, ref_block, curr_block, 0);
// if (ARGON2_VERSION_10 == instance->version) {
// /* version 1.2.1 and earlier: overwrite, not XOR */
// fill_block(state, ref_block, curr_block, 0);
// } else {
// if(0 == position.pass) {
fill_block(state, ref_block, curr_block, 0);
// } else {
// fill_block(state, ref_block, curr_block, 1);
// }
// }
}
}

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -46,7 +46,7 @@ typedef pthread_t argon2_thread_handle_t;
* @param func A function pointer for the thread's entry point. Must not be
* NULL.
* @param args Pointer that is passed as an argument to @func. May be NULL.
* @return 0 if @handle and @func are valid pointers and a thread is successfuly
* @return 0 if @handle and @func are valid pointers and a thread is successfully
* created.
*/
int argon2_thread_create(argon2_thread_handle_t *handle,

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -153,4 +153,4 @@ static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
void clear_internal_memory(void *v, size_t n);
#endif
#endif

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -78,7 +78,7 @@ int blake2b_final(blake2b_state *S, void *out, size_t outlen);
/* Simple API */
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
const void *key, size_t keylen);
const void *key, size_t keylen);
/* Argon2 Team - Begin Code */
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
@@ -88,4 +88,4 @@ int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
}
#endif
#endif
#endif

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -387,4 +387,4 @@ fail:
return ret;
#undef TRY
}
/* Argon2 Team - End Code */
/* Argon2 Team - End Code */

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -29,6 +29,8 @@
#include <x86intrin.h>
#endif
#if !defined(__AVX512F__)
#if !defined(__AVX2__)
#if !defined(__XOP__)
#if defined(__SSSE3__)
#define r16 \
@@ -176,5 +178,294 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#else /* __AVX2__ */
#endif
#include <immintrin.h>
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i ml = _mm256_mul_epu32(A0, B0); \
ml = _mm256_add_epi64(ml, ml); \
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
D0 = _mm256_xor_si256(D0, A0); \
D0 = rotr32(D0); \
\
ml = _mm256_mul_epu32(C0, D0); \
ml = _mm256_add_epi64(ml, ml); \
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
\
B0 = _mm256_xor_si256(B0, C0); \
B0 = rotr24(B0); \
\
ml = _mm256_mul_epu32(A1, B1); \
ml = _mm256_add_epi64(ml, ml); \
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
D1 = _mm256_xor_si256(D1, A1); \
D1 = rotr32(D1); \
\
ml = _mm256_mul_epu32(C1, D1); \
ml = _mm256_add_epi64(ml, ml); \
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
\
B1 = _mm256_xor_si256(B1, C1); \
B1 = rotr24(B1); \
} while((void)0, 0);
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i ml = _mm256_mul_epu32(A0, B0); \
ml = _mm256_add_epi64(ml, ml); \
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
D0 = _mm256_xor_si256(D0, A0); \
D0 = rotr16(D0); \
\
ml = _mm256_mul_epu32(C0, D0); \
ml = _mm256_add_epi64(ml, ml); \
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
B0 = _mm256_xor_si256(B0, C0); \
B0 = rotr63(B0); \
\
ml = _mm256_mul_epu32(A1, B1); \
ml = _mm256_add_epi64(ml, ml); \
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
D1 = _mm256_xor_si256(D1, A1); \
D1 = rotr16(D1); \
\
ml = _mm256_mul_epu32(C1, D1); \
ml = _mm256_add_epi64(ml, ml); \
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
B1 = _mm256_xor_si256(B1, C1); \
B1 = rotr63(B1); \
} while((void)0, 0);
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
\
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while((void)0, 0);
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while(0);
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
\
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while((void)0, 0);
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while((void)0, 0);
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
do{ \
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
\
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
} while((void)0, 0);
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do{ \
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
} while((void)0, 0);
#endif /* __AVX2__ */
#else /* __AVX512F__ */
#include <immintrin.h>
#define ror64(x, n) _mm512_ror_epi64((x), (n))
static __m512i muladd(__m512i x, __m512i y)
{
__m512i z = _mm512_mul_epu32(x, y);
return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
}
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = muladd(A0, B0); \
A1 = muladd(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 32); \
D1 = ror64(D1, 32); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 24); \
B1 = ror64(B1, 24); \
} while ((void)0, 0)
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = muladd(A0, B0); \
A1 = muladd(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 16); \
D1 = ror64(D1, 16); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 63); \
B1 = ror64(B1, 63); \
} while ((void)0, 0)
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while ((void)0, 0)
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while ((void)0, 0)
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#define SWAP_HALVES(A0, A1) \
do { \
__m512i t0, t1; \
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
A0 = t0; \
A1 = t1; \
} while((void)0, 0)
#define SWAP_QUARTERS(A0, A1) \
do { \
SWAP_HALVES(A0, A1); \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
} while((void)0, 0)
#define UNSWAP_QUARTERS(A0, A1) \
do { \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
SWAP_HALVES(A0, A1); \
} while((void)0, 0)
#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
do { \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
} while ((void)0, 0)
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
SWAP_QUARTERS(A0, A1); \
SWAP_QUARTERS(B0, B1); \
SWAP_QUARTERS(C0, C1); \
SWAP_QUARTERS(D0, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
UNSWAP_QUARTERS(A0, A1); \
UNSWAP_QUARTERS(B0, B1); \
UNSWAP_QUARTERS(C0, C1); \
UNSWAP_QUARTERS(D0, D1); \
} while ((void)0, 0)
#endif /* __AVX512F__ */
#endif /* BLAKE_ROUND_MKA_OPT_H */

View File

@@ -4,7 +4,7 @@
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
@@ -21,7 +21,7 @@
#include "blake2.h"
#include "blake2-impl.h"
/*designed by the Lyra PHC team */
/* designed by the Lyra PHC team */
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
const uint64_t m = UINT64_C(0xFFFFFFFF);
const uint64_t xy = (x & m) * (y & m);
@@ -53,4 +53,4 @@ static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
G(v3, v4, v9, v14); \
} while ((void)0, 0)
#endif
#endif