mirror of https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00

Commit: Initial upload v3.4.7

New files in this commit:
0
algo/argon2/.dirstamp
Normal file
0
algo/argon2/ar2/.dirstamp
Normal file
249
algo/argon2/ar2/ar2-scrypt-jane.c
Normal file
@@ -0,0 +1,249 @@
/*
    scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane

    Public Domain or MIT License, whichever is easier
*/

#include <string.h>

#if defined( _WINDOWS )
#if !defined( QT_GUI )
extern "C" {
#endif
#endif

#include "ar2-scrypt-jane.h"

#include "sj/scrypt-jane-portable.h"
#include "sj/scrypt-jane-hash.h"
#include "sj/scrypt-jane-romix.h"
#include "sj/scrypt-jane-test-vectors.h"

#define scrypt_maxNfactor 30  /* (1 << (30 + 1)) = ~2 billion */
#if (SCRYPT_BLOCK_BYTES == 64)
#define scrypt_r_32kb 8 /* (1 << 8) = 256 * 2 blocks in a chunk * 64 bytes = Max of 32kb in a chunk */
#elif (SCRYPT_BLOCK_BYTES == 128)
#define scrypt_r_32kb 7 /* (1 << 7) = 128 * 2 blocks in a chunk * 128 bytes = Max of 32kb in a chunk */
#elif (SCRYPT_BLOCK_BYTES == 256)
#define scrypt_r_32kb 6 /* (1 << 6) = 64 * 2 blocks in a chunk * 256 bytes = Max of 32kb in a chunk */
#elif (SCRYPT_BLOCK_BYTES == 512)
#define scrypt_r_32kb 5 /* (1 << 5) = 32 * 2 blocks in a chunk * 512 bytes = Max of 32kb in a chunk */
#endif
#define scrypt_maxrfactor scrypt_r_32kb /* 32kb */
#define scrypt_maxpfactor 25 /* (1 << 25) = ~33 million */

#include <stdio.h>
//#include <malloc.h>

static void NORETURN
scrypt_fatal_error_default(const char *msg) {
    fprintf(stderr, "%s\n", msg);
    exit(1);
}

static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default;

void scrypt_set_fatal_error(scrypt_fatal_errorfn fn) {
    scrypt_fatal_error = fn;
}

static int scrypt_power_on_self_test(void)
{
    const scrypt_test_setting *t;
    uint8_t test_digest[64];
    uint32_t i;
    int res = 7, scrypt_valid;

    if (!scrypt_test_mix()) {
#if !defined(SCRYPT_TEST)
        scrypt_fatal_error("scrypt: mix function power-on-self-test failed");
#endif
        res &= ~1;
    }

    if (!scrypt_test_hash()) {
#if !defined(SCRYPT_TEST)
        scrypt_fatal_error("scrypt: hash function power-on-self-test failed");
#endif
        res &= ~2;
    }

    for (i = 0, scrypt_valid = 1; post_settings[i].pw; i++) {
        t = post_settings + i;
        scrypt((uint8_t *)t->pw, strlen(t->pw), (uint8_t *)t->salt, strlen(t->salt), t->Nfactor, t->rfactor, t->pfactor, test_digest, sizeof(test_digest));
        scrypt_valid &= scrypt_verify(post_vectors[i], test_digest, sizeof(test_digest));
    }

    if (!scrypt_valid) {
#if !defined(SCRYPT_TEST)
        scrypt_fatal_error("scrypt: scrypt power-on-self-test failed");
#endif
        res &= ~4;
    }

    return res;
}

typedef struct scrypt_aligned_alloc_t {
    uint8_t *mem, *ptr;
} scrypt_aligned_alloc;

#ifdef SCRYPT_TEST_SPEED

static uint8_t *mem_base = (uint8_t *)0;
static size_t mem_bump = 0;

/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */
static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
{
    scrypt_aligned_alloc aa;
    if (!mem_base) {
        mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1));
        if (!mem_base)
            scrypt_fatal_error("scrypt: out of memory");
        mem_base = (uint8_t *)(((size_t)mem_base + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
    }
    aa.mem = mem_base + mem_bump;
    aa.ptr = aa.mem;
    mem_bump += (size_t)size;
    return aa;
}

static void scrypt_free(scrypt_aligned_alloc *aa) {
    mem_bump = 0;
}

#else

static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
{
    static const size_t max_alloc = (size_t)-1;
    scrypt_aligned_alloc aa;
    size += (SCRYPT_BLOCK_BYTES - 1);
    if (size > max_alloc)
        scrypt_fatal_error("scrypt: not enough address space on this CPU to allocate required memory");
    aa.mem = (uint8_t *)malloc((size_t)size);
    aa.ptr = (uint8_t *)(((size_t)aa.mem + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
    if (!aa.mem)
        scrypt_fatal_error("scrypt: out of memory");
    return aa;
}

static void scrypt_free(scrypt_aligned_alloc *aa)
{
    free(aa->mem);
}

#endif /* SCRYPT_TEST_SPEED */


void scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len,
            uint8_t Nfactor, uint8_t rfactor, uint8_t pfactor, uint8_t *out, size_t bytes)
{
    scrypt_aligned_alloc YX, V;
    uint8_t *X, *Y;
    uint32_t N, r, p, chunk_bytes, i;

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
    scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
#endif

#if !defined(SCRYPT_TEST)
    static int power_on_self_test = 0;
    if (!power_on_self_test) {
        power_on_self_test = 1;
        if (!scrypt_power_on_self_test())
            scrypt_fatal_error("scrypt: power on self test failed");
    }
#endif

    if (Nfactor > scrypt_maxNfactor)
        scrypt_fatal_error("scrypt: N out of range");
    if (rfactor > scrypt_maxrfactor)
        scrypt_fatal_error("scrypt: r out of range");
    if (pfactor > scrypt_maxpfactor)
        scrypt_fatal_error("scrypt: p out of range");

    N = (1 << (Nfactor + 1));
    r = (1 << rfactor);
    p = (1 << pfactor);

    chunk_bytes = SCRYPT_BLOCK_BYTES * r * 2;
    V = scrypt_alloc((uint64_t)N * chunk_bytes);
    YX = scrypt_alloc((p + 1) * chunk_bytes);

    /* 1: X = PBKDF2(password, salt) */
    Y = YX.ptr;
    X = Y + chunk_bytes;
    scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes * p);

    /* 2: X = ROMix(X) */
    for (i = 0; i < p; i++)
        scrypt_ROMix((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, N, r);

    /* 3: Out = PBKDF2(password, X) */
    scrypt_pbkdf2(password, password_len, X, chunk_bytes * p, 1, out, bytes);

    scrypt_ensure_zero(YX.ptr, (p + 1) * chunk_bytes);

    scrypt_free(&V);
    scrypt_free(&YX);
}

#define Nfactor 8
#define rfactor 0
#define pfactor 0
#if (SCRYPT_BLOCK_BYTES == 64)
#define chunk_bytes 128
#elif (SCRYPT_BLOCK_BYTES == 128)
#define chunk_bytes 256
#elif (SCRYPT_BLOCK_BYTES == 256)
#define chunk_bytes 512
#elif (SCRYPT_BLOCK_BYTES == 512)
#define chunk_bytes 1024
#endif

void my_scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t *out)
{
    scrypt_aligned_alloc YX, V;
    uint8_t *X, *Y;

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
    scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
#endif

/*
#if !defined(SCRYPT_TEST)
    static int power_on_self_test = 0;
    if (!power_on_self_test) {
        power_on_self_test = 1;
        if (!scrypt_power_on_self_test())
            scrypt_fatal_error("scrypt: power on self test failed");
    }
#endif
*/
    V = scrypt_alloc((uint64_t)512 * chunk_bytes);
    YX = scrypt_alloc(2 * chunk_bytes);

    /* 1: X = PBKDF2(password, salt) */
    Y = YX.ptr;
    X = Y + chunk_bytes;
    scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes);

    /* 2: X = ROMix(X) */
    scrypt_ROMix((scrypt_mix_word_t *)X, (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, 512, 1);

    /* 3: Out = PBKDF2(password, X) */
    scrypt_pbkdf2(password, password_len, X, chunk_bytes, 1, out, 32);

    scrypt_ensure_zero(YX.ptr, 2 * chunk_bytes);

    scrypt_free(&V);
    scrypt_free(&YX);
}

#if defined( _WINDOWS )
#if !defined( QT_GUI )
} /* extern "C" */
#endif
#endif
35
algo/argon2/ar2/ar2-scrypt-jane.h
Normal file
@@ -0,0 +1,35 @@
#ifndef AR2_SCRYPT_JANE_H
#define AR2_SCRYPT_JANE_H

#ifdef _MSC_VER
#undef SCRYPT_CHOOSE_COMPILETIME
#endif
//#define SCRYPT_TEST
#define SCRYPT_SKEIN512
#define SCRYPT_SALSA64

/*
    Nfactor: Increases CPU & Memory Hardness
    N = (1 << (Nfactor + 1)): How many times to mix a chunk and how many temporary chunks are used

    rfactor: Increases Memory Hardness
    r = (1 << rfactor): How large a chunk is

    pfactor: Increases CPU Hardness
    p = (1 << pfactor): Number of times to mix the main chunk

    A block is the basic mixing unit (salsa/chacha block = 64 bytes)
    A chunk is (2 * r) blocks

    ~Memory used = (N + 2) * ((2 * r) * block size)
*/

#include <stdlib.h>
#include <stdint.h>

typedef void (*scrypt_fatal_errorfn)(const char *msg);
void scrypt_set_fatal_error(scrypt_fatal_errorfn fn);

void scrypt(const unsigned char *password, size_t password_len, const unsigned char *salt, size_t salt_len, unsigned char Nfactor, unsigned char rfactor, unsigned char pfactor, unsigned char *out, size_t bytes);
void my_scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t *out);
#endif /* AR2_SCRYPT_JANE_H */
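The comment block above defines how Nfactor, rfactor and pfactor translate into cost. As a reading aid only (not part of the committed sources), here is a minimal stand-alone C sketch that evaluates the memory formula for the settings my_scrypt() hard-codes (Nfactor = 8, rfactor = 0, 64-byte blocks):

/* Illustrative sketch: memory estimate from the header comment above. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const unsigned Nfactor = 8, rfactor = 0, block_size = 64;
    const uint64_t N = UINT64_C(1) << (Nfactor + 1);                  /* 512 temporary chunks */
    const uint64_t chunk = 2 * (UINT64_C(1) << rfactor) * block_size; /* 128-byte chunk */
    /* ~Memory used = (N + 2) * chunk  ->  65792 bytes (about 64 KiB) here */
    printf("approx. memory: %llu bytes\n", (unsigned long long)((N + 2) * chunk));
    return 0;
}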
284
algo/argon2/ar2/argon2.c
Normal file
@@ -0,0 +1,284 @@
/*
 * Argon2 source code package
 *
 * Written by Daniel Dinu and Dmitry Khovratovich, 2015
 *
 * This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
 *
 * You should have received a copy of the CC0 Public Domain Dedication along
 * with
 * this software. If not, see
 * <http://creativecommons.org/publicdomain/zero/1.0/>.
 */

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <limits.h>

#include "argon2.h"
#include "cores.h"

/* Error messages */
static const char *Argon2_ErrorMessage[] = {
    /*{ARGON2_OK, */ "OK",
    /*},

    {ARGON2_OUTPUT_PTR_NULL, */ "Output pointer is NULL",
    /*},

    {ARGON2_OUTPUT_TOO_SHORT, */ "Output is too short",
    /*},
    {ARGON2_OUTPUT_TOO_LONG, */ "Output is too long",
    /*},

    {ARGON2_PWD_TOO_SHORT, */ "Password is too short",
    /*},
    {ARGON2_PWD_TOO_LONG, */ "Password is too long",
    /*},

    {ARGON2_SALT_TOO_SHORT, */ "Salt is too short",
    /*},
    {ARGON2_SALT_TOO_LONG, */ "Salt is too long",
    /*},

    {ARGON2_AD_TOO_SHORT, */ "Associated data is too short",
    /*},
    {ARGON2_AD_TOO_LONG, */ "Associated data is too long",
    /*},

    {ARGON2_SECRET_TOO_SHORT, */ "Secret is too short",
    /*},
    {ARGON2_SECRET_TOO_LONG, */ "Secret is too long",
    /*},

    {ARGON2_TIME_TOO_SMALL, */ "Time cost is too small",
    /*},
    {ARGON2_TIME_TOO_LARGE, */ "Time cost is too large",
    /*},

    {ARGON2_MEMORY_TOO_LITTLE, */ "Memory cost is too small",
    /*},
    {ARGON2_MEMORY_TOO_MUCH, */ "Memory cost is too large",
    /*},

    {ARGON2_LANES_TOO_FEW, */ "Too few lanes",
    /*},
    {ARGON2_LANES_TOO_MANY, */ "Too many lanes",
    /*},

    {ARGON2_PWD_PTR_MISMATCH, */ "Password pointer is NULL, but password length is not 0",
    /*},
    {ARGON2_SALT_PTR_MISMATCH, */ "Salt pointer is NULL, but salt length is not 0",
    /*},
    {ARGON2_SECRET_PTR_MISMATCH, */ "Secret pointer is NULL, but secret length is not 0",
    /*},
    {ARGON2_AD_PTR_MISMATCH, */ "Associated data pointer is NULL, but ad length is not 0",
    /*},

    {ARGON2_MEMORY_ALLOCATION_ERROR, */ "Memory allocation error",
    /*},

    {ARGON2_FREE_MEMORY_CBK_NULL, */ "The free memory callback is NULL",
    /*},
    {ARGON2_ALLOCATE_MEMORY_CBK_NULL, */ "The allocate memory callback is NULL",
    /*},

    {ARGON2_INCORRECT_PARAMETER, */ "Argon2_Context context is NULL",
    /*},
    {ARGON2_INCORRECT_TYPE, */ "There is no such version of Argon2",
    /*},

    {ARGON2_OUT_PTR_MISMATCH, */ "Output pointer mismatch",
    /*},

    {ARGON2_THREADS_TOO_FEW, */ "Not enough threads",
    /*},
    {ARGON2_THREADS_TOO_MANY, */ "Too many threads",
    /*},
    {ARGON2_MISSING_ARGS, */ "Missing arguments", /*},*/
};

int argon2d(argon2_context *context) { return argon2_core(context, Argon2_d); }

int argon2i(argon2_context *context) { return argon2_core(context, Argon2_i); }

int verify_d(argon2_context *context, const char *hash)
{
    int result;
    /*if (0 == context->outlen || NULL == hash) {
        return ARGON2_OUT_PTR_MISMATCH;
    }*/

    result = argon2_core(context, Argon2_d);

    if (ARGON2_OK != result) {
        return result;
    }

    return 0 == memcmp(hash, context->out, 32);
}

const char *error_message(int error_code)
{
    enum {
        /* Make sure---at compile time---that the enum size matches the array
           size */
        ERROR_STRING_CHECK =
            1 /
            !!((sizeof(Argon2_ErrorMessage) / sizeof(Argon2_ErrorMessage[0])) ==
               ARGON2_ERROR_CODES_LENGTH)
    };
    if (error_code < ARGON2_ERROR_CODES_LENGTH) {
        return Argon2_ErrorMessage[(argon2_error_codes)error_code];
    }
    return "Unknown error code.";
}

/* encoding/decoding helpers */

/*
 * Some macros for constant-time comparisons. These work over values in
 * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true".
 */
#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF)
#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF)
#define GE(x, y) (GT(y, x) ^ 0xFF)
#define LT(x, y) GT(y, x)
#define LE(x, y) GE(y, x)

/*
 * Convert value x (0..63) to corresponding Base64 character.
 */
static int b64_byte_to_char(unsigned x) {
//static inline int b64_byte_to_char(unsigned x) {
    return (LT(x, 26) & (x + 'A')) |
           (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) |
           (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') |
           (EQ(x, 63) & '/');
}

/*
 * Convert some bytes to Base64. 'dst_len' is the length (in characters)
 * of the output buffer 'dst'; if that buffer is not large enough to
 * receive the result (including the terminating 0), then (size_t)-1
 * is returned. Otherwise, the zero-terminated Base64 string is written
 * in the buffer, and the output length (counted WITHOUT the terminating
 * zero) is returned.
 */
static size_t to_base64(char *dst, size_t dst_len, const void *src)
{
    size_t olen;
    const unsigned char *buf;
    unsigned acc, acc_len;

    olen = 43;
    /*switch (32 % 3) {
    case 2:
        olen++;*/
        /* fall through */
    /*case 1:
        olen += 2;
        break;
    }*/
    if (dst_len <= olen) {
        return (size_t)-1;
    }
    acc = 0;
    acc_len = 0;
    buf = (const unsigned char *)src;
    size_t src_len = 32;
    while (src_len-- > 0) {
        acc = (acc << 8) + (*buf++);
        acc_len += 8;
        while (acc_len >= 6) {
            acc_len -= 6;
            *dst++ = b64_byte_to_char((acc >> acc_len) & 0x3F);
        }
    }
    if (acc_len > 0) {
        *dst++ = b64_byte_to_char((acc << (6 - acc_len)) & 0x3F);
    }
    *dst++ = 0;
    return olen;
}

/* ==================================================================== */
/*
 * Code specific to Argon2i.
 *
 * The code below applies the following format:
 *
 * $argon2i$m=<num>,t=<num>,p=<num>[,keyid=<bin>][,data=<bin>][$<bin>[$<bin>]]
 *
 * where <num> is a decimal integer (positive, fits in an 'unsigned long')
 * and <bin> is Base64-encoded data (no '=' padding characters, no newline
 * or whitespace). The "keyid" is a binary identifier for a key (up to 8
 * bytes); "data" is associated data (up to 32 bytes). When the 'keyid'
 * (resp. the 'data') is empty, then it is omitted from the output.
 *
 * The last two binary chunks (encoded in Base64) are, in that order,
 * the salt and the output. Both are optional, but you cannot have an
 * output without a salt. The binary salt length is between 8 and 48 bytes.
 * The output length is always exactly 32 bytes.
 */

int encode_string(char *dst, size_t dst_len, argon2_context *ctx)
{
#define SS(str) \
    do { \
        size_t pp_len = strlen(str); \
        if (pp_len >= dst_len) { \
            return 0; \
        } \
        memcpy(dst, str, pp_len + 1); \
        dst += pp_len; \
        dst_len -= pp_len; \
    } while (0)

#define SX(x) \
    do { \
        char tmp[30]; \
        sprintf(tmp, "%lu", (unsigned long)(x)); \
        SS(tmp); \
    } while (0);

#define SB(buf) \
    do { \
        size_t sb_len = to_base64(dst, dst_len, buf); \
        if (sb_len == (size_t)-1) { \
            return 0; \
        } \
        dst += sb_len; \
        dst_len -= sb_len; \
    } while (0);

    SS("$argon2i$m=");
    SX(16);
    SS(",t=");
    SX(2);
    SS(",p=");
    SX(1);

    /*if (ctx->adlen > 0) {
        SS(",data=");
        SB(ctx->ad, ctx->adlen);
    }*/

    /*if (ctx->saltlen == 0)
        return 1;*/

    SS("$");
    SB(ctx->salt);

    /*if (ctx->outlen32 == 0)
        return 1;*/

    SS("$");
    SB(ctx->out);
    return 1;

#undef SS
#undef SX
#undef SB
}
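The format comment above describes the $argon2i$ encoding that encode_string() emits. As an illustrative sketch only (not part of the committed sources; the salt/out buffers here are placeholders, and in the miner they come from an argon2_context that has already been hashed), a call could look like this. Note that to_base64() above always reads exactly 32 bytes from each buffer.

#include <stdio.h>
#include <string.h>
#include "argon2.h"

static void encode_example(void)
{
    uint8_t salt[32] = {0}, out[32] = {0};
    char encoded[128];
    argon2_context ctx;

    memset(&ctx, 0, sizeof(ctx));
    ctx.salt = salt;
    ctx.out  = out;

    if (encode_string(encoded, sizeof(encoded), &ctx))
        printf("%s\n", encoded);  /* e.g. $argon2i$m=16,t=2,p=1$<salt-b64>$<out-b64> */
}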
292
algo/argon2/ar2/argon2.h
Normal file
@@ -0,0 +1,292 @@
/*
 * Argon2 source code package
 *
 * Written by Daniel Dinu and Dmitry Khovratovich, 2015
 *
 * This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
 *
 * You should have received a copy of the CC0 Public Domain Dedication along
 * with
 * this software. If not, see
 * <http://creativecommons.org/publicdomain/zero/1.0/>.
 */
#ifndef ARGON2_H
#define ARGON2_H

#include <stdint.h>
#include <stddef.h>
#include <limits.h>

#if defined(__cplusplus)
extern "C" {
#endif

/*************************Argon2 input parameter
 * restrictions**************************************************/

/* Minimum and maximum number of lanes (degree of parallelism) */
#define ARGON2_MIN_LANES UINT32_C(1)
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)

/* Minimum and maximum number of threads */
#define ARGON2_MIN_THREADS UINT32_C(1)
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)

/* Number of synchronization points between lanes per pass */
#define ARGON2_SYNC_POINTS UINT32_C(4)

/* Minimum and maximum digest size in bytes */
#define ARGON2_MIN_OUTLEN UINT32_C(4)
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)

/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */

#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
/* Max memory size is half the addressing space, topping at 2^32 blocks (4 TB)
 */
#define ARGON2_MAX_MEMORY_BITS \
    ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
#define ARGON2_MAX_MEMORY \
    ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)

/* Minimum and maximum number of passes */
#define ARGON2_MIN_TIME UINT32_C(1)
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)

/* Minimum and maximum password length in bytes */
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)

/* Minimum and maximum associated data length in bytes */
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)

/* Minimum and maximum salt length in bytes */
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)

/* Minimum and maximum key length in bytes */
#define ARGON2_MIN_SECRET UINT32_C(0)
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)

#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
#define ARGON2_FLAG_CLEAR_MEMORY (UINT32_C(1) << 2)
#define ARGON2_DEFAULT_FLAGS \
    (ARGON2_FLAG_CLEAR_PASSWORD | ARGON2_FLAG_CLEAR_MEMORY)

/* Error codes */
typedef enum Argon2_ErrorCodes {
    ARGON2_OK = 0,

    ARGON2_OUTPUT_PTR_NULL = 1,

    ARGON2_OUTPUT_TOO_SHORT = 2,
    ARGON2_OUTPUT_TOO_LONG = 3,

    ARGON2_PWD_TOO_SHORT = 4,
    ARGON2_PWD_TOO_LONG = 5,

    ARGON2_SALT_TOO_SHORT = 6,
    ARGON2_SALT_TOO_LONG = 7,

    ARGON2_AD_TOO_SHORT = 8,
    ARGON2_AD_TOO_LONG = 9,

    ARGON2_SECRET_TOO_SHORT = 10,
    ARGON2_SECRET_TOO_LONG = 11,

    ARGON2_TIME_TOO_SMALL = 12,
    ARGON2_TIME_TOO_LARGE = 13,

    ARGON2_MEMORY_TOO_LITTLE = 14,
    ARGON2_MEMORY_TOO_MUCH = 15,

    ARGON2_LANES_TOO_FEW = 16,
    ARGON2_LANES_TOO_MANY = 17,

    ARGON2_PWD_PTR_MISMATCH = 18,    /* NULL ptr with non-zero length */
    ARGON2_SALT_PTR_MISMATCH = 19,   /* NULL ptr with non-zero length */
    ARGON2_SECRET_PTR_MISMATCH = 20, /* NULL ptr with non-zero length */
    ARGON2_AD_PTR_MISMATCH = 21,     /* NULL ptr with non-zero length */

    ARGON2_MEMORY_ALLOCATION_ERROR = 22,

    ARGON2_FREE_MEMORY_CBK_NULL = 23,
    ARGON2_ALLOCATE_MEMORY_CBK_NULL = 24,

    ARGON2_INCORRECT_PARAMETER = 25,
    ARGON2_INCORRECT_TYPE = 26,

    ARGON2_OUT_PTR_MISMATCH = 27,

    ARGON2_THREADS_TOO_FEW = 28,
    ARGON2_THREADS_TOO_MANY = 29,

    ARGON2_MISSING_ARGS = 30,

    ARGON2_ERROR_CODES_LENGTH /* Do NOT remove; Do NOT add error codes after
                                 this error code */
} argon2_error_codes;

/* Memory allocator types --- for external allocation */
typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);

/* Argon2 external data structures */

/*
 *****Context: structure to hold Argon2 inputs:
 * output array and its length,
 * password and its length,
 * salt and its length,
 * secret and its length,
 * associated data and its length,
 * number of passes, amount of used memory (in KBytes, can be rounded up a bit)
 * number of parallel threads that will be run.
 * All the parameters above affect the output hash value.
 * Additionally, two function pointers can be provided to allocate and
 deallocate the memory (if NULL, memory will be allocated internally).
 * Also, three flags indicate whether to erase password, secret as soon as they
 are pre-hashed (and thus not needed anymore), and the entire memory
 ****************************
 Simplest situation: you have output array out[8], password is stored in
 pwd[32], salt is stored in salt[16], you do not have keys nor associated data.
 You need to spend 1 GB of RAM and you run 5 passes of Argon2d with 4 parallel
 lanes.
 You want to erase the password, but you're OK with last pass not being erased.
 You want to use the default memory allocator.
 */
typedef struct Argon2_Context {
    uint8_t *out;  /* output array */
    uint8_t *pwd;  /* password array */
    uint8_t *salt; /* salt array */
    /*uint8_t *secret;*/ /* key array */
    /*uint8_t *ad;*/     /* associated data array */

    allocate_fptr allocate_cbk; /* pointer to memory allocator */
    deallocate_fptr free_cbk;   /* pointer to memory deallocator */

    /*uint32_t outlen;*/ /* digest length */
    uint32_t pwdlen;     /* password length */
    /*uint32_t saltlen;*/   /* salt length */
    /*uint32_t secretlen;*/ /* key length */
    /*uint32_t adlen;*/     /* associated data length */
    /*uint32_t t_cost;*/  /* number of passes */
    /*uint32_t m_cost;*/  /* amount of memory requested (KB) */
    /*uint32_t lanes;*/   /* number of lanes */
    /*uint32_t threads;*/ /* maximum number of threads */
    /*uint32_t flags;*/   /* array of bool options */

} argon2_context;

/**
 * Function to hash the inputs in the memory-hard fashion (uses Argon2i)
 * @param out Pointer to the memory where the hash digest will be written
 * @param outlen Digest length in bytes
 * @param in Pointer to the input (password)
 * @param inlen Input length in bytes
 * @param salt Pointer to the salt
 * @param saltlen Salt length in bytes
 * @pre @a out must have at least @a outlen bytes allocated
 * @pre @a in must be at least @inlen bytes long
 * @pre @a saltlen must be at least @saltlen bytes long
 * @return Zero if successful, 1 otherwise.
 */
/*int hash_argon2i(void *out, size_t outlen, const void *in, size_t inlen,
                 const void *salt, size_t saltlen, unsigned int t_cost,
                 unsigned int m_cost);*/

/* same for argon2d */
/*int hash_argon2d(void *out, size_t outlen, const void *in, size_t inlen,
                 const void *salt, size_t saltlen, unsigned int t_cost,
                 unsigned int m_cost);*/

/*
 * **************Argon2d: Version of Argon2 that picks memory blocks depending
 * on the password and salt. Only for side-channel-free
 * environment!!***************
 * @param context Pointer to current Argon2 context
 * @return Zero if successful, a non zero error code otherwise
 */
int argon2d(argon2_context *context);

/*
 * * **************Argon2i: Version of Argon2 that picks memory blocks
 *independent on the password and salt. Good for side-channels,
 ******************* but worse w.r.t. tradeoff attacks if
 *******************only one pass is used***************
 * @param context Pointer to current Argon2 context
 * @return Zero if successful, a non zero error code otherwise
 */
int argon2i(argon2_context *context);

/*
 * * **************Argon2di: Reserved name***************
 * @param context Pointer to current Argon2 context
 * @return Zero if successful, a non zero error code otherwise
 */
int argon2di(argon2_context *context);

/*
 * * **************Argon2ds: Argon2d hardened against GPU attacks, 20%
 * slower***************
 * @param context Pointer to current Argon2 context
 * @return Zero if successful, a non zero error code otherwise
 */
int argon2ds(argon2_context *context);

/*
 * * **************Argon2id: First half-pass over memory is
 *password-independent, the rest are password-dependent
 ********************OK against side channels: they reduce to 1/2-pass
 *Argon2i***************
 * @param context Pointer to current Argon2 context
 * @return Zero if successful, a non zero error code otherwise
 */
int argon2id(argon2_context *context);

/*
 * Verify if a given password is correct for Argon2d hashing
 * @param context Pointer to current Argon2 context
 * @param hash The password hash to verify. The length of the hash is
 * specified by the context outlen member
 * @return Zero if successful, a non zero error code otherwise
 */
int verify_d(argon2_context *context, const char *hash);

/*
 * Get the associated error message for given error code
 * @return The error message associated with the given error code
 */
const char *error_message(int error_code);

/* ==================================================================== */
/*
 * Code specific to Argon2i.
 *
 * The code below applies the following format:
 *
 * $argon2i$m=<num>,t=<num>,p=<num>[,keyid=<bin>][,data=<bin>][$<bin>[$<bin>]]
 *
 * where <num> is a decimal integer (positive, fits in an 'unsigned long')
 * and <bin> is Base64-encoded data (no '=' padding characters, no newline
 * or whitespace). The "keyid" is a binary identifier for a key (up to 8
 * bytes); "data" is associated data (up to 32 bytes). When the 'keyid'
 * (resp. the 'data') is empty, then it is omitted from the output.
 *
 * The last two binary chunks (encoded in Base64) are, in that order,
 * the salt and the output. Both are optional, but you cannot have an
 * output without a salt. The binary salt length is between 8 and 48 bytes.
 * The output length is always exactly 32 bytes.
 */

int encode_string(char *dst, size_t dst_len, argon2_context *ctx);

#if defined(__cplusplus)
}
#endif

#endif
114
algo/argon2/ar2/bench.c
Normal file
@@ -0,0 +1,114 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif

#include "argon2.h"

static uint64_t rdtsc(void)
{
#ifdef _MSC_VER
    return __rdtsc();
#else
#if defined(__amd64__) || defined(__x86_64__)
    uint64_t rax, rdx;
    __asm__ __volatile__("rdtsc" : "=a"(rax), "=d"(rdx) : :);
    return (rdx << 32) | rax;
#elif defined(__i386__) || defined(__i386) || defined(__X86__)
    uint64_t rax;
    __asm__ __volatile__("rdtsc" : "=A"(rax) : :);
    return rax;
#else
#error "Not implemented!"
#endif
#endif
}

/*
 * Benchmarks Argon2 with salt length 16, password length 16, t_cost 1,
 and different m_cost and threads
 */
static void benchmark()
{
#define BENCH_OUTLEN 16
#define BENCH_INLEN 16
    const uint32_t inlen = BENCH_INLEN;
    const unsigned outlen = BENCH_OUTLEN;
    unsigned char out[BENCH_OUTLEN];
    unsigned char pwd_array[BENCH_INLEN];
    unsigned char salt_array[BENCH_INLEN];
#undef BENCH_INLEN
#undef BENCH_OUTLEN

    uint32_t t_cost = 1;
    uint32_t m_cost;
    uint32_t thread_test[6] = {1, 2, 4, 6, 8, 16};

    memset(pwd_array, 0, inlen);
    memset(salt_array, 1, inlen);

    for (m_cost = (uint32_t)1 << 10; m_cost <= (uint32_t)1 << 22; m_cost *= 2) {
        unsigned i;
        for (i = 0; i < 6; ++i) {
            argon2_context context;
            uint32_t thread_n = thread_test[i];
            uint64_t stop_cycles, stop_cycles_i;
            clock_t stop_time;
            uint64_t delta_d, delta_i;
            double mcycles_d, mcycles_i, run_time;

            clock_t start_time = clock();
            uint64_t start_cycles = rdtsc();

            context.out = out;
            context.outlen = outlen;
            context.pwd = pwd_array;
            context.pwdlen = inlen;
            context.salt = salt_array;
            context.saltlen = inlen;
            context.secret = NULL;
            context.secretlen = 0;
            context.ad = NULL;
            context.adlen = 0;
            context.t_cost = t_cost;
            context.m_cost = m_cost;
            context.lanes = thread_n;
            context.threads = thread_n;
            context.allocate_cbk = NULL;
            context.free_cbk = NULL;
            context.flags = 0;

            argon2d(&context);
            stop_cycles = rdtsc();
            argon2i(&context);
            stop_cycles_i = rdtsc();
            stop_time = clock();

            delta_d = (stop_cycles - start_cycles) / (m_cost);
            delta_i = (stop_cycles_i - stop_cycles) / (m_cost);
            mcycles_d = (double)(stop_cycles - start_cycles) / (1UL << 20);
            mcycles_i = (double)(stop_cycles_i - stop_cycles) / (1UL << 20);
            printf("Argon2d %d iterations %d MiB %d threads: %2.2f cpb %2.2f "
                   "Mcycles \n",
                   t_cost, m_cost >> 10, thread_n, (float)delta_d / 1024,
                   mcycles_d);
            printf("Argon2i %d iterations %d MiB %d threads: %2.2f cpb %2.2f "
                   "Mcycles \n",
                   t_cost, m_cost >> 10, thread_n, (float)delta_i / 1024,
                   mcycles_i);

            run_time = ((double)stop_time - start_time) / (CLOCKS_PER_SEC);
            printf("%2.4f seconds\n\n", run_time);
        }
    }
}

int main()
{
    benchmark();
    return ARGON2_OK;
}
143
algo/argon2/ar2/blake2/blake2-impl.h
Normal file
@@ -0,0 +1,143 @@
#ifndef PORTABLE_BLAKE2_IMPL_H
#define PORTABLE_BLAKE2_IMPL_H

#include <stdint.h>
#include <string.h>

#if defined(_MSC_VER)
#define BLAKE2_INLINE __inline
#elif defined(__GNUC__) || defined(__clang__)
#define BLAKE2_INLINE __inline__
#else
#define BLAKE2_INLINE
#endif

/* Argon2 Team - Begin Code */
/*
   Not an exhaustive list, but should cover the majority of modern platforms
   Additionally, the code will always be correct---this is only a performance
   tweak.
*/
#if (defined(__BYTE_ORDER__) && \
     (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
    defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
    defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
    defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
    defined(_M_ARM)
#define NATIVE_LITTLE_ENDIAN
#endif
/* Argon2 Team - End Code */

static BLAKE2_INLINE uint32_t load32(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
    uint32_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    const uint8_t *p = (const uint8_t *)src;
    uint32_t w = *p++;
    w |= (uint32_t)(*p++) << 8;
    w |= (uint32_t)(*p++) << 16;
    w |= (uint32_t)(*p++) << 24;
    return w;
#endif
}

static BLAKE2_INLINE uint64_t load64(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
    uint64_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    const uint8_t *p = (const uint8_t *)src;
    uint64_t w = *p++;
    w |= (uint64_t)(*p++) << 8;
    w |= (uint64_t)(*p++) << 16;
    w |= (uint64_t)(*p++) << 24;
    w |= (uint64_t)(*p++) << 32;
    w |= (uint64_t)(*p++) << 40;
    w |= (uint64_t)(*p++) << 48;
    w |= (uint64_t)(*p++) << 56;
    return w;
#endif
}

static BLAKE2_INLINE void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
#else
    uint8_t *p = (uint8_t *)dst;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
#endif
}

static BLAKE2_INLINE void store64(void *dst, uint64_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
#else
    uint8_t *p = (uint8_t *)dst;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
#endif
}

static BLAKE2_INLINE uint64_t load48(const void *src) {
    const uint8_t *p = (const uint8_t *)src;
    uint64_t w = *p++;
    w |= (uint64_t)(*p++) << 8;
    w |= (uint64_t)(*p++) << 16;
    w |= (uint64_t)(*p++) << 24;
    w |= (uint64_t)(*p++) << 32;
    w |= (uint64_t)(*p++) << 40;
    return w;
}

static BLAKE2_INLINE void store48(void *dst, uint64_t w) {
    uint8_t *p = (uint8_t *)dst;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
}

static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
    return (w >> c) | (w << (32 - c));
}

static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
    return (w >> c) | (w << (64 - c));
}

/* prevents compiler optimizing out memset() */
static BLAKE2_INLINE void burn(void *v, size_t n) {
    static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
    memset_v(v, 0, n);
}

#endif
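As a small illustrative check only (not part of the committed sources; the include path is assumed relative to the blake2/ directory), store64() and load64() above are inverses whether or not NATIVE_LITTLE_ENDIAN is defined:

#include <assert.h>
#include "blake2-impl.h"

static void blake2_impl_roundtrip_check(void)
{
    uint8_t buf[8];
    store64(buf, UINT64_C(0x0123456789abcdef));
    assert(load64(buf) == UINT64_C(0x0123456789abcdef));
}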
76
algo/argon2/ar2/blake2/blake2.h
Normal file
@@ -0,0 +1,76 @@
#ifndef PORTABLE_BLAKE2_H
#define PORTABLE_BLAKE2_H

#include <stddef.h>
#include <stdint.h>
#include <limits.h>

#if defined(__cplusplus)
extern "C" {
#endif

enum blake2b_constant {
    BLAKE2B_BLOCKBYTES = 128,
    BLAKE2B_OUTBYTES = 64,
    BLAKE2B_KEYBYTES = 64,
    BLAKE2B_SALTBYTES = 16,
    BLAKE2B_PERSONALBYTES = 16
};

#pragma pack(push, 1)
typedef struct __blake2b_param {
    uint8_t digest_length;                   /* 1 */
    uint8_t key_length;                      /* 2 */
    uint8_t fanout;                          /* 3 */
    uint8_t depth;                           /* 4 */
    uint32_t leaf_length;                    /* 8 */
    uint64_t node_offset;                    /* 16 */
    uint8_t node_depth;                      /* 17 */
    uint8_t inner_length;                    /* 18 */
    uint8_t reserved[14];                    /* 32 */
    uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */
    uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;
#pragma pack(pop)

typedef struct __blake2b_state {
    uint64_t h[8];
    uint64_t t[2];
    uint64_t f[2];
    unsigned buflen;
    unsigned outlen;
    uint8_t last_node;
    uint8_t buf[BLAKE2B_BLOCKBYTES];
} blake2b_state;

/* Ensure param structs have not been wrongly padded */
/* Poor man's static_assert */
enum {
    blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
    blake2_size_check_2 =
        1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
};

/* Streaming API */
int blake2b_init(blake2b_state *S, size_t outlen);
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
                     size_t keylen);
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen);
int blake2b_final(blake2b_state *S, void *out, size_t outlen);

/* Simple API */
int blake2b(void *out, const void *in, const void *key, size_t keylen);

/* Argon2 Team - Begin Code */
int blake2b_long(void *out, const void *in);
/* Argon2 Team - End Code */
/* Miouyouyou */
void blake2b_too(void *out, const void *in);

#if defined(__cplusplus)
}
#endif

#endif
162
algo/argon2/ar2/blake2/blamka-round-opt.h
Normal file
@@ -0,0 +1,162 @@
#ifndef BLAKE_ROUND_MKA_OPT_H
#define BLAKE_ROUND_MKA_OPT_H

#include "blake2-impl.h"

#if defined(_MSC_VER)
#include <intrin.h>
#endif

#include <immintrin.h>
#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
#include <x86intrin.h>
#endif

#if !defined(__XOP__)
#if defined(__SSSE3__)
#define r16 \
    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define r24 \
    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
#define _mm_roti_epi64(x, c) \
    (-(c) == 32) \
        ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
        : (-(c) == 24) \
              ? _mm_shuffle_epi8((x), r24) \
              : (-(c) == 16) \
                    ? _mm_shuffle_epi8((x), r16) \
                    : (-(c) == 63) \
                          ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                                          _mm_add_epi64((x), (x))) \
                          : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                                          _mm_slli_epi64((x), 64 - (-(c))))
#else /* defined(__SSE2__) */
#define _mm_roti_epi64(r, c) \
    _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
#endif
#else
#endif

static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
    const __m128i z = _mm_mul_epu32(x, y);
    return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
}

#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        A0 = fBlaMka(A0, B0); \
        A1 = fBlaMka(A1, B1); \
        \
        D0 = _mm_xor_si128(D0, A0); \
        D1 = _mm_xor_si128(D1, A1); \
        \
        D0 = _mm_roti_epi64(D0, -32); \
        D1 = _mm_roti_epi64(D1, -32); \
        \
        C0 = fBlaMka(C0, D0); \
        C1 = fBlaMka(C1, D1); \
        \
        B0 = _mm_xor_si128(B0, C0); \
        B1 = _mm_xor_si128(B1, C1); \
        \
        B0 = _mm_roti_epi64(B0, -24); \
        B1 = _mm_roti_epi64(B1, -24); \
    } while ((void)0, 0)

#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        A0 = fBlaMka(A0, B0); \
        A1 = fBlaMka(A1, B1); \
        \
        D0 = _mm_xor_si128(D0, A0); \
        D1 = _mm_xor_si128(D1, A1); \
        \
        D0 = _mm_roti_epi64(D0, -16); \
        D1 = _mm_roti_epi64(D1, -16); \
        \
        C0 = fBlaMka(C0, D0); \
        C1 = fBlaMka(C1, D1); \
        \
        B0 = _mm_xor_si128(B0, C0); \
        B1 = _mm_xor_si128(B1, C1); \
        \
        B0 = _mm_roti_epi64(B0, -63); \
        B1 = _mm_roti_epi64(B1, -63); \
    } while ((void)0, 0)

#if defined(__SSSE3__)
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
        __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
        B0 = t0; \
        B1 = t1; \
        \
        t0 = C0; \
        C0 = C1; \
        C1 = t0; \
        \
        t0 = _mm_alignr_epi8(D1, D0, 8); \
        t1 = _mm_alignr_epi8(D0, D1, 8); \
        D0 = t1; \
        D1 = t0; \
    } while ((void)0, 0)

#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
        __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
        B0 = t0; \
        B1 = t1; \
        \
        t0 = C0; \
        C0 = C1; \
        C1 = t0; \
        \
        t0 = _mm_alignr_epi8(D0, D1, 8); \
        t1 = _mm_alignr_epi8(D1, D0, 8); \
        D0 = t1; \
        D1 = t0; \
    } while ((void)0, 0)
#else /* SSE2 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i t0 = D0; \
        __m128i t1 = B0; \
        D0 = C0; \
        C0 = C1; \
        C1 = D0; \
        D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
        D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
        B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
        B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
    } while ((void)0, 0)

#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i t0 = C0; \
        C0 = C1; \
        C1 = t0; \
        t0 = B0; \
        __m128i t1 = D0; \
        B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
        B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
        D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
        D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
    } while ((void)0, 0)
#endif

#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
    } while ((void)0, 0)

#endif
39
algo/argon2/ar2/blake2/blamka-round-ref.h
Normal file
@@ -0,0 +1,39 @@
#ifndef BLAKE_ROUND_MKA_H
#define BLAKE_ROUND_MKA_H

#include "blake2.h"
#include "blake2-impl.h"

/*designed by the Lyra PHC team */
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t m = UINT64_C(0xFFFFFFFF);
    const uint64_t xy = (x & m) * (y & m);
    return x + y + 2 * xy;
}

#define G(a, b, c, d) \
    do { \
        a = fBlaMka(a, b); \
        d = rotr64(d ^ a, 32); \
        c = fBlaMka(c, d); \
        b = rotr64(b ^ c, 24); \
        a = fBlaMka(a, b); \
        d = rotr64(d ^ a, 16); \
        c = fBlaMka(c, d); \
        b = rotr64(b ^ c, 63); \
    } while ((void)0, 0)

#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
                           v12, v13, v14, v15) \
    do { \
        G(v0, v4, v8, v12); \
        G(v1, v5, v9, v13); \
        G(v2, v6, v10, v14); \
        G(v3, v7, v11, v15); \
        G(v0, v5, v10, v15); \
        G(v1, v6, v11, v12); \
        G(v2, v7, v8, v13); \
        G(v3, v4, v9, v14); \
    } while ((void)0, 0)

#endif
316
algo/argon2/ar2/blake2b.c
Normal file
316
algo/argon2/ar2/blake2b.c
Normal file
@@ -0,0 +1,316 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// i know there is a trick but nvm :p
|
||||
#define PRIu64 "%llu"
|
||||
#define PRIx64 "%llx"
|
||||
#endif
|
||||
|
||||
static const uint64_t blake2b_IV[8] = {
|
||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)
|
||||
};
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] = {
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) {
|
||||
S->f[1] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) {
|
||||
if (S->last_node) {
|
||||
blake2b_set_lastnode(S);
|
||||
}
|
||||
S->f[0] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, uint64_t inc) {
|
||||
S->t[0] += inc;
|
||||
S->t[1] += (S->t[0] < inc);
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) {
|
||||
burn(S, sizeof(*S)); /* wipe */
|
||||
blake2b_set_lastblock(S); /* invalidate for further use */
|
||||
}
|
||||
|
||||
static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) {
|
||||
memset(S, 0, sizeof(*S));
|
||||
memcpy(S->h, blake2b_IV, sizeof(S->h));
|
||||
}
|
||||
|
||||
/*
|
||||
void print_state(blake2b_state BlakeHash)
|
||||
{
|
||||
printf(".h = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n"
|
||||
"UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n"
|
||||
"UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n"
|
||||
"UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")},\n"
|
||||
".t = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")},\n"
|
||||
".f = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")}\n",
|
||||
BlakeHash.h[0], BlakeHash.h[1], BlakeHash.h[2], BlakeHash.h[3],
|
||||
BlakeHash.h[4], BlakeHash.h[5], BlakeHash.h[6], BlakeHash.h[7],
|
||||
BlakeHash.t[0], BlakeHash.t[1],
|
||||
BlakeHash.f[0], BlakeHash.f[1]);
|
||||
printf(".buf = {");
|
||||
for (register uint8_t i = 0; i < BLAKE2B_BLOCKBYTES; i++)
|
||||
printf("%" PRIu8 ", ", BlakeHash.buf[i]);
|
||||
puts("\n");
|
||||
printf("}\n.buflen = %d\n.outlen = %d\n",
|
||||
BlakeHash.buflen, BlakeHash.outlen);
|
||||
printf(".last_node = %" PRIu8 "\n", BlakeHash.last_node);
|
||||
fflush(stdout);
|
||||
}
|
||||
*/
|
||||
|
||||
static const blake2b_state miou = {
|
||||
.h = {
|
||||
UINT64_C(7640891576939301128), UINT64_C(13503953896175478587),
|
||||
UINT64_C(4354685564936845355), UINT64_C(11912009170470909681),
|
||||
UINT64_C(5840696475078001361), UINT64_C(11170449401992604703),
|
||||
UINT64_C(2270897969802886507), UINT64_C(6620516959819538809)
|
||||
},
|
||||
.t = {UINT64_C(0), UINT64_C(0)},
|
||||
.f = {UINT64_C(0), UINT64_C(0)},
|
||||
.buf = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
},
|
||||
.buflen = 0,
|
||||
.outlen = 64,
|
||||
.last_node = 0
|
||||
};
|
||||
|
||||
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P)
|
||||
{
|
||||
const unsigned char *p = (const unsigned char *)P;
|
||||
unsigned int i;
|
||||
|
||||
if (NULL == P || NULL == S) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_init0(S);
|
||||
/* IV XOR Parameter Block */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
|
||||
}
|
||||
S->outlen = P->digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void compare_buffs(uint64_t *h, size_t outlen)
|
||||
{
|
||||
// printf("CMP : %d", memcmp(h, miou.h, 8*(sizeof(uint64_t))));
|
||||
printf("miou : %" PRIu64 " - h : %" PRIu64 " - outlen : %ld\n", miou.h[0], h[0], outlen);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
/* Sequential blake2b initialization */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen)
|
||||
{
|
||||
memcpy(S, &miou, sizeof(*S));
|
||||
S->h[0] += outlen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print64(const char *name, const uint64_t *array, uint16_t size)
|
||||
{
|
||||
printf("%s = {", name);
|
||||
for (uint8_t i = 0; i < size; i++) printf("UINT64_C(%" PRIu64 "), ", array[i]);
|
||||
printf("};\n");
|
||||
}
|
||||
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blake2b_compress(blake2b_state *S, const uint8_t *block)
|
||||
{
|
||||
uint64_t m[16];
|
||||
uint64_t v[16];
|
||||
unsigned int i, r;
|
||||
|
||||
for (i = 0; i < 16; ++i) {
|
||||
m[i] = load64(block + i * 8);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
v[i] = S->h[i];
|
||||
}
|
||||
|
||||
v[8] = blake2b_IV[0];
|
||||
v[9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = blake2b_IV[4] ^ S->t[0];
|
||||
v[13] = blake2b_IV[5]/* ^ S->t[1]*/;
|
||||
v[14] = blake2b_IV[6] ^ S->f[0];
|
||||
v[15] = blake2b_IV[7]/* ^ S->f[1]*/;
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
for (r = 0; r < 12; ++r) ROUND(r);
|
||||
|
||||
for (i = 0; i < 8; ++i) S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
{
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
/* Complete current block */
|
||||
memcpy(&S->buf[4], pin, 124);
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, S->buf);
|
||||
S->buflen = 0;
|
||||
pin += 124;
|
||||
|
||||
register int8_t i = 7;
|
||||
/* Avoid buffer copies when possible */
|
||||
while (i--) {
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, pin);
|
||||
pin += BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
memcpy(&S->buf[S->buflen], pin, 4);
|
||||
S->buflen += 4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen)
|
||||
{
|
||||
memcpy(&S->buf[S->buflen], in, inlen);
|
||||
S->buflen += (unsigned int)inlen;
|
||||
}
|
||||
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen)
|
||||
{
|
||||
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
|
||||
unsigned int i;
|
||||
|
||||
blake2b_increment_counter(S, S->buflen);
|
||||
blake2b_set_lastblock(S);
|
||||
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
||||
blake2b_compress(S, S->buf);
|
||||
|
||||
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
||||
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
||||
}
|
||||
|
||||
memcpy(out, buffer, S->outlen);
|
||||
|
||||
burn(buffer, sizeof(buffer));
|
||||
burn(S->buf, sizeof(S->buf));
|
||||
burn(S->h, sizeof(S->h));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b(void *out, const void *in, const void *key, size_t keylen)
|
||||
{
|
||||
blake2b_state S;
|
||||
|
||||
blake2b_init(&S, 64);
|
||||
my_blake2b_update(&S, in, 64);
|
||||
blake2b_final(&S, out, 64);
|
||||
burn(&S, sizeof(S));
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blake2b_too(void *pout, const void *in)
|
||||
{
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
uint8_t out_buffer[64];
|
||||
uint8_t in_buffer[64];
|
||||
|
||||
blake2b_state blake_state;
|
||||
blake2b_init(&blake_state, 64);
|
||||
blake_state.buflen = blake_state.buf[1] = 4;
|
||||
my_blake2b_update(&blake_state, in, 72);
|
||||
blake2b_final(&blake_state, out_buffer, 64);
|
||||
memcpy(out, out_buffer, 32);
|
||||
out += 32;
|
||||
|
||||
register uint8_t i = 29;
|
||||
while (i--) {
|
||||
memcpy(in_buffer, out_buffer, 64);
|
||||
blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
memcpy(out, out_buffer, 32);
|
||||
out += 32;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, 64);
|
||||
blake2b(out_buffer, in_buffer, NULL, 0);
|
||||
memcpy(out, out_buffer, 64);
|
||||
|
||||
burn(&blake_state, sizeof(blake_state));
|
||||
}
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
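/* Note: unlike the generic upstream blake2b_long, this version appears to be fixed to a
   32-byte tag over a 1024-byte input: the little-endian outlen (32) is absorbed first,
   followed by the block, as Argon2's H' prescribes for outputs of at most 64 bytes. */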
int blake2b_long(void *pout, const void *in)
|
||||
{
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
blake2b_state blake_state;
|
||||
uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
|
||||
|
||||
store32(outlen_bytes, 32);
|
||||
|
||||
blake2b_init(&blake_state, 32);
|
||||
my_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes));
|
||||
blake2b_update(&blake_state, in, 1024);
|
||||
blake2b_final(&blake_state, out, 32);
|
||||
burn(&blake_state, sizeof(blake_state));
|
||||
return 0;
|
||||
}
|
||||
/* Argon2 Team - End Code */
|
||||
349
algo/argon2/ar2/cores.c
Normal file
@@ -0,0 +1,349 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
/*For memory wiping*/
|
||||
#ifdef _MSC_VER
|
||||
#include <windows.h>
|
||||
#include <winbase.h> /* For SecureZeroMemory */
|
||||
#endif
|
||||
#if defined __STDC_LIB_EXT1__
|
||||
#define __STDC_WANT_LIB_EXT1__ 1
|
||||
#endif
|
||||
#define VC_GE_2005(version) (version >= 1400)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "argon2.h"
|
||||
#include "cores.h"
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
|
||||
#ifdef GENKAT
|
||||
#include "genkat.h"
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_attribute(optnone)
|
||||
#define NOT_OPTIMIZED __attribute__((optnone))
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#define GCC_VERSION \
|
||||
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#if GCC_VERSION >= 40400
|
||||
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef NOT_OPTIMIZED
|
||||
#define NOT_OPTIMIZED
|
||||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
//inline void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
|
||||
void copy_block(block *dst, const block *src) {
|
||||
//inline void copy_block(block *dst, const block *src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_WORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
void xor_block(block *dst, const block *src) {
|
||||
//inline void xor_block(block *dst, const block *src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] ^= src->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
//static inline void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static void store_block(void *output, const block *src) {
|
||||
//static inline void store_block(void *output, const block *src) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) {
|
||||
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/***************Memory allocators*****************/
|
||||
int allocate_memory(block **memory, uint32_t m_cost) {
|
||||
if (memory != NULL) {
|
||||
size_t memory_size = sizeof(block) * m_cost;
|
||||
if (m_cost != 0 &&
|
||||
memory_size / m_cost !=
|
||||
sizeof(block)) { /*1. Check for multiplication overflow*/
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
*memory = (block *)malloc(memory_size); /*2. Try to allocate*/
|
||||
|
||||
if (!*memory) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
} else {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
//inline void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); }
|
||||
|
||||
/*********Memory functions*/
|
||||
|
||||
void clear_memory(argon2_instance_t *instance, int clear) {
|
||||
//inline void clear_memory(argon2_instance_t *instance, int clear) {
|
||||
if (instance->memory != NULL && clear) {
|
||||
secure_wipe_memory(instance->memory,
|
||||
sizeof(block) * /*instance->memory_blocks*/16);
|
||||
}
|
||||
}
|
||||
|
||||
void free_memory(block *memory) { free(memory); }
|
||||
//inline void free_memory(block *memory) { free(memory); }
|
||||
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
if (context != NULL && instance != NULL) {
|
||||
block blockhash;
|
||||
copy_block(&blockhash, instance->memory + 15);
|
||||
|
||||
/* Hash the result */
|
||||
{
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store_block(blockhash_bytes, &blockhash);
|
||||
blake2b_long(context->out, blockhash_bytes);
|
||||
secure_wipe_memory(blockhash.v, ARGON2_BLOCK_SIZE);
|
||||
secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); /* clear blockhash_bytes */
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
print_tag(context->out, context->outlen);
|
||||
#endif
|
||||
|
||||
/* Clear memory */
|
||||
// clear_memory(instance, 1);
|
||||
|
||||
free_memory(instance->memory);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane) {
|
||||
/*
|
||||
* Pass 0:
|
||||
* This lane : all already finished segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : all already finished segments
|
||||
* Pass 1+:
|
||||
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : (SYNC_POINTS - 1) last segments
|
||||
*/
|
||||
uint32_t reference_area_size;
|
||||
uint64_t relative_position;
|
||||
uint32_t start_position, absolute_position;
|
||||
|
||||
if (0 == position->pass) {
|
||||
/* First pass */
|
||||
if (0 == position->slice) {
|
||||
/* First slice */
|
||||
reference_area_size =
|
||||
position->index - 1; /* all but the previous */
|
||||
} else {
|
||||
if (same_lane) {
|
||||
/* The same lane => add current segment */
|
||||
reference_area_size =
|
||||
position->slice * 4 +
|
||||
position->index - 1;
|
||||
} else {
|
||||
reference_area_size =
|
||||
position->slice * 4 +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Second pass */
|
||||
if (same_lane) {reference_area_size = 11 + position->index;}
|
||||
else {reference_area_size = 12 - (position->index == 0);}
|
||||
}
|
||||
|
||||
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and producing the
|
||||
* relative position */
|
||||
relative_position = pseudo_rand;
|
||||
relative_position = relative_position * relative_position >> 32;
|
||||
relative_position = reference_area_size - 1 -
|
||||
(reference_area_size * relative_position >> 32);
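    /* The 32-bit multiply-and-shift squares the pseudo-random value, which biases the
       reference towards the most recently written blocks, matching the non-uniform index
       mapping described in the Argon2 specification. */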
|
||||
|
||||
/* 1.2.5 Computing starting position */
|
||||
start_position = 0;
|
||||
|
||||
if (0 != position->pass) {
|
||||
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
|
||||
? 0 : (position->slice + 1) * 4;
|
||||
}
|
||||
|
||||
/* 1.2.6. Computing absolute position */
|
||||
absolute_position = (start_position + relative_position) % 16;
|
||||
return absolute_position;
|
||||
}
|
||||
|
||||
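/* Note: the pass count (2) and lane count (1) are hard-coded below; the generic Argon2 loop
   over instance->passes and lanes has been specialised for this miner's fixed parameters. */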
void fill_memory_blocks(argon2_instance_t *instance) {
|
||||
uint32_t r, s;
|
||||
|
||||
for (r = 0; r < 2; ++r) {
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
|
||||
argon2_position_t position;
|
||||
position.pass = r;
|
||||
position.lane = 0;
|
||||
position.slice = (uint8_t)s;
|
||||
position.index = 0;
|
||||
fill_segment(instance, position);
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
internal_kat(instance, r); /* Print all memory blocks */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
/* Make the first and second block in each lane as G(H0||i||0) or
|
||||
G(H0||i||1) */
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, 0);
|
||||
blake2b_too(blockhash_bytes, blockhash);
|
||||
load_block(&instance->memory[0], blockhash_bytes);
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
|
||||
blake2b_too(blockhash_bytes, blockhash);
|
||||
load_block(&instance->memory[1], blockhash_bytes);
|
||||
secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
|
||||
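/* Assumption (from the field values): base_hash is a pre-initialised BLAKE2b state in which
   the seven fixed 32-bit Argon2 parameters (lanes, outlen, m_cost, t_cost, version, type,
   pwdlen) have already been serialised into .buf (buflen = 28), so initial_hash() only has
   to patch the type byte and absorb the variable inputs. */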
static const blake2b_state base_hash = {
|
||||
.h = {
|
||||
UINT64_C(7640891576939301192), UINT64_C(13503953896175478587),
|
||||
UINT64_C(4354685564936845355), UINT64_C(11912009170470909681),
|
||||
UINT64_C(5840696475078001361), UINT64_C(11170449401992604703),
|
||||
UINT64_C(2270897969802886507), UINT64_C(6620516959819538809)
|
||||
},
|
||||
.t = {UINT64_C(0),UINT64_C(0)},
|
||||
.f = {UINT64_C(0),UINT64_C(0)},
|
||||
.buf = {
|
||||
1, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 2, 0, 0, 0, 16, 0, 0, 0, 1, 0,
|
||||
0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
.buflen = 28,
|
||||
.outlen = 64,
|
||||
.last_node = 0
|
||||
};
|
||||
|
||||
#define PWDLEN 32
|
||||
#define SALTLEN 32
|
||||
#define SECRETLEN 0
|
||||
#define ADLEN 0
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type) {
|
||||
|
||||
uint8_t value[sizeof(uint32_t)];
|
||||
|
||||
/* Is it generating cache invalidation between cores ? */
|
||||
blake2b_state BlakeHash = base_hash;
|
||||
BlakeHash.buf[20] = (uint8_t) type;
|
||||
my_blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
PWDLEN);
|
||||
|
||||
|
||||
secure_wipe_memory(context->pwd, PWDLEN);
|
||||
context->pwdlen = 0;
|
||||
|
||||
store32(&value, SALTLEN);
|
||||
my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
my_blake2b_update(&BlakeHash, (const uint8_t *)context->salt,
|
||||
SALTLEN);
|
||||
|
||||
store32(&value, SECRETLEN);
|
||||
my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, ADLEN);
|
||||
my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
/* 1. Memory allocation */
|
||||
|
||||
|
||||
    if (allocate_memory(&(instance->memory), 16) != ARGON2_OK)
        return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
/* Hashing all inputs */
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
#ifdef GENKAT
|
||||
initial_kat(blockhash, context, instance->type);
|
||||
#endif
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
fill_first_blocks(blockhash, instance);
|
||||
/* Clearing the hash */
|
||||
secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2_core(argon2_context *context, argon2_type type) {
|
||||
argon2_instance_t instance;
|
||||
instance.memory = NULL;
|
||||
instance.type = type;
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
|
||||
int result = initialize(&instance, context);
|
||||
if (ARGON2_OK != result) return result;
|
||||
|
||||
/* 4. Filling memory */
|
||||
fill_memory_blocks(&instance);
|
||||
|
||||
/* 5. Finalization */
|
||||
finalize(context, &instance);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
216
algo/argon2/ar2/cores.h
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_CORES_H
|
||||
#define ARGON2_CORES_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <Windows.h>
|
||||
#include <process.h>
|
||||
#define ALIGN(n) __declspec(align(n))
|
||||
#elif defined(__GNUC__) || defined(__clang)
|
||||
#define ALIGN(x) __attribute__((__aligned__(x)))
|
||||
#else
|
||||
#define ALIGN(x)
|
||||
#endif
|
||||
|
||||
/*************************Argon2 internal
|
||||
* constants**************************************************/
|
||||
|
||||
enum argon2_core_constants {
|
||||
/* Version of the algorithm */
|
||||
ARGON2_VERSION_NUMBER = 0x10,
|
||||
|
||||
/* Memory block size in bytes */
|
||||
ARGON2_BLOCK_SIZE = 1024,
|
||||
ARGON2_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
|
||||
ARGON2_QWORDS_IN_BLOCK = 64,
|
||||
|
||||
/* Number of pseudo-random values generated by one call to Blake in Argon2i
|
||||
to
|
||||
generate reference block positions */
|
||||
ARGON2_ADDRESSES_IN_BLOCK = 128,
|
||||
|
||||
/* Pre-hashing digest length and its extension*/
|
||||
ARGON2_PREHASH_DIGEST_LENGTH = 64,
|
||||
ARGON2_PREHASH_SEED_LENGTH = 72
|
||||
};
|
||||
|
||||
/* Argon2 primitive type */
|
||||
typedef enum Argon2_type { Argon2_d = 0, Argon2_i = 1 } argon2_type;
|
||||
|
||||
/*************************Argon2 internal data
|
||||
* types**************************************************/
|
||||
|
||||
/*
|
||||
* Structure for the (1KB) memory block implemented as 128 64-bit words.
|
||||
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
|
||||
* bounds checking).
|
||||
*/
|
||||
typedef struct _block { uint64_t v[ARGON2_WORDS_IN_BLOCK]; } ALIGN(16) block;
|
||||
|
||||
/*****************Functions that work with the block******************/
|
||||
|
||||
/* Initialize each byte of the block with @in */
|
||||
void init_block_value(block *b, uint8_t in);
|
||||
|
||||
/* Copy block @src to block @dst */
|
||||
void copy_block(block *dst, const block *src);
|
||||
|
||||
/* XOR @src onto @dst bytewise */
|
||||
void xor_block(block *dst, const block *src);
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
* and derived values.
|
||||
* Used to evaluate the number and location of blocks to construct in each
|
||||
* thread
|
||||
*/
|
||||
typedef struct Argon2_instance_t {
|
||||
block *memory; /* Memory pointer */
|
||||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
} argon2_instance_t;
|
||||
|
||||
/*
|
||||
* Argon2 position: where we construct the block right now. Used to distribute
|
||||
* work between threads.
|
||||
*/
|
||||
typedef struct Argon2_position_t {
|
||||
uint32_t pass;
|
||||
uint32_t lane;
|
||||
uint8_t slice;
|
||||
uint32_t index;
|
||||
} argon2_position_t;
|
||||
|
||||
/*************************Argon2 core
|
||||
* functions**************************************************/
|
||||
|
||||
/* Allocates memory to the given pointer
|
||||
* @param memory pointer to the pointer to the memory
|
||||
* @param m_cost number of blocks to allocate in the memory
|
||||
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
|
||||
*/
|
||||
int allocate_memory(block **memory, uint32_t m_cost);
|
||||
|
||||
/* Function that securely cleans the memory
|
||||
* @param v Pointer to the memory
|
||||
* @param n Memory size in bytes
|
||||
*/
|
||||
void secure_wipe_memory(void *v, size_t n);
|
||||
|
||||
/* Clears memory
|
||||
* @param instance pointer to the current instance
|
||||
* @param clear indicates if we clear the memory with zeros.
|
||||
*/
|
||||
void clear_memory(argon2_instance_t *instance, int clear);
|
||||
|
||||
/* Deallocates memory
|
||||
* @param memory pointer to the blocks
|
||||
*/
|
||||
void free_memory(block *memory);
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
* distribution and using a pseudo-random value as input
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Pointer to the current position
|
||||
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
|
||||
* @param same_lane Indicates if the block will be taken from the current lane.
|
||||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
/*
|
||||
* Function that validates all inputs against predefined restrictions and returns
|
||||
* an error code
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>)
|
||||
*/
|
||||
int validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
|
||||
* password and secret if needed
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param blockhash Buffer for pre-hashing digest
|
||||
* @param type Argon2 type
|
||||
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
|
||||
* allocated
|
||||
*/
|
||||
void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Function creates first 2 blocks per lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @param blockhash Pointer to the pre-hashing digest
|
||||
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
|
||||
*/
|
||||
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
|
||||
* initialized
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param instance Current Argon2 instance
|
||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
* the memory.
|
||||
* @param context Pointer to current Argon2 context (use only the out parameters
|
||||
* from it)
|
||||
* @param instance Pointer to current instance of Argon2
|
||||
* @pre instance->state must point to necessary amount of memory
|
||||
* @pre context->out must point to outlen bytes of memory
|
||||
* @pre if context->free_cbk is not NULL, it should point to a function that
|
||||
* deallocates memory
|
||||
*/
|
||||
void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
/*
|
||||
* Function that fills the entire memory t_cost times based on the first two
|
||||
* blocks in each lane
|
||||
* @param instance Pointer to the current instance
|
||||
*/
|
||||
void fill_memory_blocks(argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that performs memory-hard hashing with certain degree of parallelism
|
||||
* @param context Pointer to the Argon2 internal structure
|
||||
* @return Error code if something is wrong, ARGON2_OK otherwise
|
||||
*/
|
||||
int argon2_core(argon2_context *context, argon2_type type);
|
||||
|
||||
#endif
|
||||
186
algo/argon2/ar2/genkat.c
Normal file
@@ -0,0 +1,186 @@
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "argon2.h"
|
||||
#include "cores.h"
|
||||
|
||||
void initial_kat(const uint8_t *blockhash, const argon2_context *context,
|
||||
argon2_type type)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
if (blockhash != NULL && context != NULL) {
|
||||
printf("=======================================");
|
||||
|
||||
switch (type) {
|
||||
case Argon2_d:
|
||||
printf("Argon2d\n");
|
||||
break;
|
||||
|
||||
case Argon2_i:
|
||||
printf("Argon2i\n");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
printf("Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag "
|
||||
"length: %u bytes\n",
|
||||
context->m_cost, context->t_cost, context->lanes,
|
||||
context->outlen);
|
||||
|
||||
printf("Password[%u]: ", context->pwdlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
||||
printf("CLEARED\n");
|
||||
} else {
|
||||
for (i = 0; i < context->pwdlen; ++i) {
|
||||
printf("%2.2x ", ((unsigned char *)context->pwd)[i]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("Salt[%u]: ", context->saltlen);
|
||||
|
||||
for (i = 0; i < context->saltlen; ++i) {
|
||||
printf("%2.2x ", ((unsigned char *)context->salt)[i]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
printf("Secret[%u]: ", context->secretlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
||||
printf("CLEARED\n");
|
||||
} else {
|
||||
for (i = 0; i < context->secretlen; ++i) {
|
||||
printf("%2.2x ", ((unsigned char *)context->secret)[i]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("Associated data[%u]: ", context->adlen);
|
||||
|
||||
for (i = 0; i < context->adlen; ++i) {
|
||||
printf("%2.2x ", ((unsigned char *)context->ad)[i]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
printf("Pre-hashing digest: ");
|
||||
|
||||
for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) {
|
||||
printf("%2.2x ", ((unsigned char *)blockhash)[i]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
void print_tag(const void *out, uint32_t outlen)
|
||||
{
|
||||
unsigned i;
|
||||
if (out != NULL) {
|
||||
printf("Tag: ");
|
||||
|
||||
for (i = 0; i < outlen; ++i) {
|
||||
printf("%2.2x ", ((uint8_t *)out)[i]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
void internal_kat(const argon2_instance_t *instance, uint32_t pass)
|
||||
{
|
||||
if (instance != NULL) {
|
||||
uint32_t i, j;
|
||||
printf("\n After pass %u:\n", pass);
|
||||
|
||||
for (i = 0; i < instance->memory_blocks; ++i) {
|
||||
uint32_t how_many_words =
|
||||
(instance->memory_blocks > ARGON2_WORDS_IN_BLOCK)
|
||||
? 1
|
||||
: ARGON2_WORDS_IN_BLOCK;
|
||||
|
||||
for (j = 0; j < how_many_words; ++j)
|
||||
printf("Block %.4u [%3u]: %016" PRIx64 "\n", i, j,
|
||||
instance->memory[i].v[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void fatal(const char *error) {
|
||||
fprintf(stderr, "Error: %s\n", error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void generate_testvectors(const char *type)
|
||||
{
|
||||
#define TEST_OUTLEN 32
|
||||
#define TEST_PWDLEN 32
|
||||
#define TEST_SALTLEN 16
|
||||
#define TEST_SECRETLEN 8
|
||||
#define TEST_ADLEN 12
|
||||
argon2_context context;
|
||||
|
||||
unsigned char out[TEST_OUTLEN];
|
||||
unsigned char pwd[TEST_PWDLEN];
|
||||
unsigned char salt[TEST_SALTLEN];
|
||||
unsigned char secret[TEST_SECRETLEN];
|
||||
unsigned char ad[TEST_ADLEN];
|
||||
const allocate_fptr myown_allocator = NULL;
|
||||
const deallocate_fptr myown_deallocator = NULL;
|
||||
|
||||
unsigned t_cost = 3;
|
||||
unsigned m_cost = 16;
|
||||
unsigned lanes = 4;
|
||||
|
||||
    memset(pwd, 1, TEST_PWDLEN);
|
||||
memset(salt, 2, TEST_SALTLEN);
|
||||
memset(secret, 3, TEST_SECRETLEN);
|
||||
memset(ad, 4, TEST_ADLEN);
|
||||
|
||||
context.out = out;
|
||||
context.outlen = TEST_OUTLEN;
|
||||
context.pwd = pwd;
|
||||
context.pwdlen = TEST_PWDLEN;
|
||||
context.salt = salt;
|
||||
context.saltlen = TEST_SALTLEN;
|
||||
context.secret = secret;
|
||||
context.secretlen = TEST_SECRETLEN;
|
||||
context.ad = ad;
|
||||
context.adlen = TEST_ADLEN;
|
||||
context.t_cost = t_cost;
|
||||
context.m_cost = m_cost;
|
||||
context.lanes = lanes;
|
||||
context.threads = lanes;
|
||||
context.allocate_cbk = myown_allocator;
|
||||
context.free_cbk = myown_deallocator;
|
||||
context.flags = 0;
|
||||
|
||||
#undef TEST_OUTLEN
|
||||
#undef TEST_PWDLEN
|
||||
#undef TEST_SALTLEN
|
||||
#undef TEST_SECRETLEN
|
||||
#undef TEST_ADLEN
|
||||
|
||||
if (!strcmp(type, "d")) {
|
||||
argon2d(&context);
|
||||
} else if (!strcmp(type, "i")) {
|
||||
argon2i(&context);
|
||||
} else
|
||||
fatal("wrong Argon2 type");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
const char *type = (argc > 1) ? argv[1] : "i";
|
||||
generate_testvectors(type);
|
||||
return ARGON2_OK;
|
||||
}
|
||||
45
algo/argon2/ar2/genkat.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_KAT_H
|
||||
#define ARGON2_KAT_H
|
||||
|
||||
/*
|
||||
* Initial KAT function that prints the inputs to the file
|
||||
* @param blockhash Array that contains pre-hashing digest
|
||||
* @param context Holds inputs
|
||||
* @param type Argon2 type
|
||||
* @pre blockhash must point to INPUT_INITIAL_HASH_LENGTH bytes
|
||||
* @pre context member pointers must point to allocated memory of size according
|
||||
* to the length values
|
||||
*/
|
||||
void initial_kat(const uint8_t *blockhash, const argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Function that prints the output tag
|
||||
* @param out output array pointer
|
||||
* @param outlen digest length
|
||||
* @pre out must point to @a outlen bytes
|
||||
**/
|
||||
void print_tag(const void *out, uint32_t outlen);
|
||||
|
||||
/*
|
||||
* Function that prints the internal state at given moment
|
||||
* @param instance pointer to the current instance
|
||||
* @param pass current pass number
|
||||
* @pre instance must have necessary memory allocated
|
||||
**/
|
||||
void internal_kat(const argon2_instance_t *instance, uint32_t pass);
|
||||
|
||||
#endif
|
||||
185
algo/argon2/ar2/opt.c
Normal file
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "argon2.h"
|
||||
#include "cores.h"
|
||||
#include "opt.h"
|
||||
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blamka-round-opt.h"
|
||||
|
||||
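/* Compression function G, SSE2 path: the reference block is XORed into the running state,
   the BLAKE2b-based permutation is applied first to the eight consecutive groups of eight
   vectors and then to the eight strided groups (the row/column pattern of Argon2's
   permutation), and the result is XORed with the pre-permutation copy before being stored
   to next_block. */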
void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block)
|
||||
{
|
||||
__m128i ALIGN(16) block_XY[ARGON2_QWORDS_IN_BLOCK];
|
||||
uint32_t i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm_xor_si128(
|
||||
state[i], _mm_load_si128(&ref_block[i]));
|
||||
}
|
||||
|
||||
BLAKE2_ROUND(state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]);
|
||||
BLAKE2_ROUND(state[8], state[9], state[10], state[11], state[12], state[13], state[14], state[15]);
|
||||
BLAKE2_ROUND(state[16], state[17], state[18], state[19], state[20], state[21], state[22], state[23]);
|
||||
BLAKE2_ROUND(state[24], state[25], state[26], state[27], state[28], state[29], state[30], state[31]);
|
||||
BLAKE2_ROUND(state[32], state[33], state[34], state[35], state[36], state[37], state[38], state[39]);
|
||||
BLAKE2_ROUND(state[40], state[41], state[42], state[43], state[44], state[45], state[46], state[47]);
|
||||
BLAKE2_ROUND(state[48], state[49], state[50], state[51], state[52], state[53], state[54], state[55]);
|
||||
BLAKE2_ROUND(state[56], state[57], state[58], state[59], state[60], state[61], state[62], state[63]);
|
||||
/*for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
state[8 * i + 6], state[8 * i + 7]);
|
||||
}*/
|
||||
|
||||
BLAKE2_ROUND(state[0], state[8], state[16], state[24], state[32], state[40], state[48], state[56]);
|
||||
BLAKE2_ROUND(state[1], state[9], state[17], state[25], state[33], state[41], state[49], state[57]);
|
||||
BLAKE2_ROUND(state[2], state[10], state[18], state[26], state[34], state[42], state[50], state[58]);
|
||||
BLAKE2_ROUND(state[3], state[11], state[19], state[27], state[35], state[43], state[51], state[59]);
|
||||
BLAKE2_ROUND(state[4], state[12], state[20], state[28], state[36], state[44], state[52], state[60]);
|
||||
BLAKE2_ROUND(state[5], state[13], state[21], state[29], state[37], state[45], state[53], state[61]);
|
||||
BLAKE2_ROUND(state[6], state[14], state[22], state[30], state[38], state[46], state[54], state[62]);
|
||||
BLAKE2_ROUND(state[7], state[15], state[23], state[31], state[39], state[47], state[55], state[63]);
|
||||
/*for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}*/
|
||||
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128(&next_block[i], state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
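/* Assumption: because this port runs Argon2i with fixed parameters (m_cost = 16, one lane,
   two passes), the data-independent address blocks are constant, so the 32 pseudo-random
   values appear to have been precomputed once and baked into this table; generate_addresses()
   below simply indexes it by pass and slice instead of running the extra compressions. */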
static const uint64_t bad_rands[32] = {
|
||||
UINT64_C(17023632018251376180), UINT64_C(4911461131397773491),
|
||||
UINT64_C(15927076453364631751), UINT64_C(7860239898779391109),
|
||||
|
||||
UINT64_C(11820267568857244377), UINT64_C(12188179869468676617),
|
||||
UINT64_C(3732913385414474778), UINT64_C(7651458777762572084),
|
||||
|
||||
UINT64_C(3062274162574341415), UINT64_C(17922653540258786897),
|
||||
UINT64_C(17393848266100524980), UINT64_C(8539695715554563839),
|
||||
|
||||
UINT64_C(13824538050656654359), UINT64_C(12078939433126460936),
|
||||
UINT64_C(15331979418564540430), UINT64_C(12058346794217174273),
|
||||
|
||||
UINT64_C(13593922096015221049), UINT64_C(18356682276374416500),
|
||||
UINT64_C(4968040514092703824), UINT64_C(11202790346130235567),
|
||||
|
||||
UINT64_C(2276229735041314644), UINT64_C(220837743321691382),
|
||||
UINT64_C(4861211596230784273), UINT64_C(6330592584132590331),
|
||||
|
||||
UINT64_C(3515580430960296763), UINT64_C(9869356316971855173),
|
||||
UINT64_C(485533243489193056), UINT64_C(14596447761048148032),
|
||||
|
||||
UINT64_C(16531790085730132900), UINT64_C(17328824500878824371),
|
||||
UINT64_C(8548260058287621283), UINT64_C(8641748798041936364)
|
||||
};
|
||||
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands)
|
||||
{
|
||||
uint8_t offset = position->pass * 16 + position->slice * 4;
|
||||
pseudo_rands[0] = bad_rands[offset++];
|
||||
pseudo_rands[1] = bad_rands[offset++];
|
||||
pseudo_rands[2] = bad_rands[offset++];
|
||||
pseudo_rands[3] = bad_rands[offset++];
|
||||
|
||||
/*if ((position->pass == 1 && position->slice == 3))
|
||||
print64("pseudo_rands", pseudo_rands, 4);*/
|
||||
}
|
||||
|
||||
#define SEGMENT_LENGTH 4
|
||||
#define LANE_LENGTH 16
|
||||
#define POS_LANE 0
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position)
|
||||
{
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
uint64_t pseudo_rand, ref_index;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint8_t i;
|
||||
__m128i state[64];
|
||||
int data_independent_addressing = (instance->type == Argon2_i);
|
||||
|
||||
/* Pseudo-random values that determine the reference block position */
|
||||
uint64_t *pseudo_rands = NULL;
|
||||
|
||||
    pseudo_rands = (uint64_t *)malloc(sizeof(uint64_t) * 4);
    if (pseudo_rands == NULL)
        return;
|
||||
|
||||
if (data_independent_addressing) {
|
||||
generate_addresses(instance, &position, pseudo_rands);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
i = 2; /* we have already generated the first two blocks */
|
||||
}
|
||||
|
||||
/*printf("Position.lane = %d\nPosition.slice = %d\nStarting index : %d\n", position.lane, position.slice, starting_index);*/
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.slice * 4 + i;
|
||||
|
||||
if (0 == curr_offset % 16) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + /*instance->lane_length - 1*/15;
|
||||
} else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
|
||||
|
||||
for (; i < SEGMENT_LENGTH;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % LANE_LENGTH == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
pseudo_rand = pseudo_rands[i];
|
||||
} else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,1);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block = instance->memory + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
fill_block(state, (__m128i const *)ref_block->v, (__m128i *)curr_block->v);
|
||||
}
|
||||
|
||||
free(pseudo_rands);
|
||||
}
|
||||
49
algo/argon2/ar2/opt.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_OPT_H
|
||||
#define ARGON2_OPT_H
|
||||
|
||||
/*
|
||||
* Function fills a new memory block. Differs from the reference version in that the previous block is carried in @a state between calls.
|
||||
* @param state Pointer to the just produced block. Content will be updated(!)
|
||||
* @param ref_block Pointer to the reference block
|
||||
* @param next_block Pointer to the block to be constructed
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block);
|
||||
|
||||
/*
|
||||
* Generate pseudo-random values to reference blocks in the segment and puts
|
||||
* them into the array
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Pointer to the current position
|
||||
* @param pseudo_rands Pointer to the array of 64-bit values
|
||||
* @pre pseudo_rands must point to @a instance->segment_length allocated values
|
||||
*/
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads.
|
||||
* Identical to the reference code except that it calls optimized FillBlock()
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
#endif /* ARGON2_OPT_H */
|
||||
174
algo/argon2/ar2/ref.c
Normal file
@@ -0,0 +1,174 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
#include "cores.h"
|
||||
#include "ref.h"
|
||||
|
||||
#include "blake2/blamka-round-ref.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
void fill_block(const block *prev_block, const block *ref_block,
|
||||
block *next_block) {
|
||||
block blockR, block_tmp;
|
||||
unsigned i;
|
||||
|
||||
copy_block(&blockR, ref_block);
|
||||
xor_block(&blockR, prev_block);
|
||||
copy_block(&block_tmp, &blockR);
|
||||
|
||||
/* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
|
||||
(16,17,..31)... finally (112,113,...127) */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
|
||||
blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
|
||||
blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
|
||||
blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
|
||||
blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
|
||||
blockR.v[16 * i + 15]);
|
||||
}
|
||||
|
||||
/* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
|
||||
(2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
|
||||
for (i = 0; i < 8; i++) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
|
||||
blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
|
||||
blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
|
||||
blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
|
||||
blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
|
||||
blockR.v[2 * i + 113]);
|
||||
}
|
||||
|
||||
copy_block(next_block, &block_tmp);
|
||||
xor_block(next_block, &blockR);
|
||||
}
|
||||
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands) {
|
||||
block zero_block, input_block, address_block;
|
||||
uint32_t i;
|
||||
|
||||
init_block_value(&zero_block, 0);
|
||||
init_block_value(&input_block, 0);
|
||||
init_block_value(&address_block, 0);
|
||||
|
||||
if (instance != NULL && position != NULL) {
|
||||
input_block.v[0] = position->pass;
|
||||
input_block.v[1] = position->lane;
|
||||
input_block.v[2] = position->slice;
|
||||
input_block.v[3] = 16;
|
||||
input_block.v[4] = 2;
|
||||
input_block.v[5] = instance->type;
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
input_block.v[6]++;
|
||||
fill_block(&zero_block, &input_block, &address_block);
|
||||
fill_block(&zero_block, &address_block, &address_block);
|
||||
}
|
||||
|
||||
pseudo_rands[i] = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index;
|
||||
uint32_t i;
|
||||
int data_independent_addressing = (instance->type == Argon2_i);
|
||||
/* Pseudo-random values that determine the reference block position */
|
||||
uint64_t *pseudo_rands = NULL;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
pseudo_rands =
|
||||
(uint64_t *)malloc(sizeof(uint64_t) * 4);
|
||||
|
||||
if (pseudo_rands == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (data_independent_addressing) {
|
||||
generate_addresses(instance, &position, pseudo_rands);
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * 16 +
|
||||
position.slice * 4 + starting_index;
|
||||
|
||||
if (0 == curr_offset % 16) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + 16 - 1;
|
||||
} else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
for (i = starting_index; i < 4; ++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % 16 == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
pseudo_rand = pseudo_rands[i];
|
||||
} else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % 1;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + 16 * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
fill_block(instance->memory + prev_offset, ref_block, curr_block);
|
||||
}
|
||||
|
||||
free(pseudo_rands);
|
||||
}
|
||||
49
algo/argon2/ar2/ref.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_REF_H
|
||||
#define ARGON2_REF_H
|
||||
|
||||
/*
|
||||
* Function fills a new memory block
|
||||
* @param prev_block Pointer to the previous block
|
||||
* @param ref_block Pointer to the reference block
|
||||
* @param next_block Pointer to the block to be constructed
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_block(const block *prev_block, const block *ref_block,
|
||||
block *next_block);
|
||||
|
||||
/*
|
||||
* Generate pseudo-random values to reference blocks in the segment and puts
|
||||
* them into the array
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Pointer to the current position
|
||||
* @param pseudo_rands Pointer to the array of 64-bit values
|
||||
* @pre pseudo_rands must point to @a instance->segment_length allocated values
|
||||
*/
|
||||
void generate_addresses(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position,
|
||||
uint64_t *pseudo_rands);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
#endif /* ARGON2_REF_H */
|
||||
223
algo/argon2/ar2/run.c
Normal file
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
* Argon2 source code package
|
||||
*
|
||||
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
|
||||
*
|
||||
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along
|
||||
* with
|
||||
* this software. If not, see
|
||||
* <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
#include <limits.h> /* ULONG_MAX, used when validating numeric options */
|
||||
|
||||
#include "argon2.h"
|
||||
#include "cores.h"
|
||||
|
||||
#define T_COST_DEF 3
|
||||
#define LOG_M_COST_DEF 12 /* 2^12 = 4 MiB */
|
||||
#define LANES_DEF 1
|
||||
#define THREADS_DEF 1
|
||||
#define OUT_LEN 32
|
||||
#define SALT_LEN 16
|
||||
|
||||
#define UNUSED_PARAMETER(x) (void)(x)
|
||||
|
||||
static void usage(const char *cmd) {
|
||||
printf("Usage: %s pwd salt [-y version] [-t iterations] [-m memory] [-p "
|
||||
"parallelism]\n",
|
||||
cmd);
|
||||
|
||||
printf("Parameters:\n");
|
||||
printf("\tpwd\t\tThe password to hash\n");
|
||||
printf("\tsalt\t\tThe salt to use, at most 16 characters\n");
|
||||
printf("\t-d\t\tUse Argon2d instead of Argon2i (which is the default)\n");
|
||||
printf("\t-t N\t\tSets the number of iterations to N (default = %d)\n",
|
||||
T_COST_DEF);
|
||||
printf("\t-m N\t\tSets the memory usage of 2^N KiB (default %d)\n",
|
||||
LOG_M_COST_DEF);
|
||||
printf("\t-p N\t\tSets parallelism to N threads (default %d)\n",
|
||||
THREADS_DEF);
|
||||
}
|
||||
|
||||
static void fatal(const char *error) {
|
||||
fprintf(stderr, "Error: %s\n", error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
Runs Argon2 with certain inputs and parameters, inputs not cleared. Prints the
|
||||
Base64-encoded hash string
|
||||
@out output array with at least 32 bytes allocated
|
||||
@pwd NULL-terminated string, presumably from argv[]
|
||||
@salt salt array with at least SALT_LEN bytes allocated
|
||||
@t_cost number of iterations
|
||||
@m_cost amount of requested memory in KB
|
||||
@lanes amount of requested parallelism
|
||||
@threads actual parallelism
|
||||
@type String, only "d" and "i" are accepted
|
||||
*/
|
||||
static void run(uint8_t *out, char *pwd, uint8_t *salt, uint32_t t_cost,
|
||||
uint32_t m_cost, uint32_t lanes, uint32_t threads,
|
||||
const char *type) {
|
||||
clock_t start_time, stop_time;
|
||||
unsigned pwd_length;
|
||||
argon2_context context;
|
||||
int i;
|
||||
|
||||
start_time = clock();
|
||||
|
||||
if (!pwd) {
|
||||
fatal("password missing");
|
||||
}
|
||||
|
||||
if (!salt) {
|
||||
secure_wipe_memory(pwd, strlen(pwd));
|
||||
fatal("salt missing");
|
||||
}
|
||||
|
||||
pwd_length = strlen(pwd);
|
||||
|
||||
UNUSED_PARAMETER(threads);
|
||||
|
||||
context.out = out;
|
||||
context.outlen = OUT_LEN;
|
||||
context.pwd = (uint8_t *)pwd;
|
||||
context.pwdlen = pwd_length;
|
||||
context.salt = salt;
|
||||
context.saltlen = SALT_LEN;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = t_cost;
|
||||
context.m_cost = m_cost;
|
||||
context.lanes = lanes;
|
||||
context.threads = lanes;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_FLAG_CLEAR_PASSWORD;
|
||||
|
||||
if (!strcmp(type, "d")) {
|
||||
int result = argon2d(&context);
|
||||
if (result != ARGON2_OK)
|
||||
fatal(error_message(result));
|
||||
} else if (!strcmp(type, "i")) {
|
||||
int result = argon2i(&context);
|
||||
if (result != ARGON2_OK)
|
||||
fatal(error_message(result));
|
||||
} else {
|
||||
secure_wipe_memory(pwd, strlen(pwd));
|
||||
fatal("wrong Argon2 type");
|
||||
}
|
||||
|
||||
stop_time = clock();
|
||||
|
||||
/* add back when proper decoding */
|
||||
/*
|
||||
char encoded[300];
|
||||
encode_string(encoded, sizeof encoded, &context);
|
||||
printf("%s\n", encoded);
|
||||
*/
|
||||
printf("Hash:\t\t");
|
||||
for (i = 0; i < context.outlen; ++i) {
|
||||
printf("%02x", context.out[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("%2.3f seconds\n",
|
||||
((double)stop_time - start_time) / (CLOCKS_PER_SEC));
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
unsigned char out[OUT_LEN];
|
||||
uint32_t m_cost = 1 << LOG_M_COST_DEF;
|
||||
uint32_t t_cost = T_COST_DEF;
|
||||
uint32_t lanes = LANES_DEF;
|
||||
uint32_t threads = THREADS_DEF;
|
||||
char *pwd = NULL;
|
||||
uint8_t salt[SALT_LEN];
|
||||
const char *type = "i";
|
||||
int i;
|
||||
|
||||
if (argc < 3) {
|
||||
usage(argv[0]);
|
||||
return ARGON2_MISSING_ARGS;
|
||||
}
|
||||
|
||||
/* get password and salt from command line */
|
||||
pwd = argv[1];
|
||||
if (strlen(argv[2]) > SALT_LEN) {
|
||||
fatal("salt too long");
|
||||
}
|
||||
memset(salt, 0x00, SALT_LEN); /* pad with null bytes */
|
||||
memcpy(salt, argv[2], strlen(argv[2]));
|
||||
|
||||
/* parse options */
|
||||
for (i = 3; i < argc; i++) {
|
||||
const char *a = argv[i];
|
||||
unsigned long input = 0;
|
||||
if (!strcmp(a, "-m")) {
|
||||
if (i < argc - 1) {
|
||||
i++;
|
||||
input = strtoul(argv[i], NULL, 10);
|
||||
if (input == 0 || input == ULONG_MAX ||
|
||||
input > ARGON2_MAX_MEMORY_BITS) {
|
||||
fatal("bad numeric input for -m");
|
||||
}
|
||||
m_cost = ARGON2_MIN(UINT64_C(1) << input, UINT32_C(0xFFFFFFFF));
|
||||
if (m_cost > ARGON2_MAX_MEMORY) {
|
||||
fatal("m_cost overflow");
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
fatal("missing -m argument");
|
||||
}
|
||||
} else if (!strcmp(a, "-t")) {
|
||||
if (i < argc - 1) {
|
||||
i++;
|
||||
input = strtoul(argv[i], NULL, 10);
|
||||
if (input == 0 || input == ULONG_MAX ||
|
||||
input > ARGON2_MAX_TIME) {
|
||||
fatal("bad numeric input for -t");
|
||||
}
|
||||
t_cost = input;
|
||||
continue;
|
||||
} else {
|
||||
fatal("missing -t argument");
|
||||
}
|
||||
} else if (!strcmp(a, "-p")) {
|
||||
if (i < argc - 1) {
|
||||
i++;
|
||||
input = strtoul(argv[i], NULL, 10);
|
||||
if (input == 0 || input == ULONG_MAX ||
|
||||
input > ARGON2_MAX_THREADS || input > ARGON2_MAX_LANES) {
|
||||
fatal("bad numeric input for -p");
|
||||
}
|
||||
threads = input;
|
||||
lanes = threads;
|
||||
continue;
|
||||
} else {
|
||||
fatal("missing -p argument");
|
||||
}
|
||||
} else if (!strcmp(a, "-d")) {
|
||||
type = "d";
|
||||
} else {
|
||||
fatal("unknown argument");
|
||||
}
|
||||
}
|
||||
printf("Type:\t\tArgon2%c\n", type[0]);
|
||||
printf("Iterations:\t%" PRIu32 " \n", t_cost);
|
||||
printf("Memory:\t\t%" PRIu32 " KiB\n", m_cost);
|
||||
printf("Parallelism:\t%" PRIu32 " \n", lanes);
|
||||
run(out, pwd, salt, t_cost, m_cost, lanes, threads, type);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
38
algo/argon2/ar2/sj/scrypt-jane-hash.h
Normal file
@@ -0,0 +1,38 @@
|
||||
#if defined(SCRYPT_SKEIN512)
|
||||
#include "scrypt-jane-hash_skein512.h"
|
||||
#else
|
||||
#define SCRYPT_HASH "ERROR"
|
||||
#define SCRYPT_HASH_BLOCK_SIZE 64
|
||||
#define SCRYPT_HASH_DIGEST_SIZE 64
|
||||
typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state;
|
||||
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
|
||||
static void scrypt_hash_init(scrypt_hash_state *S) {}
|
||||
static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {}
|
||||
static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {}
|
||||
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0};
|
||||
#error must define a hash function!
|
||||
#endif
|
||||
|
||||
#include "scrypt-jane-pbkdf2.h"
|
||||
|
||||
#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */
|
||||
|
||||
static int
|
||||
scrypt_test_hash(void) {
|
||||
scrypt_hash_state st;
|
||||
scrypt_hash_digest hash, final;
|
||||
uint8_t msg[SCRYPT_TEST_HASH_LEN];
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++)
|
||||
msg[i] = (uint8_t)i;
|
||||
|
||||
scrypt_hash_init(&st);
|
||||
for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) {
|
||||
scrypt_hash(hash, msg, i);
|
||||
scrypt_hash_update(&st, hash, sizeof(hash));
|
||||
}
|
||||
scrypt_hash_finish(&st, final);
|
||||
return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE);
|
||||
}
|
||||
|
||||
188
algo/argon2/ar2/sj/scrypt-jane-hash_skein512.h
Normal file
@@ -0,0 +1,188 @@
|
||||
#define SCRYPT_HASH "Skein-512"
|
||||
#define SCRYPT_HASH_BLOCK_SIZE 64
|
||||
#define SCRYPT_HASH_DIGEST_SIZE 64
|
||||
|
||||
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
|
||||
|
||||
typedef struct scrypt_hash_state_t {
|
||||
uint64_t X[8], T[2];
|
||||
uint32_t leftover;
|
||||
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
|
||||
} scrypt_hash_state;
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
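/* Note: this is the plain-C Skein-512 block function. Each loop iteration runs eight
   Threefish-512 mix rounds (two groups of four) with a key-schedule injection after each
   group, for 72 rounds in total, and the result is fed forward into the chaining value. */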
static void
|
||||
skein512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks, size_t add) {
|
||||
uint64_t X[8], key[8], Xt[9+18], T[3+1];
|
||||
size_t r;
|
||||
|
||||
while (blocks--) {
|
||||
T[0] = S->T[0] + add;
|
||||
T[1] = S->T[1];
|
||||
T[2] = T[0] ^ T[1];
|
||||
key[0] = U8TO64_LE(in + 0); Xt[0] = S->X[0]; X[0] = key[0] + Xt[0];
|
||||
key[1] = U8TO64_LE(in + 8); Xt[1] = S->X[1]; X[1] = key[1] + Xt[1];
|
||||
key[2] = U8TO64_LE(in + 16); Xt[2] = S->X[2]; X[2] = key[2] + Xt[2];
|
||||
key[3] = U8TO64_LE(in + 24); Xt[3] = S->X[3]; X[3] = key[3] + Xt[3];
|
||||
key[4] = U8TO64_LE(in + 32); Xt[4] = S->X[4]; X[4] = key[4] + Xt[4];
|
||||
key[5] = U8TO64_LE(in + 40); Xt[5] = S->X[5]; X[5] = key[5] + Xt[5] + T[0];
|
||||
key[6] = U8TO64_LE(in + 48); Xt[6] = S->X[6]; X[6] = key[6] + Xt[6] + T[1];
|
||||
key[7] = U8TO64_LE(in + 56); Xt[7] = S->X[7]; X[7] = key[7] + Xt[7];
|
||||
Xt[8] = 0x1BD11BDAA9FC1A22ull ^ Xt[0] ^ Xt[1] ^ Xt[2] ^ Xt[3] ^ Xt[4] ^ Xt[5] ^ Xt[6] ^ Xt[7];
|
||||
in += SCRYPT_HASH_BLOCK_SIZE;
|
||||
|
||||
for (r = 0; r < 18; r++)
|
||||
Xt[r + 9] = Xt[r + 0];
|
||||
|
||||
for (r = 0; r < 18; r += 2) {
|
||||
X[0] += X[1]; X[1] = ROTL64(X[1], 46) ^ X[0];
|
||||
X[2] += X[3]; X[3] = ROTL64(X[3], 36) ^ X[2];
|
||||
X[4] += X[5]; X[5] = ROTL64(X[5], 19) ^ X[4];
|
||||
X[6] += X[7]; X[7] = ROTL64(X[7], 37) ^ X[6];
|
||||
X[2] += X[1]; X[1] = ROTL64(X[1], 33) ^ X[2];
|
||||
X[0] += X[3]; X[3] = ROTL64(X[3], 42) ^ X[0];
|
||||
X[6] += X[5]; X[5] = ROTL64(X[5], 14) ^ X[6];
|
||||
X[4] += X[7]; X[7] = ROTL64(X[7], 27) ^ X[4];
|
||||
X[4] += X[1]; X[1] = ROTL64(X[1], 17) ^ X[4];
|
||||
X[6] += X[3]; X[3] = ROTL64(X[3], 49) ^ X[6];
|
||||
X[0] += X[5]; X[5] = ROTL64(X[5], 36) ^ X[0];
|
||||
X[2] += X[7]; X[7] = ROTL64(X[7], 39) ^ X[2];
|
||||
X[6] += X[1]; X[1] = ROTL64(X[1], 44) ^ X[6];
|
||||
X[4] += X[3]; X[3] = ROTL64(X[3], 56) ^ X[4];
|
||||
X[2] += X[5]; X[5] = ROTL64(X[5], 54) ^ X[2];
|
||||
X[0] += X[7]; X[7] = ROTL64(X[7], 9) ^ X[0];
|
||||
|
||||
X[0] += Xt[r + 1];
|
||||
X[1] += Xt[r + 2];
|
||||
X[2] += Xt[r + 3];
|
||||
X[3] += Xt[r + 4];
|
||||
X[4] += Xt[r + 5];
|
||||
X[5] += Xt[r + 6] + T[1];
|
||||
X[6] += Xt[r + 7] + T[2];
|
||||
X[7] += Xt[r + 8] + r + 1;
|
||||
|
||||
T[3] = T[0];
|
||||
T[0] = T[1];
|
||||
T[1] = T[2];
|
||||
T[2] = T[3];
|
||||
|
||||
X[0] += X[1]; X[1] = ROTL64(X[1], 39) ^ X[0];
|
||||
X[2] += X[3]; X[3] = ROTL64(X[3], 30) ^ X[2];
|
||||
X[4] += X[5]; X[5] = ROTL64(X[5], 34) ^ X[4];
|
||||
X[6] += X[7]; X[7] = ROTL64(X[7], 24) ^ X[6];
|
||||
X[2] += X[1]; X[1] = ROTL64(X[1], 13) ^ X[2];
|
||||
X[0] += X[3]; X[3] = ROTL64(X[3], 17) ^ X[0];
|
||||
X[6] += X[5]; X[5] = ROTL64(X[5], 10) ^ X[6];
|
||||
X[4] += X[7]; X[7] = ROTL64(X[7], 50) ^ X[4];
|
||||
X[4] += X[1]; X[1] = ROTL64(X[1], 25) ^ X[4];
|
||||
X[6] += X[3]; X[3] = ROTL64(X[3], 29) ^ X[6];
|
||||
X[0] += X[5]; X[5] = ROTL64(X[5], 39) ^ X[0];
|
||||
X[2] += X[7]; X[7] = ROTL64(X[7], 43) ^ X[2];
|
||||
X[6] += X[1]; X[1] = ROTL64(X[1], 8) ^ X[6];
|
||||
X[4] += X[3]; X[3] = ROTL64(X[3], 22) ^ X[4];
|
||||
X[2] += X[5]; X[5] = ROTL64(X[5], 56) ^ X[2];
|
||||
X[0] += X[7]; X[7] = ROTL64(X[7], 35) ^ X[0];
|
||||
|
||||
X[0] += Xt[r + 2];
|
||||
X[1] += Xt[r + 3];
|
||||
X[2] += Xt[r + 4];
|
||||
X[3] += Xt[r + 5];
|
||||
X[4] += Xt[r + 6];
|
||||
X[5] += Xt[r + 7] + T[1];
|
||||
X[6] += Xt[r + 8] + T[2];
|
||||
X[7] += Xt[r + 9] + r + 2;
|
||||
|
||||
T[3] = T[0];
|
||||
T[0] = T[1];
|
||||
T[1] = T[2];
|
||||
T[2] = T[3];
|
||||
}
|
||||
|
||||
S->X[0] = key[0] ^ X[0];
|
||||
S->X[1] = key[1] ^ X[1];
|
||||
S->X[2] = key[2] ^ X[2];
|
||||
S->X[3] = key[3] ^ X[3];
|
||||
S->X[4] = key[4] ^ X[4];
|
||||
S->X[5] = key[5] ^ X[5];
|
||||
S->X[6] = key[6] ^ X[6];
|
||||
S->X[7] = key[7] ^ X[7];
|
||||
|
||||
S->T[0] = T[0];
|
||||
S->T[1] = T[1] & ~0x4000000000000000ull;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_init(scrypt_hash_state *S) {
|
||||
S->X[0] = 0x4903ADFF749C51CEull;
|
||||
S->X[1] = 0x0D95DE399746DF03ull;
|
||||
S->X[2] = 0x8FD1934127C79BCEull;
|
||||
S->X[3] = 0x9A255629FF352CB1ull;
|
||||
S->X[4] = 0x5DB62599DF6CA7B0ull;
|
||||
S->X[5] = 0xEABE394CA9D5C3F4ull;
|
||||
S->X[6] = 0x991112C71A75B523ull;
|
||||
S->X[7] = 0xAE18A40B660FCC33ull;
|
||||
S->T[0] = 0x0000000000000000ull;
|
||||
S->T[1] = 0x7000000000000000ull;
|
||||
S->leftover = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
||||
size_t blocks, want;
|
||||
|
||||
/* skein processes the final <=64 bytes raw, so we can only update if there are at least 64+1 bytes available */
|
||||
if ((S->leftover + inlen) > SCRYPT_HASH_BLOCK_SIZE) {
|
||||
/* handle the previous data, we know there is enough for at least one block */
|
||||
if (S->leftover) {
|
||||
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
memcpy(S->buffer + S->leftover, in, want);
|
||||
in += want;
|
||||
inlen -= want;
|
||||
S->leftover = 0;
|
||||
skein512_blocks(S, S->buffer, 1, SCRYPT_HASH_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/* handle the current data if there's more than one block */
|
||||
if (inlen > SCRYPT_HASH_BLOCK_SIZE) {
|
||||
blocks = ((inlen - 1) & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
|
||||
skein512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE, SCRYPT_HASH_BLOCK_SIZE);
|
||||
inlen -= blocks;
|
||||
in += blocks;
|
||||
}
|
||||
}
|
||||
|
||||
/* handle leftover data */
|
||||
memcpy(S->buffer + S->leftover, in, inlen);
|
||||
S->leftover += (int) inlen;
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
||||
memset(S->buffer + S->leftover, 0, SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
||||
S->T[1] |= 0x8000000000000000ull;
|
||||
skein512_blocks(S, S->buffer, 1, S->leftover);
|
||||
|
||||
memset(S->buffer, 0, SCRYPT_HASH_BLOCK_SIZE);
|
||||
S->T[0] = 0;
|
||||
S->T[1] = 0xff00000000000000ull;
|
||||
skein512_blocks(S, S->buffer, 1, 8);
|
||||
|
||||
U64TO8_LE(&hash[ 0], S->X[0]);
|
||||
U64TO8_LE(&hash[ 8], S->X[1]);
|
||||
U64TO8_LE(&hash[16], S->X[2]);
|
||||
U64TO8_LE(&hash[24], S->X[3]);
|
||||
U64TO8_LE(&hash[32], S->X[4]);
|
||||
U64TO8_LE(&hash[40], S->X[5]);
|
||||
U64TO8_LE(&hash[48], S->X[6]);
|
||||
U64TO8_LE(&hash[56], S->X[7]);
|
||||
}
|
||||
|
||||
|
||||
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
|
||||
0x4d,0x52,0x29,0xff,0x10,0xbc,0xd2,0x62,0xd1,0x61,0x83,0xc8,0xe6,0xf0,0x83,0xc4,
|
||||
0x9f,0xf5,0x6a,0x42,0x75,0x2a,0x26,0x4e,0xf0,0x28,0x72,0x28,0x47,0xe8,0x23,0xdf,
|
||||
0x1e,0x64,0xf1,0x51,0x38,0x35,0x9d,0xc2,0x83,0xfc,0x35,0x4e,0xc0,0x52,0x5f,0x41,
|
||||
0x6a,0x0b,0x7d,0xf5,0xce,0x98,0xde,0x6f,0x36,0xd8,0x51,0x15,0x78,0x78,0x93,0x67,
|
||||
};
|
||||
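One detail of scrypt_hash_update() worth noting: because Skein pads and processes the final <=64 bytes inside scrypt_hash_finish(), the update path must always hold back between 1 and 64 bytes, and the (inlen - 1) & ~(SCRYPT_HASH_BLOCK_SIZE - 1) rounding is what guarantees that. A standalone sketch, independent of the miner build, that checks this arithmetic:

#include <assert.h>
#include <stddef.h>

#define BLOCK 64u  /* same value as SCRYPT_HASH_BLOCK_SIZE above */

int main(void) {
	size_t inlen;
	/* Mirrors the "more than one block" branch of scrypt_hash_update. */
	for (inlen = BLOCK + 1; inlen <= 8 * BLOCK; inlen++) {
		size_t blocks = (inlen - 1) & ~(size_t)(BLOCK - 1); /* bytes consumed now      */
		size_t kept   = inlen - blocks;                     /* bytes left for finish() */
		assert(blocks % BLOCK == 0);
		assert(kept >= 1 && kept <= BLOCK);
	}
	return 0;
}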
367
algo/argon2/ar2/sj/scrypt-jane-mix_salsa64-avx.h
Normal file
@@ -0,0 +1,367 @@
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_AVX
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx)
|
||||
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
a2(vmovdqa xmm4,[rax+64])
|
||||
a2(vmovdqa xmm5,[rax+80])
|
||||
a2(vmovdqa xmm6,[rax+96])
|
||||
a2(vmovdqa xmm7,[rax+112])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a3(vpxor xmm4,xmm4,[r9+64])
|
||||
a3(vpxor xmm5,xmm5,[r9+80])
|
||||
a3(vpxor xmm6,xmm6,[r9+96])
|
||||
a3(vpxor xmm7,xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_avx_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rsi+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rsi+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rsi+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rdx+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rdx+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rdx+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_avx_no_xor2:)
|
||||
a2(vmovdqa [rsp+0],xmm0)
|
||||
a2(vmovdqa [rsp+16],xmm1)
|
||||
a2(vmovdqa [rsp+32],xmm2)
|
||||
a2(vmovdqa [rsp+48],xmm3)
|
||||
a2(vmovdqa [rsp+64],xmm4)
|
||||
a2(vmovdqa [rsp+80],xmm5)
|
||||
a2(vmovdqa [rsp+96],xmm6)
|
||||
a2(vmovdqa [rsp+112],xmm7)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_avx_loop: )
|
||||
a3(vpaddq xmm8, xmm0, xmm2)
|
||||
a3(vpaddq xmm9, xmm1, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm8)
|
||||
a3(vpxor xmm7, xmm7, xmm9)
|
||||
a3(vpaddq xmm10, xmm0, xmm6)
|
||||
a3(vpaddq xmm11, xmm1, xmm7)
|
||||
a3(vpsrlq xmm8, xmm10, 51)
|
||||
a3(vpsrlq xmm9, xmm11, 51)
|
||||
a3(vpsllq xmm10, xmm10, 13)
|
||||
a3(vpsllq xmm11, xmm11, 13)
|
||||
a3(vpxor xmm4, xmm4, xmm8)
|
||||
a3(vpxor xmm5, xmm5, xmm9)
|
||||
a3(vpxor xmm4, xmm4, xmm10)
|
||||
a3(vpxor xmm5, xmm5, xmm11)
|
||||
a3(vpaddq xmm8, xmm6, xmm4)
|
||||
a3(vpaddq xmm9, xmm7, xmm5)
|
||||
a3(vpsrlq xmm10, xmm8, 25)
|
||||
a3(vpsrlq xmm11, xmm9, 25)
|
||||
a3(vpsllq xmm8, xmm8, 39)
|
||||
a3(vpsllq xmm9, xmm9, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm10)
|
||||
a3(vpxor xmm3, xmm3, xmm11)
|
||||
a3(vpxor xmm2, xmm2, xmm8)
|
||||
a3(vpxor xmm3, xmm3, xmm9)
|
||||
a3(vpaddq xmm10, xmm4, xmm2)
|
||||
a3(vpaddq xmm11, xmm5, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm10)
|
||||
a3(vpxor xmm1, xmm1, xmm11)
|
||||
a2(vmovdqa xmm8, xmm2)
|
||||
a2(vmovdqa xmm9, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm9, xmm8, 8)
|
||||
a4(vpalignr xmm7, xmm8, xmm9, 8)
|
||||
a3(vpaddq xmm10, xmm0, xmm2)
|
||||
a3(vpaddq xmm11, xmm1, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm10)
|
||||
a3(vpxor xmm7, xmm7, xmm11)
|
||||
a3(vpaddq xmm8, xmm0, xmm6)
|
||||
a3(vpaddq xmm9, xmm1, xmm7)
|
||||
a3(vpsrlq xmm10, xmm8, 51)
|
||||
a3(vpsrlq xmm11, xmm9, 51)
|
||||
a3(vpsllq xmm8, xmm8, 13)
|
||||
a3(vpsllq xmm9, xmm9, 13)
|
||||
a3(vpxor xmm5, xmm5, xmm10)
|
||||
a3(vpxor xmm4, xmm4, xmm11)
|
||||
a3(vpxor xmm5, xmm5, xmm8)
|
||||
a3(vpxor xmm4, xmm4, xmm9)
|
||||
a3(vpaddq xmm10, xmm6, xmm5)
|
||||
a3(vpaddq xmm11, xmm7, xmm4)
|
||||
a3(vpsrlq xmm8, xmm10, 25)
|
||||
a3(vpsrlq xmm9, xmm11, 25)
|
||||
a3(vpsllq xmm10, xmm10, 39)
|
||||
a3(vpsllq xmm11, xmm11, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm8)
|
||||
a3(vpxor xmm3, xmm3, xmm9)
|
||||
a3(vpxor xmm2, xmm2, xmm10)
|
||||
a3(vpxor xmm3, xmm3, xmm11)
|
||||
a3(vpaddq xmm8, xmm5, xmm2)
|
||||
a3(vpaddq xmm9, xmm4, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm8)
|
||||
a3(vpxor xmm1, xmm1, xmm9)
|
||||
a2(vmovdqa xmm10, xmm2)
|
||||
a2(vmovdqa xmm11, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm11, xmm10, 8)
|
||||
a4(vpalignr xmm7, xmm10, xmm11, 8)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa64_avx_loop)
|
||||
a3(vpaddq xmm0,xmm0,[rsp+0])
|
||||
a3(vpaddq xmm1,xmm1,[rsp+16])
|
||||
a3(vpaddq xmm2,xmm2,[rsp+32])
|
||||
a3(vpaddq xmm3,xmm3,[rsp+48])
|
||||
a3(vpaddq xmm4,xmm4,[rsp+64])
|
||||
a3(vpaddq xmm5,xmm5,[rsp+80])
|
||||
a3(vpaddq xmm6,xmm6,[rsp+96])
|
||||
a3(vpaddq xmm7,xmm7,[rsp+112])
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
a2(vmovdqa [rax+64],xmm4)
|
||||
a2(vmovdqa [rax+80],xmm5)
|
||||
a2(vmovdqa [rax+96],xmm6)
|
||||
a2(vmovdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_avx_loop)
|
||||
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_AVX
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_avx(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
x4 = xmmp[4];
|
||||
x5 = xmmp[5];
|
||||
x6 = xmmp[6];
|
||||
x7 = xmmp[7];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
t4 = x4;
|
||||
t5 = x5;
|
||||
t6 = x6;
|
||||
t7 = x7;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x4 = _mm_xor_si128(x4, z2);
|
||||
x5 = _mm_xor_si128(x5, z3);
|
||||
x4 = _mm_xor_si128(x4, z0);
|
||||
x5 = _mm_xor_si128(x5, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x4, x6);
|
||||
z1 = _mm_add_epi64(x5, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x4);
|
||||
z1 = _mm_add_epi64(x3, x5);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x5 = _mm_xor_si128(x5, z2);
|
||||
x4 = _mm_xor_si128(x4, z3);
|
||||
x5 = _mm_xor_si128(x5, z0);
|
||||
x4 = _mm_xor_si128(x4, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x5, x6);
|
||||
z1 = _mm_add_epi64(x4, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x5);
|
||||
z1 = _mm_add_epi64(x3, x4);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi64(x0, t0);
|
||||
x1 = _mm_add_epi64(x1, t1);
|
||||
x2 = _mm_add_epi64(x2, t2);
|
||||
x3 = _mm_add_epi64(x3, t3);
|
||||
x4 = _mm_add_epi64(x4, t4);
|
||||
x5 = _mm_add_epi64(x5, t5);
|
||||
x6 = _mm_add_epi64(x6, t6);
|
||||
x7 = _mm_add_epi64(x7, t7);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
xmmp[4] = x4;
|
||||
xmmp[5] = x5;
|
||||
xmmp[6] = x6;
|
||||
xmmp[7] = x7;
|
||||
}
|
||||
}
|
||||
|
||||
#endif

#if defined(SCRYPT_SALSA64_AVX)
/* uses salsa64_core_tangle_sse2 */

#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa64/8-AVX"
#undef SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_INCLUDED
#endif
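Both the assembly and the intrinsic ChunkMix routines above write input block i to output block (i / 2) + half, with half toggling between 0 and r on every iteration; that is how step 6 of scrypt's BlockMix (B'[0..r-1] = Y_even, B'[r..2r-1] = Y_odd) is realized without a second pass. A small standalone check of that index mapping, assuming nothing beyond the loop structure shown above:

#include <assert.h>
#include <stdint.h>

int main(void) {
	/* Mirror the output indexing of scrypt_ChunkMix_*. */
	uint32_t r = 4, blocksPerChunk = r * 2, half = 0, i;
	for (i = 0; i < blocksPerChunk; i++, half ^= r) {
		uint32_t out = (i / 2) + half;
		if ((i & 1) == 0)
			assert(out == i / 2);          /* even blocks -> B'[0..r-1]  */
		else
			assert(out == r + i / 2);      /* odd blocks  -> B'[r..2r-1] */
	}
	return 0;
}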
221
algo/argon2/ar2/sj/scrypt-jane-mix_salsa64-avx2.h
Normal file
@@ -0,0 +1,221 @@
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_AVX2
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_avx2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx2)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa ymm0,[rax+0])
|
||||
a2(vmovdqa ymm1,[rax+32])
|
||||
a2(vmovdqa ymm2,[rax+64])
|
||||
a2(vmovdqa ymm3,[rax+96])
|
||||
aj(jz scrypt_ChunkMix_avx2_no_xor1)
|
||||
a3(vpxor ymm0,ymm0,[r9+0])
|
||||
a3(vpxor ymm1,ymm1,[r9+32])
|
||||
a3(vpxor ymm2,ymm2,[r9+64])
|
||||
a3(vpxor ymm3,ymm3,[r9+96])
|
||||
a1(scrypt_ChunkMix_avx2_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx2_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor ymm0,ymm0,[rsi+r9+0])
|
||||
a3(vpxor ymm1,ymm1,[rsi+r9+32])
|
||||
a3(vpxor ymm2,ymm2,[rsi+r9+64])
|
||||
a3(vpxor ymm3,ymm3,[rsi+r9+96])
|
||||
aj(jz scrypt_ChunkMix_avx2_no_xor2)
|
||||
a3(vpxor ymm0,ymm0,[rdx+r9+0])
|
||||
a3(vpxor ymm1,ymm1,[rdx+r9+32])
|
||||
a3(vpxor ymm2,ymm2,[rdx+r9+64])
|
||||
a3(vpxor ymm3,ymm3,[rdx+r9+96])
|
||||
a1(scrypt_ChunkMix_avx2_no_xor2:)
|
||||
a2(vmovdqa ymm6,ymm0)
|
||||
a2(vmovdqa ymm7,ymm1)
|
||||
a2(vmovdqa ymm8,ymm2)
|
||||
a2(vmovdqa ymm9,ymm3)
|
||||
a2(mov rax,4)
|
||||
a1(scrypt_salsa64_avx2_loop: )
|
||||
a3(vpaddq ymm4, ymm1, ymm0)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpxor ymm3, ymm3, ymm4)
|
||||
a3(vpaddq ymm4, ymm0, ymm3)
|
||||
a3(vpsrlq ymm5, ymm4, 51)
|
||||
a3(vpxor ymm2, ymm2, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 13)
|
||||
a3(vpxor ymm2, ymm2, ymm4)
|
||||
a3(vpaddq ymm4, ymm3, ymm2)
|
||||
a3(vpsrlq ymm5, ymm4, 25)
|
||||
a3(vpxor ymm1, ymm1, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 39)
|
||||
a3(vpxor ymm1, ymm1, ymm4)
|
||||
a3(vpaddq ymm4, ymm2, ymm1)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpermq ymm1, ymm1, 0x39)
|
||||
a3(vpermq ymm10, ymm2, 0x4e)
|
||||
a3(vpxor ymm0, ymm0, ymm4)
|
||||
a3(vpermq ymm3, ymm3, 0x93)
|
||||
a3(vpaddq ymm4, ymm3, ymm0)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpxor ymm1, ymm1, ymm4)
|
||||
a3(vpaddq ymm4, ymm0, ymm1)
|
||||
a3(vpsrlq ymm5, ymm4, 51)
|
||||
a3(vpxor ymm10, ymm10, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 13)
|
||||
a3(vpxor ymm10, ymm10, ymm4)
|
||||
a3(vpaddq ymm4, ymm1, ymm10)
|
||||
a3(vpsrlq ymm5, ymm4, 25)
|
||||
a3(vpxor ymm3, ymm3, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 39)
|
||||
a3(vpermq ymm1, ymm1, 0x93)
|
||||
a3(vpxor ymm3, ymm3, ymm4)
|
||||
a3(vpermq ymm2, ymm10, 0x4e)
|
||||
a3(vpaddq ymm4, ymm10, ymm3)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpermq ymm3, ymm3, 0x39)
|
||||
a3(vpxor ymm0, ymm0, ymm4)
|
||||
a1(dec rax)
|
||||
aj(jnz scrypt_salsa64_avx2_loop)
|
||||
a3(vpaddq ymm0,ymm0,ymm6)
|
||||
a3(vpaddq ymm1,ymm1,ymm7)
|
||||
a3(vpaddq ymm2,ymm2,ymm8)
|
||||
a3(vpaddq ymm3,ymm3,ymm9)
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],ymm0)
|
||||
a2(vmovdqa [rax+32],ymm1)
|
||||
a2(vmovdqa [rax+64],ymm2)
|
||||
a2(vmovdqa [rax+96],ymm3)
|
||||
aj(jne scrypt_ChunkMix_avx2_loop)
|
||||
a1(vzeroupper)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_AVX2
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_avx2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
ymmi *ymmp,y0,y1,y2,y3,t0,t1,t2,t3,z0,z1;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
ymmp = (ymmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
y0 = ymmp[0];
|
||||
y1 = ymmp[1];
|
||||
y2 = ymmp[2];
|
||||
y3 = ymmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
ymmp = (ymmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
y0 = _mm256_xor_si256(y0, ymmp[0]);
|
||||
y1 = _mm256_xor_si256(y1, ymmp[1]);
|
||||
y2 = _mm256_xor_si256(y2, ymmp[2]);
|
||||
y3 = _mm256_xor_si256(y3, ymmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
ymmp = (ymmi *)scrypt_block(Bin, i);
|
||||
y0 = _mm256_xor_si256(y0, ymmp[0]);
|
||||
y1 = _mm256_xor_si256(y1, ymmp[1]);
|
||||
y2 = _mm256_xor_si256(y2, ymmp[2]);
|
||||
y3 = _mm256_xor_si256(y3, ymmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
ymmp = (ymmi *)scrypt_block(Bxor, i);
|
||||
y0 = _mm256_xor_si256(y0, ymmp[0]);
|
||||
y1 = _mm256_xor_si256(y1, ymmp[1]);
|
||||
y2 = _mm256_xor_si256(y2, ymmp[2]);
|
||||
y3 = _mm256_xor_si256(y3, ymmp[3]);
|
||||
}
|
||||
|
||||
t0 = y0;
|
||||
t1 = y1;
|
||||
t2 = y2;
|
||||
t3 = y3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
z0 = _mm256_add_epi64(y0, y1);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y3 = _mm256_xor_si256(y3, z0);
|
||||
z0 = _mm256_add_epi64(y3, y0);
|
||||
z1 = _mm256_srli_epi64(z0, 64-13);
|
||||
y2 = _mm256_xor_si256(y2, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 13);
|
||||
y2 = _mm256_xor_si256(y2, z0);
|
||||
z0 = _mm256_add_epi64(y2, y3);
|
||||
z1 = _mm256_srli_epi64(z0, 64-39);
|
||||
y1 = _mm256_xor_si256(y1, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 39);
|
||||
y1 = _mm256_xor_si256(y1, z0);
|
||||
y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(0,3,2,1));
|
||||
y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
|
||||
y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(2,1,0,3));
|
||||
z0 = _mm256_add_epi64(y1, y2);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y0 = _mm256_xor_si256(y0, z0);
|
||||
z0 = _mm256_add_epi64(y0, y3);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y1 = _mm256_xor_si256(y1, z0);
|
||||
z0 = _mm256_add_epi64(y1, y0);
|
||||
z1 = _mm256_srli_epi64(z0, 64-13);
|
||||
y2 = _mm256_xor_si256(y2, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 13);
|
||||
y2 = _mm256_xor_si256(y2, z0);
|
||||
z0 = _mm256_add_epi64(y2, y1);
|
||||
z1 = _mm256_srli_epi64(z0, 64-39);
|
||||
y3 = _mm256_xor_si256(y3, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 39);
|
||||
y3 = _mm256_xor_si256(y3, z0);
|
||||
z0 = _mm256_add_epi64(y3, y2);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y0 = _mm256_xor_si256(y0, z0);
|
||||
y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(2,1,0,3));
|
||||
y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
|
||||
y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(0,3,2,1));
|
||||
}
|
||||
|
||||
y0 = _mm256_add_epi64(y0, t0);
|
||||
y1 = _mm256_add_epi64(y1, t1);
|
||||
y2 = _mm256_add_epi64(y2, t2);
|
||||
y3 = _mm256_add_epi64(y3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
ymmp = (ymmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
ymmp[0] = y0;
|
||||
ymmp[1] = y1;
|
||||
ymmp[2] = y2;
|
||||
ymmp[3] = y3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif

#if defined(SCRYPT_SALSA64_AVX2)
/* uses salsa64_core_tangle_sse2 */

#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa64/8-AVX2"
#undef SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_INCLUDED
#endif
449
algo/argon2/ar2/sj/scrypt-jane-mix_salsa64-sse2.h
Normal file
@@ -0,0 +1,449 @@
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_SSE2
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
a2(movdqa xmm4,[rax+64])
|
||||
a2(movdqa xmm5,[rax+80])
|
||||
a2(movdqa xmm6,[rax+96])
|
||||
a2(movdqa xmm7,[rax+112])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a2(pxor xmm4,[r9+64])
|
||||
a2(pxor xmm5,[r9+80])
|
||||
a2(pxor xmm6,[r9+96])
|
||||
a2(pxor xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
a2(pxor xmm4,[rsi+r9+64])
|
||||
a2(pxor xmm5,[rsi+r9+80])
|
||||
a2(pxor xmm6,[rsi+r9+96])
|
||||
a2(pxor xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a2(pxor xmm4,[rdx+r9+64])
|
||||
a2(pxor xmm5,[rdx+r9+80])
|
||||
a2(pxor xmm6,[rdx+r9+96])
|
||||
a2(pxor xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
a2(movdqa [rsp+0],xmm0)
|
||||
a2(movdqa [rsp+16],xmm1)
|
||||
a2(movdqa [rsp+32],xmm2)
|
||||
a2(movdqa [rsp+48],xmm3)
|
||||
a2(movdqa [rsp+64],xmm4)
|
||||
a2(movdqa [rsp+80],xmm5)
|
||||
a2(movdqa [rsp+96],xmm6)
|
||||
a2(movdqa [rsp+112],xmm7)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_sse2_loop: )
|
||||
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm4, xmm10)
|
||||
a2(pxor xmm5, xmm11)
|
||||
a2(pxor xmm4, xmm8)
|
||||
a2(pxor xmm5, xmm9)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm4)
|
||||
a2(paddq xmm11, xmm5)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
a2(movdqa xmm8, xmm4)
|
||||
a2(movdqa xmm9, xmm5)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
a2(movdqa xmm8, xmm2)
|
||||
a2(movdqa xmm9, xmm3)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(movdqa xmm2, xmm7)
|
||||
a2(movdqa xmm3, xmm6)
|
||||
a2(punpcklqdq xmm10, xmm6)
|
||||
a2(punpcklqdq xmm11, xmm7)
|
||||
a2(movdqa xmm6, xmm8)
|
||||
a2(movdqa xmm7, xmm9)
|
||||
a2(punpcklqdq xmm9, xmm9)
|
||||
a2(punpcklqdq xmm8, xmm8)
|
||||
a2(punpckhqdq xmm2, xmm10)
|
||||
a2(punpckhqdq xmm3, xmm11)
|
||||
a2(punpckhqdq xmm6, xmm9)
|
||||
a2(punpckhqdq xmm7, xmm8)
|
||||
a2(sub rax, 2)
|
||||
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm5, xmm10)
|
||||
a2(pxor xmm4, xmm11)
|
||||
a2(pxor xmm5, xmm8)
|
||||
a2(pxor xmm4, xmm9)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm5)
|
||||
a2(paddq xmm11, xmm4)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
a2(movdqa xmm8, xmm5)
|
||||
a2(movdqa xmm9, xmm4)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
a2(movdqa xmm8, xmm2)
|
||||
a2(movdqa xmm9, xmm3)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(movdqa xmm2, xmm7)
|
||||
a2(movdqa xmm3, xmm6)
|
||||
a2(punpcklqdq xmm10, xmm6)
|
||||
a2(punpcklqdq xmm11, xmm7)
|
||||
a2(movdqa xmm6, xmm8)
|
||||
a2(movdqa xmm7, xmm9)
|
||||
a2(punpcklqdq xmm9, xmm9)
|
||||
a2(punpcklqdq xmm8, xmm8)
|
||||
a2(punpckhqdq xmm2, xmm10)
|
||||
a2(punpckhqdq xmm3, xmm11)
|
||||
a2(punpckhqdq xmm6, xmm9)
|
||||
a2(punpckhqdq xmm7, xmm8)
|
||||
aj(ja scrypt_salsa64_sse2_loop)
|
||||
a2(paddq xmm0,[rsp+0])
|
||||
a2(paddq xmm1,[rsp+16])
|
||||
a2(paddq xmm2,[rsp+32])
|
||||
a2(paddq xmm3,[rsp+48])
|
||||
a2(paddq xmm4,[rsp+64])
|
||||
a2(paddq xmm5,[rsp+80])
|
||||
a2(paddq xmm6,[rsp+96])
|
||||
a2(paddq xmm7,[rsp+112])
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
a2(movdqa [rax+64],xmm4)
|
||||
a2(movdqa [rax+80],xmm5)
|
||||
a2(movdqa [rax+96],xmm6)
|
||||
a2(movdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_SSE2
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_sse2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
x4 = xmmp[4];
|
||||
x5 = xmmp[5];
|
||||
x6 = xmmp[6];
|
||||
x7 = xmmp[7];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
t4 = x4;
|
||||
t5 = x5;
|
||||
t6 = x6;
|
||||
t7 = x7;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x4 = _mm_xor_si128(x4, z2);
|
||||
x5 = _mm_xor_si128(x5, z3);
|
||||
x4 = _mm_xor_si128(x4, z0);
|
||||
x5 = _mm_xor_si128(x5, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x4, x6);
|
||||
z1 = _mm_add_epi64(x5, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x4);
|
||||
z1 = _mm_add_epi64(x3, x5);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x4;
|
||||
z1 = x5;
|
||||
z2 = x2;
|
||||
z3 = x3;
|
||||
x4 = z1;
|
||||
x5 = z0;
|
||||
x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6));
|
||||
x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7));
|
||||
x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3));
|
||||
x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2));
|
||||
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x4 = _mm_xor_si128(x4, z2);
|
||||
x5 = _mm_xor_si128(x5, z3);
|
||||
x4 = _mm_xor_si128(x4, z0);
|
||||
x5 = _mm_xor_si128(x5, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x4, x6);
|
||||
z1 = _mm_add_epi64(x5, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x4);
|
||||
z1 = _mm_add_epi64(x3, x5);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x4;
|
||||
z1 = x5;
|
||||
z2 = x2;
|
||||
z3 = x3;
|
||||
x4 = z1;
|
||||
x5 = z0;
|
||||
x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6));
|
||||
x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7));
|
||||
x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3));
|
||||
x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2));
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi64(x0, t0);
|
||||
x1 = _mm_add_epi64(x1, t1);
|
||||
x2 = _mm_add_epi64(x2, t2);
|
||||
x3 = _mm_add_epi64(x3, t3);
|
||||
x4 = _mm_add_epi64(x4, t4);
|
||||
x5 = _mm_add_epi64(x5, t5);
|
||||
x6 = _mm_add_epi64(x6, t6);
|
||||
x7 = _mm_add_epi64(x7, t7);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
xmmp[4] = x4;
|
||||
xmmp[5] = x5;
|
||||
xmmp[6] = x6;
|
||||
xmmp[7] = x7;
|
||||
}
|
||||
}
|
||||
|
||||
#endif

#if defined(SCRYPT_SALSA64_SSE2)
#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa64/8-SSE2"
#undef SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_INCLUDED
#endif

/* sse3/avx use this as well */
#if defined(SCRYPT_SALSA64_INCLUDED)
/*
	Default layout:
	 0  1  2  3
	 4  5  6  7
	 8  9 10 11
	12 13 14 15

	SSE2 layout:
	 0  5 10 15
	12  1  6 11
	 8 13  2  7
	 4  9 14  3
*/


static void asm_calling_convention
salsa64_core_tangle_sse2(uint64_t *blocks, size_t count) {
	uint64_t t;
	while (count--) {
		t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t;
		t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t;
		t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t;
		t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t;
		t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t;
		t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t;
		blocks += 16;
	}
}
#endif
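salsa64_core_tangle_sse2() converts between the default word order and the shuffled SSE2 layout shown in the comment; since it is built purely from pairwise swaps, applying it twice restores the original order, which is why the same routine can be used in both directions. A standalone sketch verifying that property for one 16-word block:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Same swaps as salsa64_core_tangle_sse2, restated for a single block. */
static void tangle_block(uint64_t b[16]) {
	uint64_t t;
	t = b[1]; b[1] = b[5];  b[5]  = t;
	t = b[2]; b[2] = b[10]; b[10] = t;
	t = b[3]; b[3] = b[15]; b[15] = t;
	t = b[4]; b[4] = b[12]; b[12] = t;
	t = b[7]; b[7] = b[11]; b[11] = t;
	t = b[9]; b[9] = b[13]; b[13] = t;
}

int main(void) {
	uint64_t a[16], b[16];
	int i;
	for (i = 0; i < 16; i++) a[i] = b[i] = (uint64_t)i;
	tangle_block(b);
	tangle_block(b);               /* the tangle is its own inverse */
	assert(memcmp(a, b, sizeof a) == 0);
	return 0;
}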
399
algo/argon2/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h
Normal file
@@ -0,0 +1,399 @@
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_SSSE3
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_ssse3)
|
||||
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
a2(movdqa xmm4,[rax+64])
|
||||
a2(movdqa xmm5,[rax+80])
|
||||
a2(movdqa xmm6,[rax+96])
|
||||
a2(movdqa xmm7,[rax+112])
|
||||
aj(jz scrypt_ChunkMix_ssse3_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a2(pxor xmm4,[r9+64])
|
||||
a2(pxor xmm5,[r9+80])
|
||||
a2(pxor xmm6,[r9+96])
|
||||
a2(pxor xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_ssse3_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
a2(pxor xmm4,[rsi+r9+64])
|
||||
a2(pxor xmm5,[rsi+r9+80])
|
||||
a2(pxor xmm6,[rsi+r9+96])
|
||||
a2(pxor xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_ssse3_no_xor2)
|
||||
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a2(pxor xmm4,[rdx+r9+64])
|
||||
a2(pxor xmm5,[rdx+r9+80])
|
||||
a2(pxor xmm6,[rdx+r9+96])
|
||||
a2(pxor xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_ssse3_no_xor2:)
|
||||
a2(movdqa [rsp+0],xmm0)
|
||||
a2(movdqa [rsp+16],xmm1)
|
||||
a2(movdqa [rsp+32],xmm2)
|
||||
a2(movdqa [rsp+48],xmm3)
|
||||
a2(movdqa [rsp+64],xmm4)
|
||||
a2(movdqa [rsp+80],xmm5)
|
||||
a2(movdqa [rsp+96],xmm6)
|
||||
a2(movdqa [rsp+112],xmm7)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_ssse3_loop: )
|
||||
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm4, xmm10)
|
||||
a2(pxor xmm5, xmm11)
|
||||
a2(pxor xmm4, xmm8)
|
||||
a2(pxor xmm5, xmm9)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm4)
|
||||
a2(paddq xmm11, xmm5)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
a2(movdqa xmm8, xmm4)
|
||||
a2(movdqa xmm9, xmm5)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
a2(movdqa xmm10, xmm2)
|
||||
a2(movdqa xmm11, xmm3)
|
||||
a2(movdqa xmm2, xmm6)
|
||||
a2(movdqa xmm3, xmm7)
|
||||
a3(palignr xmm2, xmm7, 8)
|
||||
a3(palignr xmm3, xmm6, 8)
|
||||
a2(movdqa xmm6, xmm11)
|
||||
a2(movdqa xmm7, xmm10)
|
||||
a3(palignr xmm6, xmm10, 8)
|
||||
a3(palignr xmm7, xmm11, 8)
|
||||
a2(sub rax, 2)
|
||||
a2(movdqa xmm8, xmm0)
|
||||
a2(movdqa xmm9, xmm1)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm6, xmm8)
|
||||
a2(pxor xmm7, xmm9)
|
||||
a2(movdqa xmm10, xmm0)
|
||||
a2(movdqa xmm11, xmm1)
|
||||
a2(paddq xmm10, xmm6)
|
||||
a2(paddq xmm11, xmm7)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 51)
|
||||
a2(psrlq xmm11, 51)
|
||||
a2(psllq xmm8, 13)
|
||||
a2(psllq xmm9, 13)
|
||||
a2(pxor xmm5, xmm10)
|
||||
a2(pxor xmm4, xmm11)
|
||||
a2(pxor xmm5, xmm8)
|
||||
a2(pxor xmm4, xmm9)
|
||||
a2(movdqa xmm10, xmm6)
|
||||
a2(movdqa xmm11, xmm7)
|
||||
a2(paddq xmm10, xmm5)
|
||||
a2(paddq xmm11, xmm4)
|
||||
a2(movdqa xmm8, xmm10)
|
||||
a2(movdqa xmm9, xmm11)
|
||||
a2(psrlq xmm10, 25)
|
||||
a2(psrlq xmm11, 25)
|
||||
a2(psllq xmm8, 39)
|
||||
a2(psllq xmm9, 39)
|
||||
a2(pxor xmm2, xmm10)
|
||||
a2(pxor xmm3, xmm11)
|
||||
a2(pxor xmm2, xmm8)
|
||||
a2(pxor xmm3, xmm9)
|
||||
a2(movdqa xmm8, xmm5)
|
||||
a2(movdqa xmm9, xmm4)
|
||||
a2(paddq xmm8, xmm2)
|
||||
a2(paddq xmm9, xmm3)
|
||||
a3(pshufd xmm8, xmm8, 0xb1)
|
||||
a3(pshufd xmm9, xmm9, 0xb1)
|
||||
a2(pxor xmm0, xmm8)
|
||||
a2(pxor xmm1, xmm9)
|
||||
a2(movdqa xmm10, xmm2)
|
||||
a2(movdqa xmm11, xmm3)
|
||||
a2(movdqa xmm2, xmm6)
|
||||
a2(movdqa xmm3, xmm7)
|
||||
a3(palignr xmm2, xmm7, 8)
|
||||
a3(palignr xmm3, xmm6, 8)
|
||||
a2(movdqa xmm6, xmm11)
|
||||
a2(movdqa xmm7, xmm10)
|
||||
a3(palignr xmm6, xmm10, 8)
|
||||
a3(palignr xmm7, xmm11, 8)
|
||||
aj(ja scrypt_salsa64_ssse3_loop)
|
||||
a2(paddq xmm0,[rsp+0])
|
||||
a2(paddq xmm1,[rsp+16])
|
||||
a2(paddq xmm2,[rsp+32])
|
||||
a2(paddq xmm3,[rsp+48])
|
||||
a2(paddq xmm4,[rsp+64])
|
||||
a2(paddq xmm5,[rsp+80])
|
||||
a2(paddq xmm6,[rsp+96])
|
||||
a2(paddq xmm7,[rsp+112])
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
a2(movdqa [rax+64],xmm4)
|
||||
a2(movdqa [rax+80],xmm5)
|
||||
a2(movdqa [rax+96],xmm6)
|
||||
a2(movdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_ssse3_loop)
|
||||
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_ssse3)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_SSSE3
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_ssse3(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
x4 = xmmp[4];
|
||||
x5 = xmmp[5];
|
||||
x6 = xmmp[6];
|
||||
x7 = xmmp[7];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
t4 = x4;
|
||||
t5 = x5;
|
||||
t6 = x6;
|
||||
t7 = x7;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x4 = _mm_xor_si128(x4, z2);
|
||||
x5 = _mm_xor_si128(x5, z3);
|
||||
x4 = _mm_xor_si128(x4, z0);
|
||||
x5 = _mm_xor_si128(x5, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x4, x6);
|
||||
z1 = _mm_add_epi64(x5, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x4);
|
||||
z1 = _mm_add_epi64(x3, x5);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x5 = _mm_xor_si128(x5, z2);
|
||||
x4 = _mm_xor_si128(x4, z3);
|
||||
x5 = _mm_xor_si128(x5, z0);
|
||||
x4 = _mm_xor_si128(x4, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x5, x6);
|
||||
z1 = _mm_add_epi64(x4, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x5);
|
||||
z1 = _mm_add_epi64(x3, x4);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi64(x0, t0);
|
||||
x1 = _mm_add_epi64(x1, t1);
|
||||
x2 = _mm_add_epi64(x2, t2);
|
||||
x3 = _mm_add_epi64(x3, t3);
|
||||
x4 = _mm_add_epi64(x4, t4);
|
||||
x5 = _mm_add_epi64(x5, t5);
|
||||
x6 = _mm_add_epi64(x6, t6);
|
||||
x7 = _mm_add_epi64(x7, t7);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
xmmp[4] = x4;
|
||||
xmmp[5] = x5;
|
||||
xmmp[6] = x6;
|
||||
xmmp[7] = x7;
|
||||
}
|
||||
}
|
||||
|
||||
#endif

#if defined(SCRYPT_SALSA64_SSSE3)
/* uses salsa64_core_tangle_sse2 */

#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa64/8-SSSE3"
#undef SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_INCLUDED
#endif
335
algo/argon2/ar2/sj/scrypt-jane-mix_salsa64-xop.h
Normal file
@@ -0,0 +1,335 @@
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_XOP
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_xop)
|
||||
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
a2(vmovdqa xmm4,[rax+64])
|
||||
a2(vmovdqa xmm5,[rax+80])
|
||||
a2(vmovdqa xmm6,[rax+96])
|
||||
a2(vmovdqa xmm7,[rax+112])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a3(vpxor xmm4,xmm4,[r9+64])
|
||||
a3(vpxor xmm5,xmm5,[r9+80])
|
||||
a3(vpxor xmm6,xmm6,[r9+96])
|
||||
a3(vpxor xmm7,xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_xop_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_xop_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rsi+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rsi+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rsi+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rdx+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rdx+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rdx+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_xop_no_xor2:)
|
||||
a2(vmovdqa [rsp+0],xmm0)
|
||||
a2(vmovdqa [rsp+16],xmm1)
|
||||
a2(vmovdqa [rsp+32],xmm2)
|
||||
a2(vmovdqa [rsp+48],xmm3)
|
||||
a2(vmovdqa [rsp+64],xmm4)
|
||||
a2(vmovdqa [rsp+80],xmm5)
|
||||
a2(vmovdqa [rsp+96],xmm6)
|
||||
a2(vmovdqa [rsp+112],xmm7)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_xop_loop: )
|
||||
a3(vpaddq xmm8, xmm0, xmm2)
|
||||
a3(vpaddq xmm9, xmm1, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm8)
|
||||
a3(vpxor xmm7, xmm7, xmm9)
|
||||
a3(vpaddq xmm10, xmm0, xmm6)
|
||||
a3(vpaddq xmm11, xmm1, xmm7)
|
||||
a3(vprotq xmm10, xmm10, 13)
|
||||
a3(vprotq xmm11, xmm11, 13)
|
||||
a3(vpxor xmm4, xmm4, xmm10)
|
||||
a3(vpxor xmm5, xmm5, xmm11)
|
||||
a3(vpaddq xmm8, xmm6, xmm4)
|
||||
a3(vpaddq xmm9, xmm7, xmm5)
|
||||
a3(vprotq xmm8, xmm8, 39)
|
||||
a3(vprotq xmm9, xmm9, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm8)
|
||||
a3(vpxor xmm3, xmm3, xmm9)
|
||||
a3(vpaddq xmm10, xmm4, xmm2)
|
||||
a3(vpaddq xmm11, xmm5, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm10)
|
||||
a3(vpxor xmm1, xmm1, xmm11)
|
||||
a2(vmovdqa xmm8, xmm2)
|
||||
a2(vmovdqa xmm9, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm9, xmm8, 8)
|
||||
a4(vpalignr xmm7, xmm8, xmm9, 8)
|
||||
a3(vpaddq xmm10, xmm0, xmm2)
|
||||
a3(vpaddq xmm11, xmm1, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm10)
|
||||
a3(vpxor xmm7, xmm7, xmm11)
|
||||
a3(vpaddq xmm8, xmm0, xmm6)
|
||||
a3(vpaddq xmm9, xmm1, xmm7)
|
||||
a3(vprotq xmm8, xmm8, 13)
|
||||
a3(vprotq xmm9, xmm9, 13)
|
||||
a3(vpxor xmm5, xmm5, xmm8)
|
||||
a3(vpxor xmm4, xmm4, xmm9)
|
||||
a3(vpaddq xmm10, xmm6, xmm5)
|
||||
a3(vpaddq xmm11, xmm7, xmm4)
|
||||
a3(vprotq xmm10, xmm10, 39)
|
||||
a3(vprotq xmm11, xmm11, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm10)
|
||||
a3(vpxor xmm3, xmm3, xmm11)
|
||||
a3(vpaddq xmm8, xmm5, xmm2)
|
||||
a3(vpaddq xmm9, xmm4, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm8)
|
||||
a3(vpxor xmm1, xmm1, xmm9)
|
||||
a2(vmovdqa xmm10, xmm2)
|
||||
a2(vmovdqa xmm11, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm11, xmm10, 8)
|
||||
a4(vpalignr xmm7, xmm10, xmm11, 8)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa64_xop_loop)
|
||||
a3(vpaddq xmm0,xmm0,[rsp+0])
|
||||
a3(vpaddq xmm1,xmm1,[rsp+16])
|
||||
a3(vpaddq xmm2,xmm2,[rsp+32])
|
||||
a3(vpaddq xmm3,xmm3,[rsp+48])
|
||||
a3(vpaddq xmm4,xmm4,[rsp+64])
|
||||
a3(vpaddq xmm5,xmm5,[rsp+80])
|
||||
a3(vpaddq xmm6,xmm6,[rsp+96])
|
||||
a3(vpaddq xmm7,xmm7,[rsp+112])
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
a2(vmovdqa [rax+64],xmm4)
|
||||
a2(vmovdqa [rax+80],xmm5)
|
||||
a2(vmovdqa [rax+96],xmm6)
|
||||
a2(vmovdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_xop_loop)
|
||||
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_xop)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_XOP
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_xop(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
x4 = xmmp[4];
|
||||
x5 = xmmp[5];
|
||||
x6 = xmmp[6];
|
||||
x7 = xmmp[7];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
t4 = x4;
|
||||
t5 = x5;
|
||||
t6 = x6;
|
||||
t7 = x7;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z0 = _mm_roti_epi64(z0, 13);
|
||||
z1 = _mm_roti_epi64(z1, 13);
|
||||
x4 = _mm_xor_si128(x4, z0);
|
||||
x5 = _mm_xor_si128(x5, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x4, x6);
|
||||
z1 = _mm_add_epi64(x5, x7);
|
||||
z0 = _mm_roti_epi64(z0, 39);
|
||||
z1 = _mm_roti_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x4);
|
||||
z1 = _mm_add_epi64(x3, x5);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z0 = _mm_roti_epi64(z0, 13);
|
||||
z1 = _mm_roti_epi64(z1, 13);
|
||||
x5 = _mm_xor_si128(x5, z0);
|
||||
x4 = _mm_xor_si128(x4, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x5, x6);
|
||||
z1 = _mm_add_epi64(x4, x7);
|
||||
z0 = _mm_roti_epi64(z0, 39);
|
||||
z1 = _mm_roti_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x5);
|
||||
z1 = _mm_add_epi64(x3, x4);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi64(x0, t0);
|
||||
x1 = _mm_add_epi64(x1, t1);
|
||||
x2 = _mm_add_epi64(x2, t2);
|
||||
x3 = _mm_add_epi64(x3, t3);
|
||||
x4 = _mm_add_epi64(x4, t4);
|
||||
x5 = _mm_add_epi64(x5, t5);
|
||||
x6 = _mm_add_epi64(x6, t6);
|
||||
x7 = _mm_add_epi64(x7, t7);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
xmmp[4] = x4;
|
||||
xmmp[5] = x5;
|
||||
xmmp[6] = x6;
|
||||
xmmp[7] = x7;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_XOP)
|
||||
/* uses salsa64_core_tangle_sse2 */
|
||||
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa64/8-XOP"
|
||||
#undef SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_INCLUDED
|
||||
#endif
|
||||
41
algo/argon2/ar2/sj/scrypt-jane-mix_salsa64.h
Normal file
41
algo/argon2/ar2/sj/scrypt-jane-mix_salsa64.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)
|
||||
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa64/8 Ref"
|
||||
|
||||
#undef SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_BASIC
|
||||
|
||||
static void
|
||||
salsa64_core_basic(uint64_t state[16]) {
|
||||
const size_t rounds = 8;
|
||||
uint64_t v[16], t;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < 16; i++) v[i] = state[i];
|
||||
|
||||
#define G(a,b,c,d) \
|
||||
t = v[a]+v[d]; t = ROTL64(t, 32); v[b] ^= t; \
|
||||
t = v[b]+v[a]; t = ROTL64(t, 13); v[c] ^= t; \
|
||||
t = v[c]+v[b]; t = ROTL64(t, 39); v[d] ^= t; \
|
||||
t = v[d]+v[c]; t = ROTL64(t, 32); v[a] ^= t; \
|
||||
|
||||
for (i = 0; i < rounds; i += 2) {
|
||||
G( 0, 4, 8,12);
|
||||
G( 5, 9,13, 1);
|
||||
G(10,14, 2, 6);
|
||||
G(15, 3, 7,11);
|
||||
G( 0, 1, 2, 3);
|
||||
G( 5, 6, 7, 4);
|
||||
G(10,11, 8, 9);
|
||||
G(15,12,13,14);
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++) state[i] += v[i];
|
||||
|
||||
#undef G
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
112
algo/argon2/ar2/sj/scrypt-jane-pbkdf2.h
Normal file
112
algo/argon2/ar2/sj/scrypt-jane-pbkdf2.h
Normal file
@@ -0,0 +1,112 @@
|
||||
typedef struct scrypt_hmac_state_t {
|
||||
scrypt_hash_state inner, outer;
|
||||
} scrypt_hmac_state;
|
||||
|
||||
|
||||
static void
|
||||
scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) {
|
||||
scrypt_hash_state st;
|
||||
scrypt_hash_init(&st);
|
||||
scrypt_hash_update(&st, m, mlen);
|
||||
scrypt_hash_finish(&st, hash);
|
||||
}
|
||||
|
||||
/* hmac */
|
||||
static void
|
||||
scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) {
|
||||
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
|
||||
size_t i;
|
||||
|
||||
scrypt_hash_init(&st->inner);
|
||||
scrypt_hash_init(&st->outer);
|
||||
|
||||
if (keylen <= SCRYPT_HASH_BLOCK_SIZE) {
|
||||
/* use the key directly if it's <= blocksize bytes */
|
||||
memcpy(pad, key, keylen);
|
||||
} else {
|
||||
/* if it's > blocksize bytes, hash it */
|
||||
scrypt_hash(pad, key, keylen);
|
||||
}
|
||||
|
||||
/* inner = (key ^ 0x36) */
|
||||
/* h(inner || ...) */
|
||||
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
|
||||
pad[i] ^= 0x36;
|
||||
scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE);
|
||||
|
||||
/* outer = (key ^ 0x5c) */
|
||||
/* h(outer || ...) */
|
||||
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
|
||||
pad[i] ^= (0x5c ^ 0x36);
|
||||
scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
|
||||
|
||||
scrypt_ensure_zero(pad, sizeof(pad));
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) {
|
||||
/* h(inner || m...) */
|
||||
scrypt_hash_update(&st->inner, m, mlen);
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) {
|
||||
/* h(inner || m) */
|
||||
scrypt_hash_digest innerhash;
|
||||
scrypt_hash_finish(&st->inner, innerhash);
|
||||
|
||||
/* h(outer || h(inner || m)) */
|
||||
scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash));
|
||||
scrypt_hash_finish(&st->outer, mac);
|
||||
|
||||
scrypt_ensure_zero(st, sizeof(*st));
|
||||
}
|
||||
|
||||
static void
|
||||
scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) {
|
||||
scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
|
||||
scrypt_hash_digest ti, u;
|
||||
uint8_t be[4];
|
||||
uint32_t i, j, blocks;
|
||||
uint64_t c;
|
||||
|
||||
/* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */
|
||||
|
||||
/* hmac(password, ...) */
|
||||
scrypt_hmac_init(&hmac_pw, password, password_len);
|
||||
|
||||
/* hmac(password, salt...) */
|
||||
hmac_pw_salt = hmac_pw;
|
||||
scrypt_hmac_update(&hmac_pw_salt, salt, salt_len);
|
||||
|
||||
blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
|
||||
for (i = 1; i <= blocks; i++) {
|
||||
/* U1 = hmac(password, salt || be(i)) */
|
||||
U32TO8_BE(be, i);
|
||||
work = hmac_pw_salt;
|
||||
scrypt_hmac_update(&work, be, 4);
|
||||
scrypt_hmac_finish(&work, ti);
|
||||
memcpy(u, ti, sizeof(u));
|
||||
|
||||
/* T[i] = U1 ^ U2 ^ U3... */
|
||||
for (c = 0; c < N - 1; c++) {
|
||||
/* UX = hmac(password, U{X-1}) */
|
||||
work = hmac_pw;
|
||||
scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE);
|
||||
scrypt_hmac_finish(&work, u);
|
||||
|
||||
/* T[i] ^= UX */
|
||||
for (j = 0; j < sizeof(u); j++)
|
||||
ti[j] ^= u[j];
|
||||
}
|
||||
|
||||
memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes);
|
||||
out += SCRYPT_HASH_DIGEST_SIZE;
|
||||
bytes -= SCRYPT_HASH_DIGEST_SIZE;
|
||||
}
|
||||
|
||||
scrypt_ensure_zero(ti, sizeof(ti));
|
||||
scrypt_ensure_zero(u, sizeof(u));
|
||||
scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw));
|
||||
scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt));
|
||||
}
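/*
   Illustrative sketch (not part of the original source): deriving 64 bytes of
   key material with scrypt_pbkdf2 above. N = 1 gives a single HMAC iteration
   per output block, which is how the scrypt KDF itself invokes this helper;
   the function name example_scrypt_pbkdf2_usage is hypothetical.
*/
#if 0
static void example_scrypt_pbkdf2_usage(void) {
	uint8_t key[64];

	/* password "password", salt "NaCl", one iteration, 64 bytes of output */
	scrypt_pbkdf2((const uint8_t *)"password", 8,
	              (const uint8_t *)"NaCl", 4, 1, key, sizeof(key));
}
#endif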
|
||||
463
algo/argon2/ar2/sj/scrypt-jane-portable-x86.h
Normal file
463
algo/argon2/ar2/sj/scrypt-jane-portable-x86.h
Normal file
@@ -0,0 +1,463 @@
|
||||
#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC))
|
||||
#define X86ASM
|
||||
|
||||
/* gcc 2.95 royally screws up stack alignments on variables */
|
||||
#if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS6PP)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000)))
|
||||
#define X86ASM_SSE
|
||||
#define X86ASM_SSE2
|
||||
#endif
|
||||
#if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2005)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102)))
|
||||
#define X86ASM_SSSE3
|
||||
#endif
|
||||
#if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40400)))
|
||||
#define X86ASM_AVX
|
||||
#define X86ASM_XOP
|
||||
#endif
|
||||
#if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2012)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40700)))
|
||||
#define X86ASM_AVX2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CPU_X86_64) && defined(COMPILER_GCC)
|
||||
#define X86_64ASM
|
||||
#define X86_64ASM_SSE2
|
||||
#if (COMPILER_GCC >= 40102)
|
||||
#define X86_64ASM_SSSE3
|
||||
#endif
|
||||
#if (COMPILER_GCC >= 40400)
|
||||
#define X86_64ASM_AVX
|
||||
#define X86_64ASM_XOP
|
||||
#endif
|
||||
#if (COMPILER_GCC >= 40700)
|
||||
#define X86_64ASM_AVX2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(COMPILER_MSVC) && (defined(CPU_X86_FORCE_INTRINSICS) || defined(CPU_X86_64))
|
||||
#define X86_INTRINSIC
|
||||
#if defined(CPU_X86_64) || defined(X86ASM_SSE)
|
||||
#define X86_INTRINSIC_SSE
|
||||
#endif
|
||||
#if defined(CPU_X86_64) || defined(X86ASM_SSE2)
|
||||
#define X86_INTRINSIC_SSE2
|
||||
#endif
|
||||
#if (COMPILER_MSVC >= COMPILER_MSVC_VS2005)
|
||||
#define X86_INTRINSIC_SSSE3
|
||||
#endif
|
||||
#if (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)
|
||||
#define X86_INTRINSIC_AVX
|
||||
#define X86_INTRINSIC_XOP
|
||||
#endif
|
||||
#if (COMPILER_MSVC >= COMPILER_MSVC_VS2012)
|
||||
#define X86_INTRINSIC_AVX2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS)
|
||||
#define X86_INTRINSIC
|
||||
#if defined(__SSE__)
|
||||
#define X86_INTRINSIC_SSE
|
||||
#endif
|
||||
#if defined(__SSE2__)
|
||||
#define X86_INTRINSIC_SSE2
|
||||
#endif
|
||||
#if defined(__SSSE3__)
|
||||
#define X86_INTRINSIC_SSSE3
|
||||
#endif
|
||||
#if defined(__AVX__)
|
||||
#define X86_INTRINSIC_AVX
|
||||
#endif
|
||||
#if defined(__XOP__)
|
||||
#define X86_INTRINSIC_XOP
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#define X86_INTRINSIC_AVX2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* only use simd on windows (or SSE2 on gcc)! */
|
||||
#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC)
|
||||
#if defined(X86_INTRINSIC_SSE)
|
||||
#include <mmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
typedef __m64 qmm;
|
||||
typedef __m128 xmm;
|
||||
typedef __m128d xmmd;
|
||||
#endif
|
||||
#if defined(X86_INTRINSIC_SSE2)
|
||||
#include <emmintrin.h>
|
||||
typedef __m128i xmmi;
|
||||
#endif
|
||||
#if defined(X86_INTRINSIC_SSSE3)
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
#if defined(X86_INTRINSIC_AVX)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#if defined(X86_INTRINSIC_XOP)
|
||||
#if defined(COMPILER_MSVC)
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#if defined(X86_INTRINSIC_AVX2)
|
||||
typedef __m256i ymmi;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(X86_INTRINSIC_SSE2)
|
||||
typedef union packedelem8_t {
|
||||
uint8_t u[16];
|
||||
xmmi v;
|
||||
} packedelem8;
|
||||
|
||||
typedef union packedelem32_t {
|
||||
uint32_t u[4];
|
||||
xmmi v;
|
||||
} packedelem32;
|
||||
|
||||
typedef union packedelem64_t {
|
||||
uint64_t u[2];
|
||||
xmmi v;
|
||||
} packedelem64;
|
||||
#else
|
||||
typedef union packedelem8_t {
|
||||
uint8_t u[16];
|
||||
uint32_t dw[4];
|
||||
} packedelem8;
|
||||
|
||||
typedef union packedelem32_t {
|
||||
uint32_t u[4];
|
||||
uint8_t b[16];
|
||||
} packedelem32;
|
||||
|
||||
typedef union packedelem64_t {
|
||||
uint64_t u[2];
|
||||
uint8_t b[16];
|
||||
} packedelem64;
|
||||
#endif
|
||||
|
||||
#if defined(X86_INTRINSIC_SSSE3)
|
||||
static const packedelem8 ALIGN(16) ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}};
|
||||
static const packedelem8 ALIGN(16) ssse3_rotl8_32bit = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}};
|
||||
#endif
|
||||
|
||||
/*
|
||||
x86 inline asm for gcc/msvc. usage:
|
||||
|
||||
asm_naked_fn_proto(return_type, name) (type parm1, type parm2..)
|
||||
asm_naked_fn(name)
|
||||
a1(..)
|
||||
a2(.., ..)
|
||||
a3(.., .., ..)
|
||||
64bit OR 0 parameters: a1(ret)
|
||||
32bit AND n parameters: aret(4n), e.g. aret(16) for 4 parameters
|
||||
asm_naked_fn_end(name)
|
||||
*/
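/*
   Illustrative sketch (not part of the original source): a minimal naked
   function written with the macros documented above, mirroring the has_cpuid
   helper defined later in this header. The name example_return_zero is
   hypothetical; a real body follows the same a1/a2/aj pattern.
*/
#if 0
asm_naked_fn_proto(int, example_return_zero)(void)
asm_naked_fn(example_return_zero)
	a2(xor eax, eax) /* return value 0 in eax */
	a1(ret)          /* 0 parameters, so a plain ret */
asm_naked_fn_end(example_return_zero)
#endif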
|
||||
|
||||
#if defined(X86ASM) || defined(X86_64ASM)
|
||||
|
||||
#if defined(COMPILER_MSVC)
|
||||
#pragma warning(disable : 4731) /* frame pointer modified by inline assembly */
|
||||
#define a1(x) __asm {x}
|
||||
#define a2(x, y) __asm {x, y}
|
||||
#define a3(x, y, z) __asm {x, y, z}
|
||||
#define a4(x, y, z, w) __asm {x, y, z, w}
|
||||
#define aj(x) __asm {x}
|
||||
#define asm_align8 a1(ALIGN 8)
|
||||
#define asm_align16 a1(ALIGN 16)
|
||||
|
||||
#define asm_calling_convention STDCALL
|
||||
#define aret(n) a1(ret n)
|
||||
#define asm_naked_fn_proto(type, fn) static NAKED type asm_calling_convention fn
|
||||
#define asm_naked_fn(fn) {
|
||||
#define asm_naked_fn_end(fn) }
|
||||
#elif defined(COMPILER_GCC)
|
||||
#define GNU_AS1(x) #x ";\n"
|
||||
#define GNU_AS2(x, y) #x ", " #y ";\n"
|
||||
#define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n"
|
||||
#define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n"
|
||||
#define GNU_ASFN(x) "\n_" #x ":\n" #x ":\n"
|
||||
#define GNU_ASJ(x) ".att_syntax prefix\n" #x "\n.intel_syntax noprefix\n"
|
||||
|
||||
#define a1(x) GNU_AS1(x)
|
||||
#define a2(x, y) GNU_AS2(x, y)
|
||||
#define a3(x, y, z) GNU_AS3(x, y, z)
|
||||
#define a4(x, y, z, w) GNU_AS4(x, y, z, w)
|
||||
#define aj(x) GNU_ASJ(x)
|
||||
#define asm_align8 ".p2align 3,,7"
|
||||
#define asm_align16 ".p2align 4,,15"
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#define asm_calling_convention CDECL
|
||||
#define aret(n) a1(ret)
|
||||
|
||||
#if defined(X86_64ASM)
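/* On 64-bit Windows the gcc naked-function wrapper below saves the callee-saved
   xmm6-xmm12 plus rdi/rsi, moves the Win64 argument registers (rcx, rdx, r8, r9)
   into the System V positions (rdi, rsi, rdx, rcx) expected by the asm bodies,
   and then calls the real body, which starts at the local label 1 emitted at the
   end of the wrapper. */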
|
||||
#define asm_naked_fn(fn) ; __asm__ ( \
|
||||
".text\n" \
|
||||
asm_align16 GNU_ASFN(fn) \
|
||||
"subq $136, %rsp;" \
|
||||
"movdqa %xmm6, 0(%rsp);" \
|
||||
"movdqa %xmm7, 16(%rsp);" \
|
||||
"movdqa %xmm8, 32(%rsp);" \
|
||||
"movdqa %xmm9, 48(%rsp);" \
|
||||
"movdqa %xmm10, 64(%rsp);" \
|
||||
"movdqa %xmm11, 80(%rsp);" \
|
||||
"movdqa %xmm12, 96(%rsp);" \
|
||||
"movq %rdi, 112(%rsp);" \
|
||||
"movq %rsi, 120(%rsp);" \
|
||||
"movq %rcx, %rdi;" \
|
||||
"movq %rdx, %rsi;" \
|
||||
"movq %r8, %rdx;" \
|
||||
"movq %r9, %rcx;" \
|
||||
"call 1f;" \
|
||||
"movdqa 0(%rsp), %xmm6;" \
|
||||
"movdqa 16(%rsp), %xmm7;" \
|
||||
"movdqa 32(%rsp), %xmm8;" \
|
||||
"movdqa 48(%rsp), %xmm9;" \
|
||||
"movdqa 64(%rsp), %xmm10;" \
|
||||
"movdqa 80(%rsp), %xmm11;" \
|
||||
"movdqa 96(%rsp), %xmm12;" \
|
||||
"movq 112(%rsp), %rdi;" \
|
||||
"movq 120(%rsp), %rsi;" \
|
||||
"addq $136, %rsp;" \
|
||||
"ret;" \
|
||||
".intel_syntax noprefix;" \
|
||||
".p2align 4,,15;" \
|
||||
"1:;"
|
||||
#else
|
||||
#define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn)
|
||||
#endif
|
||||
#else
|
||||
#define asm_calling_convention STDCALL
|
||||
#define aret(n) a1(ret n)
|
||||
#define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn)
|
||||
#endif
|
||||
|
||||
#define asm_naked_fn_proto(type, fn) extern type asm_calling_convention fn
|
||||
#define asm_naked_fn_end(fn) ".att_syntax prefix;\n" );
|
||||
|
||||
#define asm_gcc() __asm__ __volatile__(".intel_syntax noprefix;\n"
|
||||
#define asm_gcc_parms() ".att_syntax prefix;"
|
||||
#define asm_gcc_trashed() __asm__ __volatile__("" :::
|
||||
#define asm_gcc_end() );
|
||||
#else
|
||||
need x86 asm
|
||||
#endif
|
||||
|
||||
#endif /* X86ASM || X86_64ASM */
|
||||
|
||||
|
||||
#if defined(CPU_X86) || defined(CPU_X86_64)
|
||||
|
||||
typedef enum cpu_flags_x86_t {
|
||||
cpu_mmx = 1 << 0,
|
||||
cpu_sse = 1 << 1,
|
||||
cpu_sse2 = 1 << 2,
|
||||
cpu_sse3 = 1 << 3,
|
||||
cpu_ssse3 = 1 << 4,
|
||||
cpu_sse4_1 = 1 << 5,
|
||||
cpu_sse4_2 = 1 << 6,
|
||||
cpu_avx = 1 << 7,
|
||||
cpu_xop = 1 << 8,
|
||||
cpu_avx2 = 1 << 9
|
||||
} cpu_flags_x86;
|
||||
|
||||
typedef enum cpu_vendors_x86_t {
|
||||
cpu_nobody,
|
||||
cpu_intel,
|
||||
cpu_amd
|
||||
} cpu_vendors_x86;
|
||||
|
||||
typedef struct x86_regs_t {
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
} x86_regs;
|
||||
|
||||
#if defined(X86ASM)
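/* has_cpuid: detect CPUID support on 32-bit x86 by trying to toggle the ID flag
   (EFLAGS bit 21); the result is 1 if the bit could be flipped and the CPUID
   instruction is therefore available. */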
|
||||
asm_naked_fn_proto(int, has_cpuid)(void)
|
||||
asm_naked_fn(has_cpuid)
|
||||
a1(pushfd)
|
||||
a1(pop eax)
|
||||
a2(mov ecx, eax)
|
||||
a2(xor eax, 0x200000)
|
||||
a1(push eax)
|
||||
a1(popfd)
|
||||
a1(pushfd)
|
||||
a1(pop eax)
|
||||
a2(xor eax, ecx)
|
||||
a2(shr eax, 21)
|
||||
a2(and eax, 1)
|
||||
a1(push ecx)
|
||||
a1(popfd)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(has_cpuid)
|
||||
#endif /* X86ASM */
|
||||
|
||||
|
||||
static void NOINLINE
|
||||
get_cpuid(x86_regs *regs, uint32_t flags) {
|
||||
#if defined(COMPILER_MSVC)
|
||||
__cpuid((int *)regs, (int)flags);
|
||||
#else
|
||||
#if defined(CPU_X86_64)
|
||||
#define cpuid_bx rbx
|
||||
#else
|
||||
#define cpuid_bx ebx
|
||||
#endif
|
||||
|
||||
asm_gcc()
|
||||
a1(push cpuid_bx)
|
||||
a2(xor ecx, ecx)
|
||||
a1(cpuid)
|
||||
a2(mov [%1 + 0], eax)
|
||||
a2(mov [%1 + 4], ebx)
|
||||
a2(mov [%1 + 8], ecx)
|
||||
a2(mov [%1 + 12], edx)
|
||||
a1(pop cpuid_bx)
|
||||
asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc"
|
||||
asm_gcc_end()
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
|
||||
static uint64_t NOINLINE
|
||||
get_xgetbv(uint32_t flags) {
|
||||
#if defined(COMPILER_MSVC)
|
||||
return _xgetbv(flags);
|
||||
#else
|
||||
uint32_t lo, hi;
|
||||
asm_gcc()
|
||||
a1(xgetbv)
|
||||
asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi)
|
||||
asm_gcc_end()
|
||||
return ((uint64_t)lo | ((uint64_t)hi << 32));
|
||||
#endif
|
||||
}
|
||||
#endif // AVX support
|
||||
|
||||
#if defined(SCRYPT_TEST_SPEED)
|
||||
size_t cpu_detect_mask = (size_t)-1;
|
||||
#endif
|
||||
|
||||
static size_t
|
||||
detect_cpu(void) {
|
||||
//union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
|
||||
//cpu_vendors_x86 vendor = cpu_nobody;
|
||||
x86_regs regs;
|
||||
uint32_t max_level, max_ext_level;
|
||||
size_t cpu_flags = 0;
|
||||
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
|
||||
uint64_t xgetbv_flags;
|
||||
#endif
|
||||
|
||||
#if defined(CPU_X86)
|
||||
if (!has_cpuid())
|
||||
return cpu_flags;
|
||||
#endif
|
||||
|
||||
get_cpuid(®s, 0);
|
||||
max_level = regs.eax;
|
||||
#if 0
|
||||
vendor_string.i[0] = regs.ebx;
|
||||
vendor_string.i[1] = regs.edx;
|
||||
vendor_string.i[2] = regs.ecx;
|
||||
|
||||
if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12))
|
||||
vendor = cpu_intel;
|
||||
else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12))
|
||||
vendor = cpu_amd;
|
||||
#endif
|
||||
if (max_level & 0x00000500) {
|
||||
/* "Intel P5 pre-B0" */
|
||||
cpu_flags |= cpu_mmx;
|
||||
return cpu_flags;
|
||||
}
|
||||
|
||||
if (max_level < 1)
|
||||
return cpu_flags;
|
||||
|
||||
get_cpuid(®s, 1);
|
||||
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
|
||||
/* xsave/xrestore */
|
||||
if (regs.ecx & (1 << 27)) {
|
||||
xgetbv_flags = get_xgetbv(0);
|
||||
if ((regs.ecx & (1 << 28)) && (xgetbv_flags & 0x6)) cpu_flags |= cpu_avx;
|
||||
}
|
||||
#endif
|
||||
if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2;
|
||||
if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_1; /* CPUID.1:ECX bit 19 is SSE4.1 */
|
||||
if (regs.ecx & (1 << 9)) cpu_flags |= cpu_ssse3;
|
||||
if (regs.ecx & (1 )) cpu_flags |= cpu_sse3;
|
||||
if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2;
|
||||
if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse;
|
||||
if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx;
|
||||
|
||||
if (cpu_flags & cpu_avx) {
|
||||
if (max_level >= 7) {
|
||||
get_cpuid(®s, 7);
|
||||
if (regs.ebx & (1 << 5)) cpu_flags |= cpu_avx2;
|
||||
}
|
||||
|
||||
get_cpuid(®s, 0x80000000);
|
||||
max_ext_level = regs.eax;
|
||||
if (max_ext_level >= 0x80000001) {
|
||||
get_cpuid(®s, 0x80000001);
|
||||
if (regs.ecx & (1 << 11)) cpu_flags |= cpu_xop;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if defined(SCRYPT_TEST_SPEED)
|
||||
cpu_flags &= cpu_detect_mask;
|
||||
#endif
|
||||
|
||||
return cpu_flags;
|
||||
}
|
||||
|
||||
#if defined(SCRYPT_TEST_SPEED)
|
||||
static const char *
|
||||
get_top_cpuflag_desc(size_t flag) {
|
||||
if (flag & cpu_avx2) return "AVX2";
|
||||
else if (flag & cpu_xop) return "XOP";
|
||||
else if (flag & cpu_avx) return "AVX";
|
||||
else if (flag & cpu_sse4_2) return "SSE4.2";
|
||||
else if (flag & cpu_sse4_1) return "SSE4.1";
|
||||
else if (flag & cpu_ssse3) return "SSSE3";
|
||||
else if (flag & cpu_sse2) return "SSE2";
|
||||
else if (flag & cpu_sse) return "SSE";
|
||||
else if (flag & cpu_mmx) return "MMX";
|
||||
else return "Basic";
|
||||
}
|
||||
#endif
|
||||
|
||||
/* when choosing at compile time, disable any option the compiler was not built with */
|
||||
#if defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
#if !defined(__AVX2__)
|
||||
#undef X86_64ASM_AVX2
|
||||
#undef X86ASM_AVX2
|
||||
#undef X86_INTRINSIC_AVX2
|
||||
#endif
|
||||
#if !defined(__XOP__)
|
||||
#undef X86_64ASM_XOP
|
||||
#undef X86ASM_XOP
|
||||
#undef X86_INTRINSIC_XOP
|
||||
#endif
|
||||
#if !defined(__AVX__)
|
||||
#undef X86_64ASM_AVX
|
||||
#undef X86ASM_AVX
|
||||
#undef X86_INTRINSIC_AVX
|
||||
#endif
|
||||
#if !defined(__SSSE3__)
|
||||
#undef X86_64ASM_SSSE3
|
||||
#undef X86ASM_SSSE3
|
||||
#undef X86_INTRINSIC_SSSE3
|
||||
#endif
|
||||
#if !defined(__SSE2__)
|
||||
#undef X86_64ASM_SSE2
|
||||
#undef X86ASM_SSE2
|
||||
#undef X86_INTRINSIC_SSE2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
|
||||
310
algo/argon2/ar2/sj/scrypt-jane-portable.h
Normal file
310
algo/argon2/ar2/sj/scrypt-jane-portable.h
Normal file
@@ -0,0 +1,310 @@
|
||||
/* determine os */
|
||||
#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__)
|
||||
#include <windows.h>
|
||||
#include <wincrypt.h>
|
||||
#define OS_WINDOWS
|
||||
#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__)
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#define OS_SOLARIS
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/param.h> /* need this to define BSD */
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#define OS_NIX
|
||||
#if defined(__linux__)
|
||||
#include <endian.h>
|
||||
#define OS_LINUX
|
||||
#elif defined(BSD)
|
||||
#define OS_BSD
|
||||
|
||||
#if defined(MACOS_X) || (defined(__APPLE__) && defined(__MACH__))
|
||||
#define OS_OSX
|
||||
#elif defined(macintosh) || defined(Macintosh)
|
||||
#define OS_MAC
|
||||
#elif defined(__OpenBSD__)
|
||||
#define OS_OPENBSD
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* determine compiler */
|
||||
#if defined(_MSC_VER)
|
||||
#define COMPILER_MSVC_VS6 120000000
|
||||
#define COMPILER_MSVC_VS6PP 121000000
|
||||
#define COMPILER_MSVC_VS2002 130000000
|
||||
#define COMPILER_MSVC_VS2003 131000000
|
||||
#define COMPILER_MSVC_VS2005 140050727
|
||||
#define COMPILER_MSVC_VS2008 150000000
|
||||
#define COMPILER_MSVC_VS2008SP1 150030729
|
||||
#define COMPILER_MSVC_VS2010 160000000
|
||||
#define COMPILER_MSVC_VS2010SP1 160040219
|
||||
#define COMPILER_MSVC_VS2012RC 170000000
|
||||
#define COMPILER_MSVC_VS2012 170050727
|
||||
|
||||
#if _MSC_FULL_VER > 100000000
|
||||
#define COMPILER_MSVC (_MSC_FULL_VER)
|
||||
#else
|
||||
#define COMPILER_MSVC (_MSC_FULL_VER * 10)
|
||||
#endif
|
||||
|
||||
#if ((_MSC_VER == 1200) && defined(_mm_free))
|
||||
#undef COMPILER_MSVC
|
||||
#define COMPILER_MSVC COMPILER_MSVC_VS6PP
|
||||
#endif
|
||||
|
||||
#pragma warning(disable : 4127) /* conditional expression is constant */
|
||||
#pragma warning(disable : 4100) /* unreferenced formal parameter */
|
||||
|
||||
#ifndef _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include <float.h>
|
||||
#include <stdlib.h> /* _rotl */
|
||||
#include <intrin.h>
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
typedef signed __int64 int64_t;
|
||||
|
||||
#define ROTL32(a,b) _rotl(a,b)
|
||||
#define ROTR32(a,b) _rotr(a,b)
|
||||
#define ROTL64(a,b) _rotl64(a,b)
|
||||
#define ROTR64(a,b) _rotr64(a,b)
|
||||
#undef NOINLINE
|
||||
#define NOINLINE __declspec(noinline)
|
||||
#undef NORETURN
|
||||
#define NORETURN
|
||||
#undef INLINE
|
||||
#define INLINE __forceinline
|
||||
#undef FASTCALL
|
||||
#define FASTCALL __fastcall
|
||||
#undef CDECL
|
||||
#define CDECL __cdecl
|
||||
#undef STDCALL
|
||||
#define STDCALL __stdcall
|
||||
#undef NAKED
|
||||
#define NAKED __declspec(naked)
|
||||
#define ALIGN(n) __declspec(align(n))
|
||||
#endif
|
||||
#if defined(__ICC)
|
||||
#define COMPILER_INTEL
|
||||
#endif
|
||||
#if defined(__GNUC__)
|
||||
#if (__GNUC__ >= 3)
|
||||
#define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__
|
||||
#else
|
||||
#define COMPILER_GCC_PATCHLEVEL 0
|
||||
#endif
|
||||
#define COMPILER_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + COMPILER_GCC_PATCHLEVEL)
|
||||
#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
|
||||
#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
|
||||
#define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b)))
|
||||
#define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b)))
|
||||
#undef NOINLINE
|
||||
#if (COMPILER_GCC >= 30000)
|
||||
#define NOINLINE __attribute__((noinline))
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
#undef NORETURN
|
||||
#if (COMPILER_GCC >= 30000)
|
||||
#define NORETURN __attribute__((noreturn))
|
||||
#else
|
||||
#define NORETURN
|
||||
#endif
|
||||
#undef INLINE
|
||||
#if (COMPILER_GCC >= 30000)
|
||||
#define INLINE __attribute__((always_inline))
|
||||
#else
|
||||
#define INLINE inline
|
||||
#endif
|
||||
#undef FASTCALL
|
||||
#if (COMPILER_GCC >= 30400)
|
||||
#define FASTCALL __attribute__((fastcall))
|
||||
#else
|
||||
#define FASTCALL
|
||||
#endif
|
||||
#undef CDECL
|
||||
#define CDECL __attribute__((cdecl))
|
||||
#undef STDCALL
|
||||
#define STDCALL __attribute__((stdcall))
|
||||
#define ALIGN(n) __attribute__((aligned(n)))
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
#define COMPILER_MINGW
|
||||
#endif
|
||||
#if defined(__PATHCC__)
|
||||
#define COMPILER_PATHCC
|
||||
#endif
|
||||
|
||||
#define OPTIONAL_INLINE
|
||||
#if defined(OPTIONAL_INLINE)
|
||||
#undef OPTIONAL_INLINE
|
||||
#define OPTIONAL_INLINE INLINE
|
||||
#else
|
||||
#define OPTIONAL_INLINE
|
||||
#endif
|
||||
|
||||
#define CRYPTO_FN NOINLINE STDCALL
|
||||
|
||||
/* determine cpu */
|
||||
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64)
|
||||
#define CPU_X86_64
|
||||
#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500))
|
||||
#define CPU_X86 500
|
||||
#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400))
|
||||
#define CPU_X86 400
|
||||
#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__)
|
||||
#define CPU_X86 300
|
||||
#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64)
|
||||
#define CPU_IA64
|
||||
#endif
|
||||
|
||||
#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9)
|
||||
#define CPU_SPARC
|
||||
#if defined(__sparcv9)
|
||||
#define CPU_SPARC64
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64))
|
||||
#define CPU_64BITS
|
||||
#undef FASTCALL
|
||||
#define FASTCALL
|
||||
#undef CDECL
|
||||
#define CDECL
|
||||
#undef STDCALL
|
||||
#define STDCALL
|
||||
#endif
|
||||
|
||||
#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC)
|
||||
#define CPU_PPC
|
||||
#if defined(_ARCH_PWR7)
|
||||
#define CPU_POWER7
|
||||
#elif defined(__64BIT__)
|
||||
#define CPU_PPC64
|
||||
#else
|
||||
#define CPU_PPC32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__hppa__) || defined(__hppa)
|
||||
#define CPU_HPPA
|
||||
#endif
|
||||
|
||||
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
|
||||
#define CPU_ALPHA
|
||||
#endif
|
||||
|
||||
/* endian */
|
||||
|
||||
#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \
|
||||
(defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \
|
||||
(defined(CPU_X86) || defined(CPU_X86_64)) || \
|
||||
(defined(vax) || defined(MIPSEL) || defined(_MIPSEL)))
|
||||
#define CPU_LE
|
||||
#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \
|
||||
(defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \
|
||||
(defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB))
|
||||
#define CPU_BE
|
||||
#else
|
||||
/* unknown endian! */
|
||||
#endif
|
||||
|
||||
|
||||
#define U8TO32_BE(p) \
|
||||
(((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
|
||||
((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) ))
|
||||
|
||||
#define U8TO32_LE(p) \
|
||||
(((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \
|
||||
((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))
|
||||
|
||||
#define U32TO8_BE(p, v) \
|
||||
(p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
|
||||
(p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) );
|
||||
|
||||
#define U32TO8_LE(p, v) \
|
||||
(p)[0] = (uint8_t)((v) ); (p)[1] = (uint8_t)((v) >> 8); \
|
||||
(p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24);
|
||||
|
||||
#define U8TO64_BE(p) \
|
||||
(((uint64_t)U8TO32_BE(p) << 32) | (uint64_t)U8TO32_BE((p) + 4))
|
||||
|
||||
#define U8TO64_LE(p) \
|
||||
(((uint64_t)U8TO32_LE(p)) | ((uint64_t)U8TO32_LE((p) + 4) << 32))
|
||||
|
||||
#define U64TO8_BE(p, v) \
|
||||
U32TO8_BE((p), (uint32_t)((v) >> 32)); \
|
||||
U32TO8_BE((p) + 4, (uint32_t)((v) ));
|
||||
|
||||
#define U64TO8_LE(p, v) \
|
||||
U32TO8_LE((p), (uint32_t)((v) )); \
|
||||
U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
|
||||
|
||||
#define U32_SWAP(v) { \
|
||||
(v) = (((v) << 8) & 0xFF00FF00 ) | (((v) >> 8) & 0xFF00FF ); \
|
||||
(v) = ((v) << 16) | ((v) >> 16); \
|
||||
}
|
||||
|
||||
#define U64_SWAP(v) { \
|
||||
(v) = (((v) << 8) & 0xFF00FF00FF00FF00ull ) | (((v) >> 8) & 0x00FF00FF00FF00FFull ); \
|
||||
(v) = (((v) << 16) & 0xFFFF0000FFFF0000ull ) | (((v) >> 16) & 0x0000FFFF0000FFFFull ); \
|
||||
(v) = ((v) << 32) | ((v) >> 32); \
|
||||
}
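/* Constant-time comparison: ORs together the differing bits of x and y and
   returns 1 only when every byte matched, so timing does not reveal where the
   first mismatch occurred. */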
|
||||
|
||||
static int
|
||||
scrypt_verify(const uint8_t *x, const uint8_t *y, size_t len) {
|
||||
uint32_t differentbits = 0;
|
||||
while (len--)
|
||||
differentbits |= (*x++ ^ *y++);
|
||||
return (1 & ((differentbits - 1) >> 8));
|
||||
}
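/* Best-effort secure zeroing: uses __stosb / rep stosb or volatile byte stores
   so the compiler cannot drop the wipe as a dead store. */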
|
||||
|
||||
static void
|
||||
scrypt_ensure_zero(void *p, size_t len) {
|
||||
#if ((defined(CPU_X86) || defined(CPU_X86_64)) && defined(COMPILER_MSVC))
|
||||
__stosb((unsigned char *)p, 0, len);
|
||||
#elif (defined(CPU_X86) && defined(COMPILER_GCC))
|
||||
__asm__ __volatile__(
|
||||
"pushl %%edi;\n"
|
||||
"pushl %%ecx;\n"
|
||||
"rep stosb;\n"
|
||||
"popl %%ecx;\n"
|
||||
"popl %%edi;\n"
|
||||
:: "a"(0), "D"(p), "c"(len) : "cc", "memory"
|
||||
);
|
||||
#elif (defined(CPU_X86_64) && defined(COMPILER_GCC))
|
||||
__asm__ __volatile__(
|
||||
"pushq %%rdi;\n"
|
||||
"pushq %%rcx;\n"
|
||||
"rep stosb;\n"
|
||||
"popq %%rcx;\n"
|
||||
"popq %%rdi;\n"
|
||||
:: "a"(0), "D"(p), "c"(len) : "cc", "memory"
|
||||
);
|
||||
#else
|
||||
volatile uint8_t *b = (volatile uint8_t *)p;
|
||||
size_t i;
|
||||
for (i = 0; i < len; i++)
|
||||
b[i] = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#include "scrypt-jane-portable-x86.h"
|
||||
|
||||
#if !defined(asm_calling_convention)
|
||||
#define asm_calling_convention
|
||||
#endif
|
||||
74
algo/argon2/ar2/sj/scrypt-jane-romix-basic.h
Normal file
74
algo/argon2/ar2/sj/scrypt-jane-romix-basic.h
Normal file
@@ -0,0 +1,74 @@
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
/* function type returned by scrypt_getROMix, used with cpu detection */
|
||||
typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r);
|
||||
#endif
|
||||
|
||||
/* romix pre/post nop function */
|
||||
static void asm_calling_convention
|
||||
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
|
||||
(void)blocks; (void)nblocks;
|
||||
}
|
||||
|
||||
/* romix pre/post endian conversion function */
|
||||
static void asm_calling_convention
|
||||
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
|
||||
#if !defined(CPU_LE)
|
||||
static const union { uint8_t b[2]; uint16_t w; } endian_test = {{1,0}};
|
||||
size_t i;
|
||||
if (endian_test.w == 0x100) {
|
||||
nblocks *= SCRYPT_BLOCK_WORDS;
|
||||
for (i = 0; i < nblocks; i++) {
|
||||
SCRYPT_WORD_ENDIAN_SWAP(blocks[i]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void)blocks; (void)nblocks;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* chunkmix test function */
|
||||
typedef void (asm_calling_convention *chunkmixfn)(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r);
|
||||
typedef void (asm_calling_convention *blockfixfn)(scrypt_mix_word_t *blocks, size_t nblocks);
|
||||
|
||||
static int
|
||||
scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn, const uint8_t expected[16]) {
|
||||
/* r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */
|
||||
const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS;
|
||||
#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
|
||||
scrypt_mix_word_t ALIGN(32) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
|
||||
#else
|
||||
scrypt_mix_word_t ALIGN(16) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
|
||||
#endif
|
||||
uint8_t final[16];
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < words; i++) {
|
||||
v = (scrypt_mix_word_t)i;
|
||||
v = (v << 8) | v;
|
||||
v = (v << 16) | v;
|
||||
chunk[0][i] = v;
|
||||
}
|
||||
|
||||
prefn(chunk[0], blocks);
|
||||
mixfn(chunk[1], chunk[0], NULL, r);
|
||||
postfn(chunk[1], blocks);
|
||||
|
||||
/* grab the last 16 bytes of the final block */
|
||||
for (i = 0; i < 16; i += sizeof(scrypt_mix_word_t)) {
|
||||
SCRYPT_WORDTO8_LE(final + i, chunk[1][words - (16 / sizeof(scrypt_mix_word_t)) + (i / sizeof(scrypt_mix_word_t))]);
|
||||
}
|
||||
|
||||
return scrypt_verify(expected, final, 16);
|
||||
}
|
||||
|
||||
/* returns a pointer to item i, where item is len scrypt_mix_word_t's long */
|
||||
static scrypt_mix_word_t *
|
||||
scrypt_item(scrypt_mix_word_t *base, scrypt_mix_word_t i, scrypt_mix_word_t len) {
|
||||
return base + (i * len);
|
||||
}
|
||||
|
||||
/* returns a pointer to block i */
|
||||
static scrypt_mix_word_t *
|
||||
scrypt_block(scrypt_mix_word_t *base, scrypt_mix_word_t i) {
|
||||
return base + (i * SCRYPT_BLOCK_WORDS);
|
||||
}
|
||||
122
algo/argon2/ar2/sj/scrypt-jane-romix-template.h
Normal file
122
algo/argon2/ar2/sj/scrypt-jane-romix-template.h
Normal file
@@ -0,0 +1,122 @@
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX)
|
||||
|
||||
#if defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
#undef SCRYPT_ROMIX_FN
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix
|
||||
#endif
|
||||
|
||||
#undef SCRYPT_HAVE_ROMIX
|
||||
#define SCRYPT_HAVE_ROMIX
|
||||
|
||||
#if !defined(SCRYPT_CHUNKMIX_FN)
|
||||
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_basic
|
||||
|
||||
/*
|
||||
Bout = ChunkMix(Bin)
|
||||
|
||||
2*r: number of blocks in the chunk
|
||||
*/
|
||||
static void asm_calling_convention
|
||||
SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) {
|
||||
#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
|
||||
scrypt_mix_word_t ALIGN(32) X[SCRYPT_BLOCK_WORDS], *block;
|
||||
#else
|
||||
scrypt_mix_word_t ALIGN(16) X[SCRYPT_BLOCK_WORDS], *block;
|
||||
#endif
|
||||
uint32_t i, j, blocksPerChunk = /*r * 2*/2, half = 0;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
block = scrypt_block(Bin, blocksPerChunk - 1);
|
||||
for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
|
||||
X[i] = block[i];
|
||||
|
||||
if (Bxor) {
|
||||
block = scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
|
||||
X[i] ^= block[i];
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= /*r*/1) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
block = scrypt_block(Bin, i);
|
||||
for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
|
||||
X[j] ^= block[j];
|
||||
|
||||
if (Bxor) {
|
||||
block = scrypt_block(Bxor, i);
|
||||
for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
|
||||
X[j] ^= block[j];
|
||||
}
|
||||
SCRYPT_MIX_FN(X);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
block = scrypt_block(Bout, (i / 2) + half);
|
||||
for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
|
||||
block[j] = X[j];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
X = ROMix(X)
|
||||
|
||||
X: chunk to mix
|
||||
Y: scratch chunk
|
||||
N: number of rounds
|
||||
V[N]: array of chunks to randomly index in to
|
||||
2*r: number of blocks in a chunk
|
||||
*/
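/* Note: this ar2 build fixes the scrypt parameters at compile time; the loops
   below hard-code N = 512 and r = 1 (see the literal 511, 512 and 1 values next
   to the original N and r expressions), so the N and r arguments are effectively
   unused here. */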
|
||||
|
||||
static void NOINLINE FASTCALL
|
||||
SCRYPT_ROMIX_FN(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[N * chunkWords]*/, uint32_t N, uint32_t r) {
|
||||
uint32_t i, j, chunkWords = (uint32_t)(SCRYPT_BLOCK_WORDS * 2);
|
||||
scrypt_mix_word_t *block = V;
|
||||
|
||||
SCRYPT_ROMIX_TANGLE_FN(X, 2);
|
||||
|
||||
/* 1: X = B */
|
||||
/* implicit */
|
||||
|
||||
/* 2: for i = 0 to N - 1 do */
|
||||
memcpy(block, X, chunkWords * sizeof(scrypt_mix_word_t));
|
||||
for (i = 0; i < /*N - 1*/511; i++, block += chunkWords) {
|
||||
/* 3: V_i = X */
|
||||
/* 4: X = H(X) */
|
||||
SCRYPT_CHUNKMIX_FN(block + chunkWords, block, NULL, /*r*/1);
|
||||
}
|
||||
SCRYPT_CHUNKMIX_FN(X, block, NULL, 1);
|
||||
|
||||
/* 6: for i = 0 to N - 1 do */
|
||||
for (i = 0; i < /*N*/512; i += 2) {
|
||||
/* 7: j = Integerify(X) % N */
|
||||
j = X[chunkWords - SCRYPT_BLOCK_WORDS] & /*(N - 1)*/511;
|
||||
|
||||
/* 8: X = H(Y ^ V_j) */
|
||||
SCRYPT_CHUNKMIX_FN(Y, X, scrypt_item(V, j, chunkWords), 1);
|
||||
|
||||
/* 7: j = Integerify(Y) % N */
|
||||
j = Y[chunkWords - SCRYPT_BLOCK_WORDS] & /*(N - 1)*/511;
|
||||
|
||||
/* 8: X = H(Y ^ V_j) */
|
||||
SCRYPT_CHUNKMIX_FN(X, Y, scrypt_item(V, j, chunkWords), 1);
|
||||
}
|
||||
|
||||
/* 10: B' = X */
|
||||
/* implicit */
|
||||
|
||||
SCRYPT_ROMIX_UNTANGLE_FN(X, 2);
|
||||
}
|
||||
|
||||
#endif /* !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) */
|
||||
|
||||
|
||||
#undef SCRYPT_CHUNKMIX_FN
|
||||
#undef SCRYPT_ROMIX_FN
|
||||
#undef SCRYPT_MIX_FN
|
||||
#undef SCRYPT_ROMIX_TANGLE_FN
|
||||
#undef SCRYPT_ROMIX_UNTANGLE_FN
|
||||
|
||||
23
algo/argon2/ar2/sj/scrypt-jane-romix.h
Normal file
23
algo/argon2/ar2/sj/scrypt-jane-romix.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#if defined(SCRYPT_SALSA64)
|
||||
#include "scrypt-jane-salsa64.h"
|
||||
#else
|
||||
#define SCRYPT_MIX_BASE "ERROR"
|
||||
typedef uint32_t scrypt_mix_word_t;
|
||||
#define SCRYPT_WORDTO8_LE U32TO8_LE
|
||||
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
|
||||
#define SCRYPT_BLOCK_BYTES 64
|
||||
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
static void FASTCALL scrypt_ROMix_error(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r) {}
|
||||
static scrypt_ROMixfn scrypt_getROMix(void) { return scrypt_ROMix_error; }
|
||||
#else
|
||||
static void FASTCALL scrypt_ROMix(scrypt_mix_word_t *X, scrypt_mix_word_t *Y, scrypt_mix_word_t *V, uint32_t N, uint32_t r) {}
|
||||
#endif
|
||||
static int scrypt_test_mix(void) { return 0; }
|
||||
#error must define a mix function!
|
||||
#endif
|
||||
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX SCRYPT_MIX_BASE
|
||||
#endif
|
||||
183
algo/argon2/ar2/sj/scrypt-jane-salsa64.h
Normal file
183
algo/argon2/ar2/sj/scrypt-jane-salsa64.h
Normal file
@@ -0,0 +1,183 @@
|
||||
#define SCRYPT_MIX_BASE "Salsa64/8"
|
||||
|
||||
typedef uint64_t scrypt_mix_word_t;
|
||||
|
||||
#define SCRYPT_WORDTO8_LE U64TO8_LE
|
||||
#define SCRYPT_WORD_ENDIAN_SWAP U64_SWAP
|
||||
|
||||
#define SCRYPT_BLOCK_BYTES 128
|
||||
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
|
||||
|
||||
/* must have these here in case the block size is ever != 64 bytes */
|
||||
#include "scrypt-jane-romix-basic.h"
|
||||
|
||||
#include "scrypt-jane-mix_salsa64-avx2.h"
|
||||
#include "scrypt-jane-mix_salsa64-xop.h"
|
||||
#include "scrypt-jane-mix_salsa64-avx.h"
|
||||
#include "scrypt-jane-mix_salsa64-ssse3.h"
|
||||
#include "scrypt-jane-mix_salsa64-sse2.h"
|
||||
#include "scrypt-jane-mix_salsa64.h"
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX2)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx2
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_avx2
|
||||
#define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_XOP)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_xop
|
||||
#define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_avx
|
||||
#define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSSE3)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3
|
||||
#define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSE2)
|
||||
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
|
||||
#define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
/* cpu agnostic */
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
|
||||
#define SCRYPT_MIX_FN salsa64_core_basic
|
||||
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
|
||||
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
static scrypt_ROMixfn
|
||||
scrypt_getROMix(void) {
|
||||
size_t cpuflags = detect_cpu();
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX2)
|
||||
if (cpuflags & cpu_avx2)
|
||||
return scrypt_ROMix_avx2;
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_XOP)
|
||||
if (cpuflags & cpu_xop)
|
||||
return scrypt_ROMix_xop;
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX)
|
||||
if (cpuflags & cpu_avx)
|
||||
return scrypt_ROMix_avx;
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSSE3)
|
||||
if (cpuflags & cpu_ssse3)
|
||||
return scrypt_ROMix_ssse3;
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSE2)
|
||||
if (cpuflags & cpu_sse2)
|
||||
return scrypt_ROMix_sse2;
|
||||
else
|
||||
#endif
|
||||
|
||||
return scrypt_ROMix_basic;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(SCRYPT_TEST_SPEED)
|
||||
static size_t
|
||||
available_implementations(void) {
|
||||
size_t cpuflags = detect_cpu();
|
||||
size_t flags = 0;
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX2)
|
||||
if (cpuflags & cpu_avx2)
|
||||
flags |= cpu_avx2;
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_XOP)
|
||||
if (cpuflags & cpu_xop)
|
||||
flags |= cpu_xop;
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX)
|
||||
if (cpuflags & cpu_avx)
|
||||
flags |= cpu_avx;
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSSE3)
|
||||
if (cpuflags & cpu_ssse3)
|
||||
flags |= cpu_ssse3;
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSE2)
|
||||
if (cpuflags & cpu_sse2)
|
||||
flags |= cpu_sse2;
|
||||
#endif
|
||||
|
||||
return flags;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
scrypt_test_mix(void) {
|
||||
static const uint8_t expected[16] = {
|
||||
0xf8,0x92,0x9b,0xf8,0xcc,0x1d,0xce,0x2e,0x13,0x82,0xac,0x96,0xb2,0x6c,0xee,0x2c,
|
||||
};
|
||||
|
||||
int ret = 1;
|
||||
size_t cpuflags = detect_cpu();
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX2)
|
||||
if (cpuflags & cpu_avx2)
|
||||
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_XOP)
|
||||
if (cpuflags & cpu_xop)
|
||||
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX)
|
||||
if (cpuflags & cpu_avx)
|
||||
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSSE3)
|
||||
if (cpuflags & cpu_ssse3)
|
||||
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_SSE2)
|
||||
if (cpuflags & cpu_sse2)
|
||||
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_BASIC)
|
||||
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
28
algo/argon2/ar2/sj/scrypt-jane-test-vectors.h
Normal file
28
algo/argon2/ar2/sj/scrypt-jane-test-vectors.h
Normal file
@@ -0,0 +1,28 @@
|
||||
typedef struct scrypt_test_setting_t {
|
||||
const char *pw, *salt;
|
||||
uint8_t Nfactor, rfactor, pfactor;
|
||||
} scrypt_test_setting;
|
||||
|
||||
static const scrypt_test_setting post_settings[] = {
|
||||
{"", "", 3, 0, 0},
|
||||
{"password", "NaCl", 9, 3, 4},
|
||||
{0, 0, 0, 0, 0}
|
||||
};
|
||||
|
||||
#if defined(SCRYPT_SKEIN512)
|
||||
#if defined(SCRYPT_SALSA64)
|
||||
static const uint8_t post_vectors[][64] = {
|
||||
{0xd2,0xad,0x32,0x05,0xee,0x80,0xe3,0x44,0x70,0xc6,0x34,0xde,0x05,0xb6,0xcf,0x60,
|
||||
0x89,0x98,0x70,0xc0,0xb8,0xf5,0x54,0xf1,0xa6,0xb2,0xc8,0x76,0x34,0xec,0xc4,0x59,
|
||||
0x8e,0x64,0x42,0xd0,0xa9,0xed,0xe7,0x19,0xb2,0x8a,0x11,0xc6,0xa6,0xbf,0xa7,0xa9,
|
||||
0x4e,0x44,0x32,0x7e,0x12,0x91,0x9d,0xfe,0x52,0x48,0xa8,0x27,0xb3,0xfc,0xb1,0x89},
|
||||
{0xd6,0x67,0xd2,0x3e,0x30,0x1e,0x9d,0xe2,0x55,0x68,0x17,0x3d,0x2b,0x75,0x5a,0xe5,
|
||||
0x04,0xfb,0x3d,0x0e,0x86,0xe0,0xaa,0x1d,0xd4,0x72,0xda,0xb0,0x79,0x41,0xb7,0x99,
|
||||
0x68,0xe5,0xd9,0x55,0x79,0x7d,0xc3,0xd1,0xa6,0x56,0xc1,0xbe,0x0b,0x6c,0x62,0x23,
|
||||
0x66,0x67,0x91,0x47,0x99,0x13,0x6b,0xe3,0xda,0x59,0x55,0x18,0x67,0x8f,0x2e,0x3b}
|
||||
};
|
||||
#endif
|
||||
#else
|
||||
static const uint8_t post_vectors[][64] = {{0}};
|
||||
#endif
|
||||
|
||||
92
algo/argon2/argon2a.c
Normal file
92
algo/argon2/argon2a.c
Normal file
@@ -0,0 +1,92 @@
|
||||
#include "miner.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "ar2/argon2.h"
|
||||
#include "ar2/cores.h"
|
||||
#include "ar2/ar2-scrypt-jane.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#define T_COSTS 2
|
||||
#define M_COSTS 16
|
||||
#define MASK 8
|
||||
#define ZERO 0
|
||||
|
||||
inline void argon_call(void *out, void *in, void *salt, int type)
|
||||
{
|
||||
argon2_context context;
|
||||
|
||||
context.out = (uint8_t *)out;
|
||||
context.pwd = (uint8_t *)in;
|
||||
context.salt = (uint8_t*)salt;
|
||||
context.pwdlen = 0;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
|
||||
argon2_core(&context, type);
|
||||
}
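/* argon2hash chains three passes: scrypt-jane over the 80-byte block header,
   one Argon2 pass whose variant is selected from the low bits of the first
   scrypt output word (hashA[0] & MASK), then a final scrypt-jane pass over the
   32-byte intermediate digest. */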
|
||||
|
||||
void argon2hash(void *output, const void *input)
|
||||
{
|
||||
uint32_t _ALIGN(64) hashA[8], hashB[8];
|
||||
|
||||
my_scrypt((const unsigned char *)input, 80,
|
||||
(const unsigned char *)input, 80,
|
||||
(unsigned char *)hashA);
|
||||
|
||||
argon_call(hashB, hashA, hashA, (hashA[0] & MASK) == ZERO);
|
||||
|
||||
my_scrypt((const unsigned char *)hashB, 32,
|
||||
(const unsigned char *)hashB, 32,
|
||||
(unsigned char *)output);
|
||||
}
|
||||
|
||||
int scanhash_argon2(int thr_id, struct work* work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2hash(hash, endiandata);
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t argon2_get_max64 ()
|
||||
{
|
||||
return 0x1ffLL;
|
||||
}
|
||||
|
||||
bool register_argon2_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_argon2;
|
||||
gate->hash = (void*)&argon2hash;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&argon2_get_max64;
|
||||
return true;
|
||||
};
|
||||
|
||||
88
algo/axiom.c
Normal file
88
algo/axiom.c
Normal file
@@ -0,0 +1,88 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
|
||||
static __thread uint32_t _ALIGN(128) M[65536][8];
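/* M holds the 65536 chained Shabal-256 digests (8 x 32-bit words each, 2 MiB
   per thread) that give AxiomHash its memory-hard behaviour. */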
|
||||
|
||||
void axiomhash(void *output, const void *input)
|
||||
{
|
||||
sph_shabal256_context ctx;
|
||||
const int N = 65536;
|
||||
|
||||
sph_shabal256_init(&ctx);
|
||||
sph_shabal256(&ctx, input, 80);
|
||||
sph_shabal256_close(&ctx, M[0]);
|
||||
|
||||
for(int i = 1; i < N; i++) {
|
||||
sph_shabal256_init(&ctx);
|
||||
sph_shabal256(&ctx, M[i-1], 32);
|
||||
sph_shabal256_close(&ctx, M[i]);
|
||||
}
|
||||
|
||||
for(int b = 0; b < N; b++)
|
||||
{
|
||||
const int p = b > 0 ? b - 1 : 0xFFFF;
|
||||
const int q = M[p][0] % 0xFFFF;
|
||||
const int j = (b + q) % N;
|
||||
|
||||
sph_shabal256_init(&ctx);
|
||||
#if 0
|
||||
sph_shabal256(&ctx, M[p], 32);
|
||||
sph_shabal256(&ctx, M[j], 32);
|
||||
#else
|
||||
uint8_t _ALIGN(128) hash[64];
|
||||
memcpy(hash, M[p], 32);
|
||||
memcpy(&hash[32], M[j], 32);
|
||||
sph_shabal256(&ctx, hash, 64);
|
||||
#endif
|
||||
sph_shabal256_close(&ctx, M[b]);
|
||||
}
|
||||
memcpy(output, M[N-1], 32);
|
||||
}
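axiomhash() first fills a per-thread table of 65536 rows of 32 bytes (2 MiB) with a Shabal-256 hash chain, then re-mixes every row together with a data-dependent partner row; the last row is the 32-byte result. The sketch below only shows the partner-index rule from the mixing loop, with a made-up stand-in for the first word of the previous row; it assumes nothing about the sph_shabal internals.

/* Illustrative only: the data-dependent partner index used above. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const int N = 65536;
    uint32_t prev_word = 0x9e3779b9;        /* stand-in for M[p][0] */
    for (int b = 0; b < 4; b++) {
        int p = b > 0 ? b - 1 : 0xFFFF;     /* previous row, wrapping to the last row */
        int q = prev_word % 0xFFFF;         /* offset derived from the previous row's data */
        int j = (b + q) % N;                /* partner row hashed together with row p */
        printf("b=%d  p=%d  j=%d\n", b, p, j);
    }
    return 0;
}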

int scanhash_axiom(int thr_id, struct work *work,
                   uint32_t max_nonce, uint64_t *hashes_done)
{
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t _ALIGN(128) hash64[8];
    uint32_t _ALIGN(128) endiandata[20];

    const uint32_t Htarg = ptarget[7];
    const uint32_t first_nonce = pdata[19];

    uint32_t n = first_nonce;

    for (int k = 0; k < 19; k++)
        be32enc(&endiandata[k], pdata[k]);

    do {
        be32enc(&endiandata[19], n);
        axiomhash(hash64, endiandata);
        if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
            *hashes_done = n - first_nonce + 1;
            pdata[19] = n;
            return true;
        }
        n++;

    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;

    return 0;
}

bool register_axiom_algo( algo_gate_t* gate )
{
    gate->scanhash = (void*)&scanhash_axiom;
    gate->hash = (void*)&axiomhash;
    gate->hash_alt = (void*)&axiomhash;
    gate->get_max64 = (void*)&get_max64_0x40LL;
    return true;
}

0
algo/blake/.dirstamp
Normal file
108
algo/blake/blake.c
Normal file
@@ -0,0 +1,108 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "sph_blake.h"

#include <string.h>
#include <stdint.h>
#include <memory.h>

/* Move init out of loop, so init once externally,
 * and then use one single memcpy */
static __thread sph_blake256_context blake_mid;
static __thread bool ctx_midstate_done = false;

static void init_blake_hash(void)
{
    sph_blake256_init(&blake_mid);
    ctx_midstate_done = true;
}

void blakehash(void *state, const void *input)
{
    sph_blake256_context ctx;

    uint8_t hash[64];
    uint8_t *ending = (uint8_t*) input;
    ending += 64;

    // do one memcpy to get a fresh context
    if (!ctx_midstate_done) {
        init_blake_hash();
        sph_blake256(&blake_mid, input, 64);
    }

    memcpy(&ctx, &blake_mid, sizeof(blake_mid));

    sph_blake256(&ctx, ending, 16);
    sph_blake256_close(&ctx, hash);

    memcpy(state, hash, 32);

}
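The midstate trick above splits the 80-byte block header into a constant 64-byte prefix, hashed once per work unit into blake_mid, and a 16-byte tail that contains the nonce; each scan iteration then copies the saved context and hashes only the tail. The standalone sketch below demonstrates the same pattern with a trivial FNV-1a stream standing in for BLAKE-256, so it compiles and runs on its own; only the copy-the-midstate structure is the point, not the hash.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { uint64_t h; } toy_ctx;                       /* stand-in streaming hash */

static void toy_init(toy_ctx *c) { c->h = 0xcbf29ce484222325ULL; }
static void toy_update(toy_ctx *c, const void *p, size_t n)
{
    const uint8_t *b = p;
    for (size_t i = 0; i < n; i++) { c->h ^= b[i]; c->h *= 0x100000001b3ULL; }
}

int main(void)
{
    uint8_t header[80] = { 0 };          /* 64-byte constant prefix + 16-byte tail */

    toy_ctx mid;
    toy_init(&mid);
    toy_update(&mid, header, 64);        /* hash the constant prefix once */

    for (uint32_t nonce = 0; nonce < 3; nonce++) {
        memcpy(header + 76, &nonce, 4);  /* nonce lives in the tail */

        toy_ctx a = mid;                 /* midstate copy, then only 16 more bytes */
        toy_update(&a, header + 64, 16);

        toy_ctx b;                       /* reference: hash all 80 bytes from scratch */
        toy_init(&b);
        toy_update(&b, header, 80);

        printf("nonce %u: midstate %016llx  full %016llx  %s\n", nonce,
               (unsigned long long)a.h, (unsigned long long)b.h,
               a.h == b.h ? "match" : "MISMATCH");
    }
    return 0;
}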

int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
                    uint64_t *hashes_done )
{
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    const uint32_t first_nonce = pdata[19];
    uint32_t HTarget = ptarget[7];
    uint32_t _ALIGN(32) hash64[8];
    uint32_t _ALIGN(32) endiandata[20];
    uint32_t n = first_nonce;

    ctx_midstate_done = false;

    if (opt_benchmark)
        HTarget = 0x7f;

    // we need big endian data...
    swab32_array( endiandata, pdata, 20 );

#ifdef DEBUG_ALGO
    applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
#endif

    do {
        be32enc(&endiandata[19], n);
        blakehash(hash64, endiandata);
#ifndef DEBUG_ALGO
        if (hash64[7] <= HTarget && fulltest(hash64, ptarget)) {
            *hashes_done = n - first_nonce + 1;
            return true;
        }
#else
        if (!(n % 0x1000) && !thr_id) printf(".");
        if (hash64[7] == 0) {
            printf("[%d]",thr_id);
            if (fulltest(hash64, ptarget)) {
                *hashes_done = n - first_nonce + 1;
                return true;
            }
        }
#endif
        n++; pdata[19] = n;

    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;
    return 0;
}

// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake_get_max64 ()
{
    return 0x7ffffLL;
}

bool register_blake_algo( algo_gate_t* gate )
{
    gate->scanhash = (void*)&scanhash_blake;
    gate->hash = (void*)&blakehash;
    gate->hash_alt = (void*)&blakehash;
    gate->get_max64 = (void*)&blake_get_max64;
    return true;
}

85
algo/blake/blake2.c
Normal file
@@ -0,0 +1,85 @@
#include "miner.h"
#include "algo-gate-api.h"

#include <string.h>
#include <stdint.h>

#include "crypto/blake2s.h"

static __thread blake2s_state s_midstate;
static __thread blake2s_state s_ctx;
#define MIDLEN 76

void blake2s_hash(void *output, const void *input)
{
    unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
    blake2s_state blake2_ctx;

    blake2s_init(&blake2_ctx, BLAKE2S_OUTBYTES);
    blake2s_update(&blake2_ctx, input, 80);
    blake2s_final(&blake2_ctx, hash, BLAKE2S_OUTBYTES);

    memcpy(output, hash, 32);
}

static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
{
    s_ctx.buflen = MIDLEN;
    memcpy(&s_ctx, &s_midstate, 32 + 16 + MIDLEN);
    blake2s_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
    blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES);
}
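The 32 + 16 + MIDLEN byte count in the memcpy above is a layout assumption about blake2s_state rather than a documented contract: 32 bytes of chaining value, 16 bytes of message counters and finalization flags, then the first MIDLEN bytes of the input buffer. Because the copy stops inside the buffer, the buflen assignment made just before it is not overwritten. The sketch below shows the layout this arithmetic presumes; check the actual definition in crypto/blake2s.h, since padding, field order, or a smaller buffer would change the constant.

/* Hedged illustration of the state layout assumed by the partial memcpy. */
#include <stdint.h>
#include <stddef.h>

typedef struct {
    uint32_t h[8];         /* 32 bytes: chaining value                         */
    uint32_t t[2];         /*  8 bytes: message byte counter                   */
    uint32_t f[2];         /*  8 bytes: finalization flags (32 + 16 = 48 so far) */
    uint8_t  buf[2 * 64];  /* buffered input; only the first MIDLEN bytes copied */
    size_t   buflen;       /* lies past the copied region, so it keeps its value */
} blake2s_state_sketch;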

int scanhash_blake2s(int thr_id, struct work *work,
                     uint32_t max_nonce, uint64_t *hashes_done)
{
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;

    uint32_t _ALIGN(64) hash64[8];
    uint32_t _ALIGN(64) endiandata[20];

    const uint32_t Htarg = ptarget[7];
    const uint32_t first_nonce = pdata[19];

    uint32_t n = first_nonce;

    swab32_array( endiandata, pdata, 20 );

    // midstate
    blake2s_init(&s_midstate, BLAKE2S_OUTBYTES);
    blake2s_update(&s_midstate, (uint8_t*) endiandata, MIDLEN);
    memcpy(&s_ctx, &s_midstate, sizeof(blake2s_state));

    do {
        be32enc(&endiandata[19], n);
        blake2s_hash_end(hash64, endiandata);
        if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
            *hashes_done = n - first_nonce + 1;
            pdata[19] = n;
            return true;
        }
        n++;

    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;

    return 0;
}

// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake2s_get_max64 ()
{
    return 0x7ffffLL;
}

bool register_blake2s_algo( algo_gate_t* gate )
{
    gate->scanhash = (void*)&scanhash_blake2s;
    gate->hash = (void*)&blake2s_hash;
    gate->get_max64 = (void*)&blake2s_get_max64;
    return true;
};

129
algo/blake/blakecoin.c
Normal file
@@ -0,0 +1,129 @@
#include "miner.h"
#include "algo-gate-api.h"
#define BLAKE32_ROUNDS 8
#include "sph_blake.h"

void blakecoin_init(void *cc);
void blakecoin(void *cc, const void *data, size_t len);
void blakecoin_close(void *cc, void *dst);

#include <string.h>
#include <stdint.h>
#include <memory.h>
#include <openssl/sha.h>

/* Move init out of loop, so init once externally,
 * and then use one single memcpy */
static sph_blake256_context blake_mid;
static bool ctx_midstate_done = false;

static void init_blake_hash(void)
{
    blakecoin_init(&blake_mid);
    ctx_midstate_done = true;
}

void blakecoinhash(void *state, const void *input)
{
    sph_blake256_context ctx;

    uint8_t hash[64];
    uint8_t *ending = (uint8_t*) input;
    ending += 64;

    // do one memcpy to get a fresh context
    if (!ctx_midstate_done) {
        init_blake_hash();
        blakecoin(&blake_mid, input, 64);
    }
    memcpy(&ctx, &blake_mid, sizeof(blake_mid));

    blakecoin(&ctx, ending, 16);
    blakecoin_close(&ctx, hash);

    memcpy(state, hash, 32);
}

int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done)
{
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    const uint32_t first_nonce = pdata[19];
    uint32_t HTarget = ptarget[7];

    uint32_t _ALIGN(32) hash64[8];
    uint32_t _ALIGN(32) endiandata[20];

    uint32_t n = first_nonce;

    ctx_midstate_done = false;

    if (opt_benchmark)
        HTarget = 0x7f;

    // we need big endian data...
    // be32enc_array( endiandata, pdata, 19 );
    for (int kk=0; kk < 19; kk++)
        be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);


#ifdef DEBUG_ALGO
    applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
#endif

    do {
        be32enc(&endiandata[19], n);
        blakecoinhash(hash64, endiandata);
#ifndef DEBUG_ALGO
        if (hash64[7] <= HTarget && fulltest(hash64, ptarget)) {
            *hashes_done = n - first_nonce + 1;
            return true;
        }
#else
        if (!(n % 0x1000) && !thr_id) printf(".");
        if (hash64[7] == 0) {
            printf("[%d]",thr_id);
            if (fulltest(hash64, ptarget)) {
                *hashes_done = n - first_nonce + 1;
                return true;
            }
        }
#endif
        n++; pdata[19] = n;

    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;
    return 0;
}

void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
{
    SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
}

// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blakecoin_get_max64 ()
{
    return 0x7ffffLL;
}

// vanilla uses default gen merkle root, otherwise identical to blakecoin
bool register_vanilla_algo( algo_gate_t* gate )
{
    gate->scanhash = (void*)&scanhash_blakecoin;
    gate->hash = (void*)&blakecoinhash;
    gate->hash_alt = (void*)&blakecoinhash;
    gate->get_max64 = (void*)&blakecoin_get_max64;
    return true;
}

bool register_blakecoin_algo( algo_gate_t* gate )
{
    register_vanilla_algo( gate );
    gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
    return true;
}

248
algo/blake/decred.c
Normal file
@@ -0,0 +1,248 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#ifndef min
|
||||
#define min(a,b) (a>b ? b : a)
|
||||
#endif
|
||||
#ifndef max
|
||||
#define max(a,b) (a<b ? b : a)
|
||||
#endif
|
||||
|
||||
#define DECRED_NBITS_INDEX 29
|
||||
#define DECRED_NTIME_INDEX 34
|
||||
#define DECRED_NONCE_INDEX 35
|
||||
#define DECRED_XNONCE_INDEX 36
|
||||
#define DECRED_DATA_SIZE 192
|
||||
#define DECRED_WORK_COMPARE_SIZE 140
|
||||
|
||||
static __thread sph_blake256_context blake_mid;
|
||||
static __thread bool ctx_midstate_done = false;
|
||||
|
||||
void decred_hash(void *state, const void *input)
|
||||
{
|
||||
#define MIDSTATE_LEN 128
|
||||
sph_blake256_context ctx;
|
||||
|
||||
uint8_t *ending = (uint8_t*) input;
|
||||
ending += MIDSTATE_LEN;
|
||||
|
||||
if (!ctx_midstate_done) {
|
||||
sph_blake256_init(&blake_mid);
|
||||
sph_blake256(&blake_mid, input, MIDSTATE_LEN);
|
||||
ctx_midstate_done = true;
|
||||
}
|
||||
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
|
||||
|
||||
sph_blake256(&ctx, ending, (180 - MIDSTATE_LEN));
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
void decred_hash_simple(void *state, const void *input)
|
||||
{
|
||||
sph_blake256_context ctx;
|
||||
sph_blake256_init(&ctx);
|
||||
sph_blake256(&ctx, input, 180);
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t _ALIGN(128) endiandata[48];
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
#define DCR_NONCE_OFT32 35
|
||||
|
||||
const uint32_t first_nonce = pdata[DCR_NONCE_OFT32];
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
ctx_midstate_done = false;
|
||||
|
||||
#if 1
|
||||
memcpy(endiandata, pdata, 180);
|
||||
#else
|
||||
for (int k=0; k < (180/4); k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (!thr_id) applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
|
||||
#endif
|
||||
|
||||
do {
|
||||
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
|
||||
endiandata[DCR_NONCE_OFT32] = n;
|
||||
decred_hash(hash32, endiandata);
|
||||
|
||||
if (hash32[7] <= HTarget && fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
#ifdef DEBUG_ALGO
|
||||
applog(LOG_BLUE, "Nonce : %08x %08x", n, swab32(n));
|
||||
applog_hash(ptarget);
|
||||
applog_compare_hash(hash32, ptarget);
|
||||
#endif
|
||||
pdata[DCR_NONCE_OFT32] = n;
|
||||
return 1;
|
||||
}
|
||||
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[DCR_NONCE_OFT32] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return &work_data[ DECRED_NONCE_INDEX ];
|
||||
}
|
||||
|
||||
// does decred need a custom stratum_get_g_work to fix nicehash
|
||||
// bad extranonce2 size?
|
||||
//
|
||||
// does decred need a custom init_nonce?
|
||||
// does it need to increment nonce, seems not because gen_work_now always
|
||||
// returns true
|
||||
|
||||
void decred_calc_network_diff( struct work* work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
uint32_t nbits = work->data[ DECRED_NBITS_INDEX ];
|
||||
uint32_t bits = ( nbits & 0xffffff );
|
||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||
int m;
|
||||
net_diff = (double)0x0000ffff / (double)bits;
|
||||
|
||||
for ( m = shift; m < 29; m++ )
|
||||
net_diff *= 256.0;
|
||||
for ( m = 29; m < shift; m++ )
|
||||
net_diff /= 256.0;
|
||||
if ( shift == 28 )
|
||||
net_diff *= 256.0; // testnet
|
||||
if ( opt_debug_diff )
|
||||
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", net_diff,
|
||||
shift, bits);
|
||||
}
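For a concrete feel of the arithmetic in decred_calc_network_diff(), the sample value from its own comment works out as below. This sketch deliberately skips the swab32 byte-order handling and only shows the magnitudes; the shift == 28 testnet branch would scale the result by a further 256.

#include <stdio.h>

int main(void)
{
    unsigned bits  = 0x1c05ea29 & 0xffffff;            /* 0x05ea29 = 387625 */
    int      shift = 0x1c05ea29 >> 24;                 /* 0x1c = 28 */
    double   diff  = (double)0x0000ffff / (double)bits;/* ~0.1691 */
    for (int m = shift; m < 29; m++) diff *= 256.0;    /* one step here */
    printf("shift=%d diff=%.3f\n", shift, diff);       /* ~43.28, matching "diff 43.281" */
    return 0;
}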
|
||||
|
||||
void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
|
||||
{
|
||||
// some random extradata to make the work unique
|
||||
work->data[ DECRED_XNONCE_INDEX ] = (rand()*4);
|
||||
work->height = work->data[32];
|
||||
if (!have_longpoll && work->height > *net_blocks + 1)
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
if (opt_showdiff && net_diff > 0.)
|
||||
{
|
||||
if (net_diff != work->targetdiff)
|
||||
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
|
||||
work->targetdiff);
|
||||
else
|
||||
sprintf(netinfo, ", diff %.3f", net_diff);
|
||||
}
|
||||
applog(LOG_BLUE, "%s block %d%s", algo_names[opt_algo], work->height,
|
||||
netinfo);
|
||||
*net_blocks = work->height - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void decred_be_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
be32enc( &ntime, work->data[ DECRED_NTIME_INDEX ] );
|
||||
be32enc( &nonce, work->data[ DECRED_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( (char*)( &work->data[ DECRED_XNONCE_INDEX ] ),
|
||||
sctx->xnonce1_size );
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
// data shared between gen_merkle_root and build_extraheader.
|
||||
uint32_t decred_extraheader[32] = { 0 };
|
||||
int decred_headersize = 0;
|
||||
|
||||
void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
{
|
||||
// getwork over stratum, getwork merkle + header passed in coinb1
|
||||
memcpy(merkle_root, sctx->job.coinbase, 32);
|
||||
decred_headersize = min((int)sctx->job.coinbase_size - 32,
|
||||
sizeof(decred_extraheader) );
|
||||
memcpy( decred_extraheader, &sctx->job.coinbase[32], decred_headersize);
|
||||
}
|
||||
|
||||
void decred_build_extraheader( struct work* work, struct stratum_ctx* sctx )
|
||||
{
|
||||
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
|
||||
int i;
|
||||
for ( i = 0; i < 8; i++ ) // prevhash
|
||||
work->data[1 + i] = swab32( work->data[1 + i] );
|
||||
for ( i = 0; i < 8; i++ ) // merkle
|
||||
work->data[9 + i] = swab32( work->data[9 + i] );
|
||||
for ( i = 0; i < decred_headersize/4; i++ ) // header
|
||||
work->data[17 + i] = decred_extraheader[i];
|
||||
// extradata
|
||||
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
|
||||
work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
|
||||
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
|
||||
work->data[i] = 0;
|
||||
work->data[37] = (rand()*4) << 8;
|
||||
sctx->bloc_height = work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
}
|
||||
|
||||
bool decred_prevent_dupes( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
|
||||
// need to regen g_work..
|
||||
return true;
|
||||
// extradata: prevent duplicates
|
||||
work->data[ DECRED_XNONCE_INDEX ] += 1;
|
||||
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_decred;
|
||||
gate->hash = (void*)&decred_hash;
|
||||
gate->hash_alt = (void*)&decred_hash;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->gen_merkle_root = (void*)&decred_gen_merkle_root;
|
||||
gate->build_extraheader = (void*)&decred_build_extraheader;
|
||||
gate->prevent_dupes = (void*)&decred_prevent_dupes;
|
||||
gate->nbits_index = DECRED_NBITS_INDEX;
|
||||
gate->ntime_index = DECRED_NTIME_INDEX;
|
||||
gate->nonce_index = DECRED_NONCE_INDEX;
|
||||
gate->work_data_size = DECRED_DATA_SIZE;
|
||||
gate->work_cmp_size = DECRED_WORK_COMPARE_SIZE;
|
||||
allow_mininginfo = false;
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
531
algo/blake/mod_blakecoin.c
Normal file
@@ -0,0 +1,531 @@
|
||||
/* $Id: blake.c 252 2011-06-07 17:55:14Z tp $ */
|
||||
/*
|
||||
* BLAKECOIN implementation. (Stripped to 256 bits only)
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
* @author Tanguy Pruvot (cpuminer implementation)
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "sph_blake.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u32 IV256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
|
||||
SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
|
||||
SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
};
|
||||
|
||||
#define Z00 0
|
||||
#define Z01 1
|
||||
#define Z02 2
|
||||
#define Z03 3
|
||||
#define Z04 4
|
||||
#define Z05 5
|
||||
#define Z06 6
|
||||
#define Z07 7
|
||||
#define Z08 8
|
||||
#define Z09 9
|
||||
#define Z0A A
|
||||
#define Z0B B
|
||||
#define Z0C C
|
||||
#define Z0D D
|
||||
#define Z0E E
|
||||
#define Z0F F
|
||||
|
||||
#define Z10 E
|
||||
#define Z11 A
|
||||
#define Z12 4
|
||||
#define Z13 8
|
||||
#define Z14 9
|
||||
#define Z15 F
|
||||
#define Z16 D
|
||||
#define Z17 6
|
||||
#define Z18 1
|
||||
#define Z19 C
|
||||
#define Z1A 0
|
||||
#define Z1B 2
|
||||
#define Z1C B
|
||||
#define Z1D 7
|
||||
#define Z1E 5
|
||||
#define Z1F 3
|
||||
|
||||
#define Z20 B
|
||||
#define Z21 8
|
||||
#define Z22 C
|
||||
#define Z23 0
|
||||
#define Z24 5
|
||||
#define Z25 2
|
||||
#define Z26 F
|
||||
#define Z27 D
|
||||
#define Z28 A
|
||||
#define Z29 E
|
||||
#define Z2A 3
|
||||
#define Z2B 6
|
||||
#define Z2C 7
|
||||
#define Z2D 1
|
||||
#define Z2E 9
|
||||
#define Z2F 4
|
||||
|
||||
#define Z30 7
|
||||
#define Z31 9
|
||||
#define Z32 3
|
||||
#define Z33 1
|
||||
#define Z34 D
|
||||
#define Z35 C
|
||||
#define Z36 B
|
||||
#define Z37 E
|
||||
#define Z38 2
|
||||
#define Z39 6
|
||||
#define Z3A 5
|
||||
#define Z3B A
|
||||
#define Z3C 4
|
||||
#define Z3D 0
|
||||
#define Z3E F
|
||||
#define Z3F 8
|
||||
|
||||
#define Z40 9
|
||||
#define Z41 0
|
||||
#define Z42 5
|
||||
#define Z43 7
|
||||
#define Z44 2
|
||||
#define Z45 4
|
||||
#define Z46 A
|
||||
#define Z47 F
|
||||
#define Z48 E
|
||||
#define Z49 1
|
||||
#define Z4A B
|
||||
#define Z4B C
|
||||
#define Z4C 6
|
||||
#define Z4D 8
|
||||
#define Z4E 3
|
||||
#define Z4F D
|
||||
|
||||
#define Z50 2
|
||||
#define Z51 C
|
||||
#define Z52 6
|
||||
#define Z53 A
|
||||
#define Z54 0
|
||||
#define Z55 B
|
||||
#define Z56 8
|
||||
#define Z57 3
|
||||
#define Z58 4
|
||||
#define Z59 D
|
||||
#define Z5A 7
|
||||
#define Z5B 5
|
||||
#define Z5C F
|
||||
#define Z5D E
|
||||
#define Z5E 1
|
||||
#define Z5F 9
|
||||
|
||||
#define Z60 C
|
||||
#define Z61 5
|
||||
#define Z62 1
|
||||
#define Z63 F
|
||||
#define Z64 E
|
||||
#define Z65 D
|
||||
#define Z66 4
|
||||
#define Z67 A
|
||||
#define Z68 0
|
||||
#define Z69 7
|
||||
#define Z6A 6
|
||||
#define Z6B 3
|
||||
#define Z6C 9
|
||||
#define Z6D 2
|
||||
#define Z6E 8
|
||||
#define Z6F B
|
||||
|
||||
#define Z70 D
|
||||
#define Z71 B
|
||||
#define Z72 7
|
||||
#define Z73 E
|
||||
#define Z74 C
|
||||
#define Z75 1
|
||||
#define Z76 3
|
||||
#define Z77 9
|
||||
#define Z78 5
|
||||
#define Z79 0
|
||||
#define Z7A F
|
||||
#define Z7B 4
|
||||
#define Z7C 8
|
||||
#define Z7D 6
|
||||
#define Z7E 2
|
||||
#define Z7F A
|
||||
|
||||
#define Z80 6
|
||||
#define Z81 F
|
||||
#define Z82 E
|
||||
#define Z83 9
|
||||
#define Z84 B
|
||||
#define Z85 3
|
||||
#define Z86 0
|
||||
#define Z87 8
|
||||
#define Z88 C
|
||||
#define Z89 2
|
||||
#define Z8A D
|
||||
#define Z8B 7
|
||||
#define Z8C 1
|
||||
#define Z8D 4
|
||||
#define Z8E A
|
||||
#define Z8F 5
|
||||
|
||||
#define Z90 A
|
||||
#define Z91 2
|
||||
#define Z92 8
|
||||
#define Z93 4
|
||||
#define Z94 7
|
||||
#define Z95 6
|
||||
#define Z96 1
|
||||
#define Z97 5
|
||||
#define Z98 F
|
||||
#define Z99 B
|
||||
#define Z9A 9
|
||||
#define Z9B E
|
||||
#define Z9C 3
|
||||
#define Z9D C
|
||||
#define Z9E D
|
||||
#define Z9F 0
|
||||
|
||||
#define Mx(r, i) Mx_(Z ## r ## i)
|
||||
#define Mx_(n) Mx__(n)
|
||||
#define Mx__(n) M ## n
|
||||
|
||||
#define CSx(r, i) CSx_(Z ## r ## i)
|
||||
#define CSx_(n) CSx__(n)
|
||||
#define CSx__(n) CS ## n
|
||||
|
||||
#define CS0 SPH_C32(0x243F6A88)
|
||||
#define CS1 SPH_C32(0x85A308D3)
|
||||
#define CS2 SPH_C32(0x13198A2E)
|
||||
#define CS3 SPH_C32(0x03707344)
|
||||
#define CS4 SPH_C32(0xA4093822)
|
||||
#define CS5 SPH_C32(0x299F31D0)
|
||||
#define CS6 SPH_C32(0x082EFA98)
|
||||
#define CS7 SPH_C32(0xEC4E6C89)
|
||||
#define CS8 SPH_C32(0x452821E6)
|
||||
#define CS9 SPH_C32(0x38D01377)
|
||||
#define CSA SPH_C32(0xBE5466CF)
|
||||
#define CSB SPH_C32(0x34E90C6C)
|
||||
#define CSC SPH_C32(0xC0AC29B7)
|
||||
#define CSD SPH_C32(0xC97C50DD)
|
||||
#define CSE SPH_C32(0x3F84D5B5)
|
||||
#define CSF SPH_C32(0xB5470917)
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define CBx(r, i) CBx_(Z ## r ## i)
|
||||
#define CBx_(n) CBx__(n)
|
||||
#define CBx__(n) CB ## n
|
||||
|
||||
#define CB0 SPH_C64(0x243F6A8885A308D3)
|
||||
#define CB1 SPH_C64(0x13198A2E03707344)
|
||||
#define CB2 SPH_C64(0xA4093822299F31D0)
|
||||
#define CB3 SPH_C64(0x082EFA98EC4E6C89)
|
||||
#define CB4 SPH_C64(0x452821E638D01377)
|
||||
#define CB5 SPH_C64(0xBE5466CF34E90C6C)
|
||||
#define CB6 SPH_C64(0xC0AC29B7C97C50DD)
|
||||
#define CB7 SPH_C64(0x3F84D5B5B5470917)
|
||||
#define CB8 SPH_C64(0x9216D5D98979FB1B)
|
||||
#define CB9 SPH_C64(0xD1310BA698DFB5AC)
|
||||
#define CBA SPH_C64(0x2FFD72DBD01ADFB7)
|
||||
#define CBB SPH_C64(0xB8E1AFED6A267E96)
|
||||
#define CBC SPH_C64(0xBA7C9045F12C7F99)
|
||||
#define CBD SPH_C64(0x24A19947B3916CF7)
|
||||
#define CBE SPH_C64(0x0801F2E2858EFC16)
|
||||
#define CBF SPH_C64(0x636920D871574E69)
|
||||
|
||||
#endif
|
||||
|
||||
#define GS(m0, m1, c0, c1, a, b, c, d) do { \
|
||||
a = SPH_T32(a + b + (m0 ^ c1)); \
|
||||
d = SPH_ROTR32(d ^ a, 16); \
|
||||
c = SPH_T32(c + d); \
|
||||
b = SPH_ROTR32(b ^ c, 12); \
|
||||
a = SPH_T32(a + b + (m1 ^ c0)); \
|
||||
d = SPH_ROTR32(d ^ a, 8); \
|
||||
c = SPH_T32(c + d); \
|
||||
b = SPH_ROTR32(b ^ c, 7); \
|
||||
} while (0)
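GS above is the BLAKE-256 G quarter-round: two add/xor/rotate passes with rotation counts 16, 12, 8 and 7, where each message word is injected XORed against one of the CSx constants defined earlier (the pi-derived constants of the BLAKE specification). ROUND_S(r) then applies it to the four columns and the four diagonals of the 4x4 word state. As a concrete expansion of the permutation macros (Z00 = 0, Z01 = 1), the first call of ROUND_S(0) resolves to:

GS(M0, M1, CS0, CS1, V0, V4, V8, VC);   /* column 0 of round 0 */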
|
||||
|
||||
#define ROUND_S(r) do { \
|
||||
GS(Mx(r, 0), Mx(r, 1), CSx(r, 0), CSx(r, 1), V0, V4, V8, VC); \
|
||||
GS(Mx(r, 2), Mx(r, 3), CSx(r, 2), CSx(r, 3), V1, V5, V9, VD); \
|
||||
GS(Mx(r, 4), Mx(r, 5), CSx(r, 4), CSx(r, 5), V2, V6, VA, VE); \
|
||||
GS(Mx(r, 6), Mx(r, 7), CSx(r, 6), CSx(r, 7), V3, V7, VB, VF); \
|
||||
GS(Mx(r, 8), Mx(r, 9), CSx(r, 8), CSx(r, 9), V0, V5, VA, VF); \
|
||||
GS(Mx(r, A), Mx(r, B), CSx(r, A), CSx(r, B), V1, V6, VB, VC); \
|
||||
GS(Mx(r, C), Mx(r, D), CSx(r, C), CSx(r, D), V2, V7, V8, VD); \
|
||||
GS(Mx(r, E), Mx(r, F), CSx(r, E), CSx(r, F), V3, V4, V9, VE); \
|
||||
} while (0)
|
||||
|
||||
#define DECL_STATE32 \
|
||||
sph_u32 H0, H1, H2, H3, H4, H5, H6, H7; \
|
||||
sph_u32 S0, S1, S2, S3, T0, T1;
|
||||
|
||||
#define READ_STATE32(state) do { \
|
||||
H0 = (state)->H[0]; \
|
||||
H1 = (state)->H[1]; \
|
||||
H2 = (state)->H[2]; \
|
||||
H3 = (state)->H[3]; \
|
||||
H4 = (state)->H[4]; \
|
||||
H5 = (state)->H[5]; \
|
||||
H6 = (state)->H[6]; \
|
||||
H7 = (state)->H[7]; \
|
||||
S0 = (state)->S[0]; \
|
||||
S1 = (state)->S[1]; \
|
||||
S2 = (state)->S[2]; \
|
||||
S3 = (state)->S[3]; \
|
||||
T0 = (state)->T0; \
|
||||
T1 = (state)->T1; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE32(state) do { \
|
||||
(state)->H[0] = H0; \
|
||||
(state)->H[1] = H1; \
|
||||
(state)->H[2] = H2; \
|
||||
(state)->H[3] = H3; \
|
||||
(state)->H[4] = H4; \
|
||||
(state)->H[5] = H5; \
|
||||
(state)->H[6] = H6; \
|
||||
(state)->H[7] = H7; \
|
||||
(state)->S[0] = S0; \
|
||||
(state)->S[1] = S1; \
|
||||
(state)->S[2] = S2; \
|
||||
(state)->S[3] = S3; \
|
||||
(state)->T0 = T0; \
|
||||
(state)->T1 = T1; \
|
||||
} while (0)
|
||||
|
||||
#define BLAKE32_ROUNDS 8
|
||||
|
||||
#define COMPRESS32 do { \
|
||||
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
sph_u32 M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = S0 ^ CS0; \
|
||||
V9 = S1 ^ CS1; \
|
||||
VA = S2 ^ CS2; \
|
||||
VB = S3 ^ CS3; \
|
||||
VC = T0 ^ CS4; \
|
||||
VD = T0 ^ CS5; \
|
||||
VE = T1 ^ CS6; \
|
||||
VF = T1 ^ CS7; \
|
||||
M0 = sph_dec32be_aligned(buf + 0); \
|
||||
M1 = sph_dec32be_aligned(buf + 4); \
|
||||
M2 = sph_dec32be_aligned(buf + 8); \
|
||||
M3 = sph_dec32be_aligned(buf + 12); \
|
||||
M4 = sph_dec32be_aligned(buf + 16); \
|
||||
M5 = sph_dec32be_aligned(buf + 20); \
|
||||
M6 = sph_dec32be_aligned(buf + 24); \
|
||||
M7 = sph_dec32be_aligned(buf + 28); \
|
||||
M8 = sph_dec32be_aligned(buf + 32); \
|
||||
M9 = sph_dec32be_aligned(buf + 36); \
|
||||
MA = sph_dec32be_aligned(buf + 40); \
|
||||
MB = sph_dec32be_aligned(buf + 44); \
|
||||
MC = sph_dec32be_aligned(buf + 48); \
|
||||
MD = sph_dec32be_aligned(buf + 52); \
|
||||
ME = sph_dec32be_aligned(buf + 56); \
|
||||
MF = sph_dec32be_aligned(buf + 60); \
|
||||
ROUND_S(0); \
|
||||
ROUND_S(1); \
|
||||
ROUND_S(2); \
|
||||
ROUND_S(3); \
|
||||
ROUND_S(4); \
|
||||
ROUND_S(5); \
|
||||
ROUND_S(6); \
|
||||
ROUND_S(7); \
|
||||
if (BLAKE32_ROUNDS == 14) { \
|
||||
ROUND_S(8); \
|
||||
ROUND_S(9); \
|
||||
ROUND_S(0); \
|
||||
ROUND_S(1); \
|
||||
ROUND_S(2); \
|
||||
ROUND_S(3); \
|
||||
} \
|
||||
H0 ^= S0 ^ V0 ^ V8; \
|
||||
H1 ^= S1 ^ V1 ^ V9; \
|
||||
H2 ^= S2 ^ V2 ^ VA; \
|
||||
H3 ^= S3 ^ V3 ^ VB; \
|
||||
H4 ^= S0 ^ V4 ^ VC; \
|
||||
H5 ^= S1 ^ V5 ^ VD; \
|
||||
H6 ^= S2 ^ V6 ^ VE; \
|
||||
H7 ^= S3 ^ V7 ^ VF; \
|
||||
} while (0)
|
||||
|
||||
|
||||
static const sph_u32 salt_zero_small[4] = { 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
blake32_init(sph_blake_small_context *sc,
|
||||
const sph_u32 *iv, const sph_u32 *salt)
|
||||
{
|
||||
memcpy(sc->H, iv, 8 * sizeof(sph_u32));
|
||||
memcpy(sc->S, salt, 4 * sizeof(sph_u32));
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32(sph_blake_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
DECL_STATE32
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE32(sc);
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
ptr += clen;
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
if ((T0 = SPH_T32(T0 + 512)) < 512)
|
||||
T1 = SPH_T32(T1 + 1);
|
||||
COMPRESS32;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE32(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32_close(sph_blake_small_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
|
||||
{
|
||||
union {
|
||||
unsigned char buf[64];
|
||||
sph_u32 dummy;
|
||||
} u;
|
||||
size_t ptr, k;
|
||||
unsigned bit_len;
|
||||
unsigned z;
|
||||
sph_u32 th, tl;
|
||||
unsigned char *out;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3) + n;
|
||||
z = 0x80 >> n;
|
||||
u.buf[ptr] = ((ub & -z) | z) & 0xFF;
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
if (ptr == 0 && n == 0) {
|
||||
sc->T0 = SPH_C32(0xFFFFFE00);
|
||||
sc->T1 = SPH_C32(0xFFFFFFFF);
|
||||
} else if (sc->T0 == 0) {
|
||||
sc->T0 = SPH_C32(0xFFFFFE00) + bit_len;
|
||||
sc->T1 = SPH_T32(sc->T1 - 1);
|
||||
} else {
|
||||
sc->T0 -= 512 - bit_len;
|
||||
}
|
||||
if (bit_len <= 446) {
|
||||
memset(u.buf + ptr + 1, 0, 55 - ptr);
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[55] |= 1;
|
||||
sph_enc32be_aligned(u.buf + 56, th);
|
||||
sph_enc32be_aligned(u.buf + 60, tl);
|
||||
blake32(sc, u.buf + ptr, 64 - ptr);
|
||||
} else {
|
||||
memset(u.buf + ptr + 1, 0, 63 - ptr);
|
||||
blake32(sc, u.buf + ptr, 64 - ptr);
|
||||
sc->T0 = SPH_C32(0xFFFFFE00);
|
||||
sc->T1 = SPH_C32(0xFFFFFFFF);
|
||||
memset(u.buf, 0, 56);
|
||||
if (out_size_w32 == 8)
|
||||
u.buf[55] = 1;
|
||||
sph_enc32be_aligned(u.buf + 56, th);
|
||||
sph_enc32be_aligned(u.buf + 60, tl);
|
||||
blake32(sc, u.buf, 64);
|
||||
}
|
||||
out = dst;
|
||||
for (k = 0; k < out_size_w32; k ++)
|
||||
sph_enc32be(out + (k << 2), sc->H[k]);
|
||||
}
|
||||
|
||||
void
|
||||
blakecoin_init(void *cc)
|
||||
{
|
||||
blake32_init(cc, IV256, salt_zero_small);
|
||||
}
|
||||
|
||||
void
|
||||
blakecoin(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake32(cc, data, len);
|
||||
}
|
||||
|
||||
static void
|
||||
blakecoin_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
blake32_close(cc, ub, n, dst, 8);
|
||||
blakecoin_init(cc);
|
||||
}
|
||||
|
||||
void
|
||||
blakecoin_close(void *cc, void *dst)
|
||||
{
|
||||
blakecoin_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
121
algo/blake/pentablake.c
Normal file
@@ -0,0 +1,121 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "sph_blake.h"
|
||||
|
||||
//#define DEBUG_ALGO
|
||||
|
||||
extern void pentablakehash(void *output, const void *input)
|
||||
{
|
||||
unsigned char _ALIGN(32) hash[128];
|
||||
// same as uint32_t hashA[16], hashB[16];
|
||||
#define hashB hash+64
|
||||
|
||||
sph_blake512_context ctx_blake;
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, input, 80);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hashB);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hashB, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hashB);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hashB, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
|
||||
}
|
||||
|
||||
int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t _ALIGN(32) endiandata[32];
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (Htarg != 0)
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
pentablakehash(hash64, endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_pentablake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_pentablake;
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
1127
algo/blake/sph_blake.c
Normal file
File diff suppressed because it is too large
327
algo/blake/sph_blake.h
Normal file
@@ -0,0 +1,327 @@
|
||||
/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
|
||||
/**
|
||||
* BLAKE interface. BLAKE is a family of functions which differ by their
|
||||
* output size; this implementation defines BLAKE for output sizes 224,
|
||||
* 256, 384 and 512 bits. This implementation conforms to the "third
|
||||
* round" specification.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_blake.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_BLAKE_H__
|
||||
#define SPH_BLAKE_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha3/sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-224.
|
||||
*/
|
||||
#define SPH_SIZE_blake224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-256.
|
||||
*/
|
||||
#define SPH_SIZE_blake256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-384.
|
||||
*/
|
||||
#define SPH_SIZE_blake384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-512.
|
||||
*/
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-224 and BLAKE-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BLAKE computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BLAKE
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 H[8];
|
||||
sph_u32 S[4];
|
||||
sph_u32 T0, T1;
|
||||
#endif
|
||||
} sph_blake_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-224 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_small_context sph_blake224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-256 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_small_context sph_blake256_context;
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-384 and BLAKE-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BLAKE computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BLAKE
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u64 H[8];
|
||||
sph_u64 S[4];
|
||||
sph_u64 T0, T1;
|
||||
#endif
|
||||
} sph_blake_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-384 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_big_context sph_blake384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-512 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_big_context sph_blake512_context;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-224 context (pointer to a
|
||||
* <code>sph_blake224_context</code>)
|
||||
*/
|
||||
void sph_blake224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-256 context (pointer to a
|
||||
* <code>sph_blake256_context</code>)
|
||||
*/
|
||||
void sph_blake256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-384 context (pointer to a
|
||||
* <code>sph_blake384_context</code>)
|
||||
*/
|
||||
void sph_blake384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-512 context (pointer to a
|
||||
* <code>sph_blake512_context</code>)
|
||||
*/
|
||||
void sph_blake512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
477
algo/blake/sse2/blake.c
Normal file
@@ -0,0 +1,477 @@
|
||||
/* $Id: blake.c 252 2011-06-07 17:55:14Z tp $ */
|
||||
/*
|
||||
* BLAKE implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "../sph_blake.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u64 blkIV512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
|
||||
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
|
||||
};
|
||||
|
||||
#define Z00 0
#define Z01 1
#define Z02 2
#define Z03 3
#define Z04 4
#define Z05 5
#define Z06 6
#define Z07 7
#define Z08 8
#define Z09 9
#define Z0A A
#define Z0B B
#define Z0C C
#define Z0D D
#define Z0E E
#define Z0F F

#define Z10 E
#define Z11 A
#define Z12 4
#define Z13 8
#define Z14 9
#define Z15 F
#define Z16 D
#define Z17 6
#define Z18 1
#define Z19 C
#define Z1A 0
#define Z1B 2
#define Z1C B
#define Z1D 7
#define Z1E 5
#define Z1F 3

#define Z20 B
#define Z21 8
#define Z22 C
#define Z23 0
#define Z24 5
#define Z25 2
#define Z26 F
#define Z27 D
#define Z28 A
#define Z29 E
#define Z2A 3
#define Z2B 6
#define Z2C 7
#define Z2D 1
#define Z2E 9
#define Z2F 4

#define Z30 7
#define Z31 9
#define Z32 3
#define Z33 1
#define Z34 D
#define Z35 C
#define Z36 B
#define Z37 E
#define Z38 2
#define Z39 6
#define Z3A 5
#define Z3B A
#define Z3C 4
#define Z3D 0
#define Z3E F
#define Z3F 8

#define Z40 9
#define Z41 0
#define Z42 5
#define Z43 7
#define Z44 2
#define Z45 4
#define Z46 A
#define Z47 F
#define Z48 E
#define Z49 1
#define Z4A B
#define Z4B C
#define Z4C 6
#define Z4D 8
#define Z4E 3
#define Z4F D

#define Z50 2
#define Z51 C
#define Z52 6
#define Z53 A
#define Z54 0
#define Z55 B
#define Z56 8
#define Z57 3
#define Z58 4
#define Z59 D
#define Z5A 7
#define Z5B 5
#define Z5C F
#define Z5D E
#define Z5E 1
#define Z5F 9

#define Z60 C
#define Z61 5
#define Z62 1
#define Z63 F
#define Z64 E
#define Z65 D
#define Z66 4
#define Z67 A
#define Z68 0
#define Z69 7
#define Z6A 6
#define Z6B 3
#define Z6C 9
#define Z6D 2
#define Z6E 8
#define Z6F B

#define Z70 D
#define Z71 B
#define Z72 7
#define Z73 E
#define Z74 C
#define Z75 1
#define Z76 3
#define Z77 9
#define Z78 5
#define Z79 0
#define Z7A F
#define Z7B 4
#define Z7C 8
#define Z7D 6
#define Z7E 2
#define Z7F A

#define Z80 6
#define Z81 F
#define Z82 E
#define Z83 9
#define Z84 B
#define Z85 3
#define Z86 0
#define Z87 8
#define Z88 C
#define Z89 2
#define Z8A D
#define Z8B 7
#define Z8C 1
#define Z8D 4
#define Z8E A
#define Z8F 5

#define Z90 A
#define Z91 2
#define Z92 8
#define Z93 4
#define Z94 7
#define Z95 6
#define Z96 1
#define Z97 5
#define Z98 F
#define Z99 B
#define Z9A 9
#define Z9B E
#define Z9C 3
#define Z9D C
#define Z9E D
#define Z9F 0

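/* Z00..Z9F encode the ten BLAKE message permutations (sigma), one hex
   digit per entry: Zri is the index of the message word used by round r,
   slot i. Rounds 10 and above reuse these rows modulo 10. */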
#define Mx(r, i) Mx_(Z ## r ## i)
#define Mx_(n) Mx__(n)
#define Mx__(n) M ## n

#define CSx(r, i) CSx_(Z ## r ## i)
#define CSx_(n) CSx__(n)
#define CSx__(n) CS ## n

#define CS0 SPH_C32(0x243F6A88)
#define CS1 SPH_C32(0x85A308D3)
#define CS2 SPH_C32(0x13198A2E)
#define CS3 SPH_C32(0x03707344)
#define CS4 SPH_C32(0xA4093822)
#define CS5 SPH_C32(0x299F31D0)
#define CS6 SPH_C32(0x082EFA98)
#define CS7 SPH_C32(0xEC4E6C89)
#define CS8 SPH_C32(0x452821E6)
#define CS9 SPH_C32(0x38D01377)
#define CSA SPH_C32(0xBE5466CF)
#define CSB SPH_C32(0x34E90C6C)
#define CSC SPH_C32(0xC0AC29B7)
#define CSD SPH_C32(0xC97C50DD)
#define CSE SPH_C32(0x3F84D5B5)
#define CSF SPH_C32(0xB5470917)


#define CBx(r, i) CBx_(Z ## r ## i)
#define CBx_(n) CBx__(n)
#define CBx__(n) CB ## n

#define CB0 SPH_C64(0x243F6A8885A308D3)
#define CB1 SPH_C64(0x13198A2E03707344)
#define CB2 SPH_C64(0xA4093822299F31D0)
#define CB3 SPH_C64(0x082EFA98EC4E6C89)
#define CB4 SPH_C64(0x452821E638D01377)
#define CB5 SPH_C64(0xBE5466CF34E90C6C)
#define CB6 SPH_C64(0xC0AC29B7C97C50DD)
#define CB7 SPH_C64(0x3F84D5B5B5470917)
#define CB8 SPH_C64(0x9216D5D98979FB1B)
#define CB9 SPH_C64(0xD1310BA698DFB5AC)
#define CBA SPH_C64(0x2FFD72DBD01ADFB7)
#define CBB SPH_C64(0xB8E1AFED6A267E96)
#define CBC SPH_C64(0xBA7C9045F12C7F99)
#define CBD SPH_C64(0x24A19947B3916CF7)
#define CBE SPH_C64(0x0801F2E2858EFC16)
#define CBF SPH_C64(0x636920D871574E69)

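/* Mx / CSx / CBx use two levels of indirection so that the Z token is
   fully macro-expanded before pasting, e.g. Mx(1, 2) -> Mx_(Z12) ->
   Mx__(4) -> M4. CS0..CSF and CB0..CBF are the standard BLAKE round
   constants, i.e. the leading hex digits of pi in 32-bit and 64-bit
   form. */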
#define GS(m0, m1, c0, c1, a, b, c, d) do { \
a = SPH_T32(a + b + (m0 ^ c1)); \
d = SPH_ROTR32(d ^ a, 16); \
c = SPH_T32(c + d); \
b = SPH_ROTR32(b ^ c, 12); \
a = SPH_T32(a + b + (m1 ^ c0)); \
d = SPH_ROTR32(d ^ a, 8); \
c = SPH_T32(c + d); \
b = SPH_ROTR32(b ^ c, 7); \
} while (0)

#define ROUND_S(r) do { \
GS(Mx(r, 0), Mx(r, 1), CSx(r, 0), CSx(r, 1), V0, V4, V8, VC); \
GS(Mx(r, 2), Mx(r, 3), CSx(r, 2), CSx(r, 3), V1, V5, V9, VD); \
GS(Mx(r, 4), Mx(r, 5), CSx(r, 4), CSx(r, 5), V2, V6, VA, VE); \
GS(Mx(r, 6), Mx(r, 7), CSx(r, 6), CSx(r, 7), V3, V7, VB, VF); \
GS(Mx(r, 8), Mx(r, 9), CSx(r, 8), CSx(r, 9), V0, V5, VA, VF); \
GS(Mx(r, A), Mx(r, B), CSx(r, A), CSx(r, B), V1, V6, VB, VC); \
GS(Mx(r, C), Mx(r, D), CSx(r, C), CSx(r, D), V2, V7, V8, VD); \
GS(Mx(r, E), Mx(r, F), CSx(r, E), CSx(r, F), V3, V4, V9, VE); \
} while (0)


#define GB(m0, m1, c0, c1, a, b, c, d) do { \
a = SPH_T64(a + b + (m0 ^ c1)); \
d = SPH_ROTR64(d ^ a, 32); \
c = SPH_T64(c + d); \
b = SPH_ROTR64(b ^ c, 25); \
a = SPH_T64(a + b + (m1 ^ c0)); \
d = SPH_ROTR64(d ^ a, 16); \
c = SPH_T64(c + d); \
b = SPH_ROTR64(b ^ c, 11); \
} while (0)

#define ROUND_B(r) do { \
GB(Mx(r, 0), Mx(r, 1), CBx(r, 0), CBx(r, 1), V0, V4, V8, VC); \
GB(Mx(r, 2), Mx(r, 3), CBx(r, 2), CBx(r, 3), V1, V5, V9, VD); \
GB(Mx(r, 4), Mx(r, 5), CBx(r, 4), CBx(r, 5), V2, V6, VA, VE); \
GB(Mx(r, 6), Mx(r, 7), CBx(r, 6), CBx(r, 7), V3, V7, VB, VF); \
GB(Mx(r, 8), Mx(r, 9), CBx(r, 8), CBx(r, 9), V0, V5, VA, VF); \
GB(Mx(r, A), Mx(r, B), CBx(r, A), CBx(r, B), V1, V6, VB, VC); \
GB(Mx(r, C), Mx(r, D), CBx(r, C), CBx(r, D), V2, V7, V8, VD); \
GB(Mx(r, E), Mx(r, F), CBx(r, E), CBx(r, F), V3, V4, V9, VE); \
} while (0)

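/* GS and GB are the BLAKE-256 and BLAKE-512 G functions: rotations
   (16, 12, 8, 7) for the 32-bit variant and (32, 25, 16, 11) for the
   64-bit one, with the round constants XORed into the message words.
   ROUND_S / ROUND_B apply G to the four columns and then the four
   diagonals of the 4x4 state V0..VF. */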
#define COMPRESS64 do { \
int r; \
int b=0; \
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; \
sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; \
V0 = blkH0, \
V1 = blkH1, \
V2 = blkH2, \
V3 = blkH3, \
V4 = blkH4, \
V5 = blkH5, \
V6 = blkH6, \
V7 = blkH7; \
V8 = blkS0 ^ CB0, \
V9 = blkS1 ^ CB1, \
VA = blkS2 ^ CB2, \
VB = blkS3 ^ CB3, \
VC = hashctA ^ CB4, \
VD = hashctA ^ CB5, \
VE = hashctB ^ CB6, \
VF = hashctB ^ CB7; \
M0 = sph_dec64be_aligned(buf + 0), \
M1 = sph_dec64be_aligned(buf + 8), \
M2 = sph_dec64be_aligned(buf + 16), \
M3 = sph_dec64be_aligned(buf + 24), \
M4 = sph_dec64be_aligned(buf + 32), \
M5 = sph_dec64be_aligned(buf + 40), \
M6 = sph_dec64be_aligned(buf + 48), \
M7 = sph_dec64be_aligned(buf + 56), \
M8 = sph_dec64be_aligned(buf + 64), \
M9 = sph_dec64be_aligned(buf + 72), \
MA = sph_dec64be_aligned(buf + 80), \
MB = sph_dec64be_aligned(buf + 88), \
MC = sph_dec64be_aligned(buf + 96), \
MD = sph_dec64be_aligned(buf + 104), \
ME = sph_dec64be_aligned(buf + 112), \
MF = sph_dec64be_aligned(buf + 120); \
/* loop once and a half */ \
/* save some space */ \
for (;;) { \
ROUND_B(0); \
ROUND_B(1); \
ROUND_B(2); \
ROUND_B(3); \
ROUND_B(4); \
ROUND_B(5); \
if (b) break; \
b = 1; \
ROUND_B(6); \
ROUND_B(7); \
ROUND_B(8); \
ROUND_B(9); \
}; \
blkH0 ^= blkS0 ^ V0 ^ V8, \
blkH1 ^= blkS1 ^ V1 ^ V9, \
blkH2 ^= blkS2 ^ V2 ^ VA, \
blkH3 ^= blkS3 ^ V3 ^ VB, \
blkH4 ^= blkS0 ^ V4 ^ VC, \
blkH5 ^= blkS1 ^ V5 ^ VD, \
blkH6 ^= blkS2 ^ V6 ^ VE, \
blkH7 ^= blkS3 ^ V7 ^ VF; \
} while (0)
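/* The for (;;) loop with the b flag runs rounds 0-5, then 6-9, then 0-5
   again: 16 rounds in total, matching BLAKE-512 (rounds 10-15 reuse the
   sigma rows modulo 10) while only spelling out ten ROUND_B calls. */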

/*
*/
#define DECL_BLK \
sph_u64 blkH0; \
sph_u64 blkH1; \
sph_u64 blkH2; \
sph_u64 blkH3; \
sph_u64 blkH4; \
sph_u64 blkH5; \
sph_u64 blkH6; \
sph_u64 blkH7; \
sph_u64 blkS0; \
sph_u64 blkS1; \
sph_u64 blkS2; \
sph_u64 blkS3; \

/* load initial constants */
#define BLK_I \
do { \
blkH0 = SPH_C64(0x6A09E667F3BCC908); \
blkH1 = SPH_C64(0xBB67AE8584CAA73B); \
blkH2 = SPH_C64(0x3C6EF372FE94F82B); \
blkH3 = SPH_C64(0xA54FF53A5F1D36F1); \
blkH4 = SPH_C64(0x510E527FADE682D1); \
blkH5 = SPH_C64(0x9B05688C2B3E6C1F); \
blkH6 = SPH_C64(0x1F83D9ABFB41BD6B); \
blkH7 = SPH_C64(0x5BE0CD19137E2179); \
blkS0 = 0; \
blkS1 = 0; \
blkS2 = 0; \
blkS3 = 0; \
hashctB = SPH_T64(0- 1); \
} while (0)

/* copy in 80 for initial hash */
#define BLK_W \
do { \
memcpy(hashbuf, input, 80); \
hashctA = SPH_C64(0xFFFFFFFFFFFFFC00) + 80*8; \
hashptr = 80; \
} while (0)

/* copy in 64 for looped hash */
#define BLK_U \
do { \
memcpy(hashbuf, hash , 64); \
hashctA = SPH_C64(0xFFFFFFFFFFFFFC00) + 64*8; \
hashptr = 64; \
} while (0)

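/* Counter trick: BLK_W / BLK_U seed hashctA with -1024 plus the real bit
   length (80*8 = 640 or 64*8 = 512) and BLK_I seeds hashctB with -1, so
   the hashctA += 1024 / hashctB += 1 performed in BLK_C just before
   COMPRESS64 leaves exactly (bit length, 0) in the BLAKE-512 counter
   words. */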
/* blake compress function */
/* hash = blake512(loaded) */
#define BLK_C \
do { \
 \
union { \
unsigned char buf[128]; \
sph_u64 dummy; \
} u; \
size_t ptr; \
unsigned bit_len; \
 \
ptr = hashptr; \
bit_len = ((unsigned)ptr << 3) + 0; \
u.buf[ptr] = ((0 & -(0x80)) | (0x80)) & 0xFF; \
memset(u.buf + ptr + 1, 0, 111 - ptr); \
u.buf[111] |= 1; \
sph_enc64be_aligned(u.buf + 112, 0); \
sph_enc64be_aligned(u.buf + 120, bit_len); \
do { \
const void *data = u.buf + ptr; \
unsigned char *buf; \
buf = hashbuf; \
size_t clen; \
clen = (sizeof(char)*128) - hashptr; \
memcpy(buf + hashptr, data, clen); \
hashctA = SPH_T64(hashctA + 1024); \
hashctB = SPH_T64(hashctB + 1); \
COMPRESS64; \
} while (0); \
/* end blake64(sc, u.buf + ptr, 128 - ptr); */ \
sph_enc64be((unsigned char*)(hash) + (0 << 3), blkH0), \
sph_enc64be((unsigned char*)(hash) + (1 << 3), blkH1); \
sph_enc64be((unsigned char*)(hash) + (2 << 3), blkH2), \
sph_enc64be((unsigned char*)(hash) + (3 << 3), blkH3); \
sph_enc64be((unsigned char*)(hash) + (4 << 3), blkH4), \
sph_enc64be((unsigned char*)(hash) + (5 << 3), blkH5); \
sph_enc64be((unsigned char*)(hash) + (6 << 3), blkH6), \
sph_enc64be((unsigned char*)(hash) + (7 << 3), blkH7); \
} while (0)


#ifdef __cplusplus
}
#endif

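A minimal usage sketch, not part of the upstream file: it shows how the DECL_BLK / BLK_I / BLK_W / BLK_C macros above appear intended to be combined to BLAKE-512 an 80-byte block header. The wrapper name and the local buffer declarations are assumptions; in the miner the surrounding scanhash code provides them, along with <string.h> and the sph_* helpers.

/* Hypothetical wrapper, for illustration only. */
static void blake512_80_sketch( void *hash, const void *input )
{
    unsigned char hashbuf[128];   /* staging buffer used by BLK_W / BLK_C */
    size_t hashptr;               /* number of bytes currently buffered   */
    sph_u64 hashctA, hashctB;     /* low / high words of the bit counter  */
    DECL_BLK;                     /* blkH0..blkH7, blkS0..blkS3           */

    BLK_I;   /* load the BLAKE-512 IV and clear the salt words     */
    BLK_W;   /* buffer the 80-byte header and pre-bias the counter */
    BLK_C;   /* pad, run COMPRESS64, store the 64-byte digest      */
}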
2
algo/blake/sse2/blake/sse41/api.h
Normal file
@@ -0,0 +1,2 @@
#define CRYPTO_BYTES 64

2
algo/blake/sse2/blake/sse41/architectures
Normal file
@@ -0,0 +1,2 @@
amd64
x86
8
algo/blake/sse2/blake/sse41/config.h
Normal file
@@ -0,0 +1,8 @@
#ifndef __BLAKE512_CONFIG_H__
#define __BLAKE512_CONFIG_H__

#define AVOID_BRANCHING 1
//#define HAVE_XOP 1

#endif

287
algo/blake/sse2/blake/sse41/hash.c
Normal file
@@ -0,0 +1,287 @@
|
||||
|
||||
#include "hash.h"
|
||||
/*
|
||||
#ifndef NOT_SUPERCOP
|
||||
|
||||
#include "crypto_hash.h"
|
||||
#include "crypto_uint64.h"
|
||||
#include "crypto_uint32.h"
|
||||
#include "crypto_uint8.h"
|
||||
|
||||
typedef crypto_uint64 u64;
|
||||
typedef crypto_uint32 u32;
|
||||
typedef crypto_uint8 u8;
|
||||
|
||||
#else
|
||||
|
||||
typedef unsigned long long u64;
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned char u8;
|
||||
|
||||
#endif
|
||||
*/
|
||||
#define U8TO32(p) \
|
||||
(((u32)((p)[0]) << 24) | ((u32)((p)[1]) << 16) | \
|
||||
((u32)((p)[2]) << 8) | ((u32)((p)[3]) ))
|
||||
#define U8TO64(p) \
|
||||
(((u64)U8TO32(p) << 32) | (u64)U8TO32((p) + 4))
|
||||
#define U32TO8(p, v) \
|
||||
(p)[0] = (u8)((v) >> 24); (p)[1] = (u8)((v) >> 16); \
|
||||
(p)[2] = (u8)((v) >> 8); (p)[3] = (u8)((v) );
|
||||
#define U64TO8(p, v) \
|
||||
U32TO8((p), (u32)((v) >> 32)); \
|
||||
U32TO8((p) + 4, (u32)((v) ));
|
||||
/*
|
||||
typedef struct
|
||||
{
|
||||
__m128i h[4];
|
||||
u64 s[4], t[2];
|
||||
u32 buflen, nullt;
|
||||
u8 buf[128];
|
||||
} state __attribute__ ((aligned (64)));
|
||||
*/
|
||||
static const u8 padding[129] =
|
||||
{
|
||||
0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
};
|
||||
|
||||
static inline int blake512_compress( hashState_blake * state, const u8 * datablock )
|
||||
{
|
||||
|
||||
__m128i row1l,row1h;
|
||||
__m128i row2l,row2h;
|
||||
__m128i row3l,row3h;
|
||||
__m128i row4l,row4h;
|
||||
|
||||
const __m128i r16 = _mm_setr_epi8(2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9);
|
||||
const __m128i u8to64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
|
||||
__m128i m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
__m128i t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
__m128i b0, b1, b2, b3;
|
||||
|
||||
m0 = _mm_loadu_si128((__m128i*)(datablock + 0));
|
||||
m1 = _mm_loadu_si128((__m128i*)(datablock + 16));
|
||||
m2 = _mm_loadu_si128((__m128i*)(datablock + 32));
|
||||
m3 = _mm_loadu_si128((__m128i*)(datablock + 48));
|
||||
m4 = _mm_loadu_si128((__m128i*)(datablock + 64));
|
||||
m5 = _mm_loadu_si128((__m128i*)(datablock + 80));
|
||||
m6 = _mm_loadu_si128((__m128i*)(datablock + 96));
|
||||
m7 = _mm_loadu_si128((__m128i*)(datablock + 112));
|
||||
|
||||
m0 = BSWAP64(m0);
|
||||
m1 = BSWAP64(m1);
|
||||
m2 = BSWAP64(m2);
|
||||
m3 = BSWAP64(m3);
|
||||
m4 = BSWAP64(m4);
|
||||
m5 = BSWAP64(m5);
|
||||
m6 = BSWAP64(m6);
|
||||
m7 = BSWAP64(m7);
|
||||
|
||||
row1l = state->h[0];
|
||||
row1h = state->h[1];
|
||||
row2l = state->h[2];
|
||||
row2h = state->h[3];
|
||||
row3l = _mm_set_epi64x(0x13198A2E03707344ULL, 0x243F6A8885A308D3ULL);
|
||||
row3h = _mm_set_epi64x(0x082EFA98EC4E6C89ULL, 0xA4093822299F31D0ULL);
|
||||
|
||||
row4l = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0x452821E638D01377ULL);
|
||||
row4h = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0xC0AC29B7C97C50DDULL);
|
||||
|
||||
#ifdef AVOID_BRANCHING
|
||||
do
|
||||
{
|
||||
const __m128i mask = _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_set1_epi32(state->nullt));
|
||||
const __m128i xor1 = _mm_and_si128(_mm_set1_epi64x(state->t[0]), mask);
|
||||
const __m128i xor2 = _mm_and_si128(_mm_set1_epi64x(state->t[1]), mask);
|
||||
row4l = _mm_xor_si128(row4l, xor1);
|
||||
row4h = _mm_xor_si128(row4h, xor2);
|
||||
} while(0);
|
||||
#else
|
||||
if(!state->nullt)
|
||||
{
|
||||
row4l = _mm_xor_si128(row4l, _mm_set1_epi64x(state->t[0]));
|
||||
row4h = _mm_xor_si128(row4h, _mm_set1_epi64x(state->t[1]));
|
||||
}
|
||||
#endif
|
||||
|
||||
ROUND( 0);
|
||||
ROUND( 1);
|
||||
ROUND( 2);
|
||||
ROUND( 3);
|
||||
ROUND( 4);
|
||||
ROUND( 5);
|
||||
ROUND( 6);
|
||||
ROUND( 7);
|
||||
ROUND( 8);
|
||||
ROUND( 9);
|
||||
ROUND(10);
|
||||
ROUND(11);
|
||||
ROUND(12);
|
||||
ROUND(13);
|
||||
ROUND(14);
|
||||
ROUND(15);
|
||||
|
||||
row1l = _mm_xor_si128(row3l,row1l);
|
||||
row1h = _mm_xor_si128(row3h,row1h);
|
||||
|
||||
state->h[0] = _mm_xor_si128(row1l, state->h[0]);
|
||||
state->h[1] = _mm_xor_si128(row1h, state->h[1]);
|
||||
|
||||
row2l = _mm_xor_si128(row4l,row2l);
|
||||
row2h = _mm_xor_si128(row4h,row2h);
|
||||
|
||||
state->h[2] = _mm_xor_si128(row2l, state->h[2]);
|
||||
state->h[3] = _mm_xor_si128(row2h, state->h[3]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void blake512_init( hashState_blake * S, u64 databitlen )
|
||||
{
|
||||
memset(S, 0, sizeof(hashState_blake));
|
||||
S->h[0] = _mm_set_epi64x(0xBB67AE8584CAA73BULL, 0x6A09E667F3BCC908ULL);
|
||||
S->h[1] = _mm_set_epi64x(0xA54FF53A5F1D36F1ULL, 0x3C6EF372FE94F82BULL);
|
||||
S->h[2] = _mm_set_epi64x(0x9B05688C2B3E6C1FULL, 0x510E527FADE682D1ULL);
|
||||
S->h[3] = _mm_set_epi64x(0x5BE0CD19137E2179ULL, 0x1F83D9ABFB41BD6BULL);
|
||||
S->buflen = databitlen;
|
||||
}
|
||||
|
||||
|
||||
static void blake512_update( hashState_blake * S, const u8 * data, u64 datalen )
|
||||
{
|
||||
|
||||
|
||||
int left = (S->buflen >> 3);
|
||||
int fill = 128 - left;
|
||||
|
||||
if( left && ( ((datalen >> 3) & 0x7F) >= fill ) ) {
|
||||
memcpy( (void *) (S->buf + left), (void *) data, fill );
|
||||
S->t[0] += 1024;
|
||||
blake512_compress( S, S->buf );
|
||||
data += fill;
|
||||
datalen -= (fill << 3);
|
||||
left = 0;
|
||||
}
|
||||
|
||||
while( datalen >= 1024 ) {
|
||||
S->t[0] += 1024;
|
||||
blake512_compress( S, data );
|
||||
data += 128;
|
||||
datalen -= 1024;
|
||||
}
|
||||
|
||||
if( datalen > 0 ) {
|
||||
memcpy( (void *) (S->buf + left), (void *) data, ( datalen>>3 ) & 0x7F );
|
||||
S->buflen = (left<<3) + datalen;
|
||||
}
|
||||
else S->buflen=0;
|
||||
}
|
||||
|
||||
static inline void blake512_final( hashState_blake * S, u8 * digest )
|
||||
{
|
||||
|
||||
u8 msglen[16], zo=0x01,oo=0x81;
|
||||
u64 lo=S->t[0] + S->buflen, hi = S->t[1];
|
||||
if ( lo < S->buflen ) hi++;
|
||||
U64TO8( msglen + 0, hi );
|
||||
U64TO8( msglen + 8, lo );
|
||||
|
||||
if ( S->buflen == 888 ) /* one padding byte */
|
||||
{
|
||||
S->t[0] -= 8;
|
||||
blake512_update( S, &oo, 8 );
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( S->buflen < 888 ) /* enough space to fill the block */
|
||||
{
|
||||
if ( S->buflen == 0 ) S->nullt=1;
|
||||
S->t[0] -= 888 - S->buflen;
|
||||
blake512_update( S, padding, 888 - S->buflen );
|
||||
}
|
||||
else /* NOT enough space, need 2 compressions */
|
||||
{
|
||||
S->t[0] -= 1024 - S->buflen;
|
||||
blake512_update( S, padding, 1024 - S->buflen );
|
||||
S->t[0] -= 888;
|
||||
blake512_update( S, padding+1, 888 );
|
||||
S->nullt = 1;
|
||||
}
|
||||
blake512_update( S, &zo, 8 );
|
||||
S->t[0] -= 8;
|
||||
}
|
||||
S->t[0] -= 128;
|
||||
blake512_update( S, msglen, 128 );
|
||||
|
||||
do
|
||||
{
|
||||
const __m128i u8to64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
_mm_storeu_si128((__m128i*)(digest + 0), BSWAP64(S->h[0]));
|
||||
_mm_storeu_si128((__m128i*)(digest + 16), BSWAP64(S->h[1]));
|
||||
_mm_storeu_si128((__m128i*)(digest + 32), BSWAP64(S->h[2]));
|
||||
_mm_storeu_si128((__m128i*)(digest + 48), BSWAP64(S->h[3]));
|
||||
} while(0);
|
||||
}
|
||||
|
||||
/*
|
||||
int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
|
||||
{
|
||||
|
||||
hashState_blake S;
|
||||
blake512_init( &S );
|
||||
blake512_update( &S, in, inlen*8 );
|
||||
blake512_final( &S, out );
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
/*
|
||||
#ifdef NOT_SUPERCOP
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int i, v;
|
||||
u8 data[144], digest[64];
|
||||
u8 test1[]= {0x97, 0x96, 0x15, 0x87, 0xF6, 0xD9, 0x70, 0xFA, 0xBA, 0x6D, 0x24, 0x78, 0x04, 0x5D, 0xE6, 0xD1,
|
||||
0xFA, 0xBD, 0x09, 0xB6, 0x1A, 0xE5, 0x09, 0x32, 0x05, 0x4D, 0x52, 0xBC, 0x29, 0xD3, 0x1B, 0xE4,
|
||||
0xFF, 0x91, 0x02, 0xB9, 0xF6, 0x9E, 0x2B, 0xBD, 0xB8, 0x3B, 0xE1, 0x3D, 0x4B, 0x9C, 0x06, 0x09,
|
||||
0x1E, 0x5F, 0xA0, 0xB4, 0x8B, 0xD0, 0x81, 0xB6, 0x34, 0x05, 0x8B, 0xE0, 0xEC, 0x49, 0xBE, 0xB3};
|
||||
u8 test2[]= {0x31, 0x37, 0x17, 0xD6, 0x08, 0xE9, 0xCF, 0x75, 0x8D, 0xCB, 0x1E, 0xB0, 0xF0, 0xC3, 0xCF, 0x9F,
|
||||
0xC1, 0x50, 0xB2, 0xD5, 0x00, 0xFB, 0x33, 0xF5, 0x1C, 0x52, 0xAF, 0xC9, 0x9D, 0x35, 0x8A, 0x2F,
|
||||
0x13, 0x74, 0xB8, 0xA3, 0x8B, 0xBA, 0x79, 0x74, 0xE7, 0xF6, 0xEF, 0x79, 0xCA, 0xB1, 0x6F, 0x22,
|
||||
0xCE, 0x1E, 0x64, 0x9D, 0x6E, 0x01, 0xAD, 0x95, 0x89, 0xC2, 0x13, 0x04, 0x5D, 0x54, 0x5D, 0xDE};
|
||||
|
||||
for(i=0; i<144; ++i) data[i]=0;
|
||||
|
||||
crypto_hash( digest, data, 1 );
|
||||
v=0;
|
||||
for(i=0; i<64; ++i) {
|
||||
printf("%02X", digest[i]);
|
||||
if ( digest[i] != test1[i]) v=1;
|
||||
}
|
||||
if (v) printf("\nerror\n");
|
||||
else printf("\nok\n");
|
||||
|
||||
for(i=0; i<144; ++i) data[i]=0;
|
||||
|
||||
crypto_hash( digest, data, 144 );
|
||||
v=0;
|
||||
for(i=0; i<64; ++i) {
|
||||
printf("%02X", digest[i]);
|
||||
if ( digest[i] != test2[i]) v=1;
|
||||
}
|
||||
if (v) printf("\nerror\n");
|
||||
else printf("\nok\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
*/
|
||||
|
||||
|
||||
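A short usage sketch, not code from the upstream tree: driving the SSE4.1 blake512_init / blake512_update / blake512_final above on an 80-byte header. blake512_update takes its length in bits (as the 1024-bits-per-128-byte-block logic above shows); passing 0 to blake512_init to start with an empty buffer is an assumption based on how buflen is used in blake512_update.

/* Hypothetical caller, for illustration only. */
static void blake512_80_example( unsigned char digest[64],
                                 const unsigned char header[80] )
{
    hashState_blake S;

    blake512_init( &S, 0 );               /* assumed: start with an empty buffer */
    blake512_update( &S, header, 80*8 );  /* length is passed in bits            */
    blake512_final( &S, digest );         /* writes a 64-byte big-endian digest  */
}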
74
algo/blake/sse2/blake/sse41/hash.h
Normal file
@@ -0,0 +1,74 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <x86intrin.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "rounds.h"
|
||||
/*
|
||||
#ifndef NOT_SUPERCOP
|
||||
|
||||
#include "crypto_hash.h"
|
||||
#include "crypto_uint64.h"
|
||||
#include "crypto_uint32.h"
|
||||
#include "crypto_uint8.h"
|
||||
|
||||
typedef crypto_uint64 u64;
|
||||
typedef crypto_uint32 u32;
|
||||
typedef crypto_uint8 u8;
|
||||
|
||||
#else
|
||||
*/
|
||||
typedef unsigned long long u64;
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned char u8;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m128i h[4];
|
||||
u64 s[4], t[2];
|
||||
u32 buflen, nullt;
|
||||
u8 buf[128];
|
||||
} hashState_blake __attribute__ ((aligned (64)));
|
||||
/*
|
||||
#endif
|
||||
|
||||
#define U8TO32(p) \
|
||||
(((u32)((p)[0]) << 24) | ((u32)((p)[1]) << 16) | \
|
||||
((u32)((p)[2]) << 8) | ((u32)((p)[3]) ))
|
||||
#define U8TO64(p) \
|
||||
(((u64)U8TO32(p) << 32) | (u64)U8TO32((p) + 4))
|
||||
#define U32TO8(p, v) \
|
||||
(p)[0] = (u8)((v) >> 24); (p)[1] = (u8)((v) >> 16); \
|
||||
(p)[2] = (u8)((v) >> 8); (p)[3] = (u8)((v) );
|
||||
#define U64TO8(p, v) \
|
||||
U32TO8((p), (u32)((v) >> 32)); \
|
||||
U32TO8((p) + 4, (u32)((v) ));
|
||||
*/
|
||||
|
||||
/*
|
||||
static const u8 padding[129] =
|
||||
{
|
||||
0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
};
|
||||
|
||||
*/
|
||||
static inline void blake512_init( hashState_blake * S, u64 datalen );
|
||||
|
||||
|
||||
static void blake512_update( hashState_blake * S, const u8 * data, u64 datalen ) ;
|
||||
|
||||
static inline void blake512_final( hashState_blake * S, u8 * digest ) ;
|
||||
|
||||
|
||||
int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen ) ;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
2
algo/blake/sse2/blake/sse41/implementors
Normal file
@@ -0,0 +1,2 @@
Jean-Philippe Aumasson
Samuel Neves
871
algo/blake/sse2/blake/sse41/rounds.h
Normal file
@@ -0,0 +1,871 @@
|
||||
|
||||
#ifndef __BLAKE512_ROUNDS_H__
|
||||
#define __BLAKE512_ROUNDS_H__
|
||||
|
||||
#ifndef HAVE_XOP
|
||||
#define BSWAP64(x) _mm_shuffle_epi8((x), u8to64)
|
||||
|
||||
#define _mm_roti_epi64(x, c) \
|
||||
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
|
||||
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
|
||||
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-c)))
|
||||
#else
|
||||
#define BSWAP64(x) _mm_perm_epi8((x),(x),u8to64)
|
||||
#endif
|
||||
|
||||
|
||||
#define LOAD_MSG_0_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m0, m1); \
|
||||
t1 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x13198A2E03707344ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m2, m3); \
|
||||
t3 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0xBE5466CF34E90C6CULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m0, m1); \
|
||||
t1 = _mm_set_epi64x(0xA4093822299F31D0ULL, 0x243F6A8885A308D3ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m2, m3); \
|
||||
t3 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0x452821E638D01377ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m4, m5); \
|
||||
t1 = _mm_set_epi64x(0xB8E1AFED6A267E96ULL, 0xD1310BA698DFB5ACULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m6, m7); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0x24A19947B3916CF7ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_0_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m4, m5); \
|
||||
t1 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0x9216D5D98979FB1BULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m6, m7); \
|
||||
t3 = _mm_set_epi64x(0x801F2E2858EFC16ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m7, m2); \
|
||||
t1 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x2FFD72DBD01ADFB7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m4, m6); \
|
||||
t3 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0x636920D871574E69ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
t1 = _mm_set_epi64x(0x452821E638D01377ULL, 0x801F2E2858EFC16ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m3, m7, 8); \
|
||||
t3 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0xD1310BA698DFB5ACULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
||||
t1 = _mm_set_epi64x(0xA4093822299F31D0ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m5, m2); \
|
||||
t3 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x3F84D5B5B5470917ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_1_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m6, m1); \
|
||||
t1 = _mm_set_epi64x(0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m3, m1); \
|
||||
t3 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_alignr_epi8(m6, m5, 8); \
|
||||
t1 = _mm_set_epi64x(0x243F6A8885A308D3ULL, 0x9216D5D98979FB1BULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m2, m7); \
|
||||
t3 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0xA4093822299F31D0ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m4, m0); \
|
||||
t1 = _mm_set_epi64x(0xBA7C9045F12C7F99ULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m1, m6, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0xBE5466CF34E90C6CULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m5, m1, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0x801F2E2858EFC16ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m3, m4); \
|
||||
t3 = _mm_set_epi64x(0x452821E638D01377ULL, 0x13198A2E03707344ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_2_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m7, m3); \
|
||||
t1 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x2FFD72DBD01ADFB7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m2, m0, 8); \
|
||||
t3 = _mm_set_epi64x(0xD1310BA698DFB5ACULL, 0x3F84D5B5B5470917ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m3, m1); \
|
||||
t1 = _mm_set_epi64x(0x13198A2E03707344ULL, 0xD1310BA698DFB5ACULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m6, m5); \
|
||||
t3 = _mm_set_epi64x(0x801F2E2858EFC16ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m4, m0); \
|
||||
t1 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x3F84D5B5B5470917ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m6, m7); \
|
||||
t3 = _mm_set_epi64x(0xB8E1AFED6A267E96ULL, 0x24A19947B3916CF7ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m1, m2, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0xC0AC29B7C97C50DDULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x243F6A8885A308D3ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_3_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m3, m5); \
|
||||
t1 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0xA4093822299F31D0ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m0, m4); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0x452821E638D01377ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m4, m2); \
|
||||
t1 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0x243F6A8885A308D3ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m1, m5); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0x452821E638D01377ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m0, m3, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0xD1310BA698DFB5ACULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0xA4093822299F31D0ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m7, m5, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xBA7C9045F12C7F99ULL, 0x13198A2E03707344ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m3, m1, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0x9216D5D98979FB1BULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_4_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_alignr_epi8(m6, m0, 8); \
|
||||
t1 = _mm_set_epi64x(0xB8E1AFED6A267E96ULL, 0x801F2E2858EFC16ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m4, m6, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0xC0AC29B7C97C50DDULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m1, m3); \
|
||||
t1 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m0, m4); \
|
||||
t3 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m6, m5); \
|
||||
t1 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0xA4093822299F31D0ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m5, m1); \
|
||||
t3 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x243F6A8885A308D3ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m2, m3, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0x24A19947B3916CF7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m7, m0); \
|
||||
t3 = _mm_set_epi64x(0xD1310BA698DFB5ACULL, 0x801F2E2858EFC16ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_5_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m6, m2); \
|
||||
t1 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0x452821E638D01377ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m7, m4, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x13198A2E03707344ULL, 0x636920D871574E69ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m6, m0, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0x636920D871574E69ULL, 0xBE5466CF34E90C6CULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m7, m2); \
|
||||
t3 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0x24A19947B3916CF7ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m2, m7); \
|
||||
t1 = _mm_set_epi64x(0x13198A2E03707344ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m5, m6, 8); \
|
||||
t3 = _mm_set_epi64x(0x452821E638D01377ULL, 0x801F2E2858EFC16ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m0, m3); \
|
||||
t1 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x3F84D5B5B5470917ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
|
||||
t3 = _mm_set_epi64x(0xB8E1AFED6A267E96ULL, 0xA4093822299F31D0ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_6_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m3, m1); \
|
||||
t1 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0x243F6A8885A308D3ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m1, m5, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0xD1310BA698DFB5ACULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m6, m3); \
|
||||
t1 = _mm_set_epi64x(0x801F2E2858EFC16ULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m6, m1, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0xD1310BA698DFB5ACULL, 0x13198A2E03707344ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_alignr_epi8(m7, m5, 8); \
|
||||
t1 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0x24A19947B3916CF7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m0, m4); \
|
||||
t3 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m2, m7); \
|
||||
t1 = _mm_set_epi64x(0x452821E638D01377ULL, 0x243F6A8885A308D3ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m4, m1); \
|
||||
t3 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0xC0AC29B7C97C50DDULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_7_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m0, m2); \
|
||||
t1 = _mm_set_epi64x(0x636920D871574E69ULL, 0xBE5466CF34E90C6CULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m3, m5); \
|
||||
t3 = _mm_set_epi64x(0xA4093822299F31D0ULL, 0x9216D5D98979FB1BULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m3, m7); \
|
||||
t1 = _mm_set_epi64x(0xD1310BA698DFB5ACULL, 0x636920D871574E69ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m0, m5, 8); \
|
||||
t3 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x82EFA98EC4E6C89ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m7, m4); \
|
||||
t1 = _mm_set_epi64x(0x801F2E2858EFC16ULL, 0xC0AC29B7C97C50DDULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m4, m1, 8); \
|
||||
t3 = _mm_set_epi64x(0x243F6A8885A308D3ULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = m6; \
|
||||
t1 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0xA4093822299F31D0ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m5, m0, 8); \
|
||||
t3 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0x452821E638D01377ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_8_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m1, m3, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = m2; \
|
||||
t3 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0x13198A2E03707344ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
t1 = _mm_set_epi64x(0x452821E638D01377ULL, 0xA4093822299F31D0ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m3, m0); \
|
||||
t3 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0xC0AC29B7C97C50DDULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m1, m2); \
|
||||
t1 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x2FFD72DBD01ADFB7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m3, m2, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x13198A2E03707344ULL, 0x3F84D5B5B5470917ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m7, m4); \
|
||||
t1 = _mm_set_epi64x(0x801F2E2858EFC16ULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m1, m6); \
|
||||
t3 = _mm_set_epi64x(0x243F6A8885A308D3ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_9_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_alignr_epi8(m7, m5, 8); \
|
||||
t1 = _mm_set_epi64x(0xD1310BA698DFB5ACULL, 0x636920D871574E69ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m6, m0); \
|
||||
t3 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0x82EFA98EC4E6C89ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m0, m1); \
|
||||
t1 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x13198A2E03707344ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m2, m3); \
|
||||
t3 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0xBE5466CF34E90C6CULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m0, m1); \
|
||||
t1 = _mm_set_epi64x(0xA4093822299F31D0ULL, 0x243F6A8885A308D3ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m2, m3); \
|
||||
t3 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0x452821E638D01377ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m4, m5); \
|
||||
t1 = _mm_set_epi64x(0xB8E1AFED6A267E96ULL, 0xD1310BA698DFB5ACULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m6, m7); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0x24A19947B3916CF7ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_10_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m4, m5); \
|
||||
t1 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0x9216D5D98979FB1BULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m6, m7); \
|
||||
t3 = _mm_set_epi64x(0x801F2E2858EFC16ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m7, m2); \
|
||||
t1 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x2FFD72DBD01ADFB7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m4, m6); \
|
||||
t3 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0x636920D871574E69ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m5, m4); \
|
||||
t1 = _mm_set_epi64x(0x452821E638D01377ULL, 0x801F2E2858EFC16ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m3, m7, 8); \
|
||||
t3 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0xD1310BA698DFB5ACULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
||||
t1 = _mm_set_epi64x(0xA4093822299F31D0ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m5, m2); \
|
||||
t3 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x3F84D5B5B5470917ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_11_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m6, m1); \
|
||||
t1 = _mm_set_epi64x(0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m3, m1); \
|
||||
t3 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_12_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_alignr_epi8(m6, m5, 8); \
|
||||
t1 = _mm_set_epi64x(0x243F6A8885A308D3ULL, 0x9216D5D98979FB1BULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m2, m7); \
|
||||
t3 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0xA4093822299F31D0ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_12_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m4, m0); \
|
||||
t1 = _mm_set_epi64x(0xBA7C9045F12C7F99ULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m1, m6, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0xBE5466CF34E90C6CULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_12_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m5, m1, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0x801F2E2858EFC16ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m3, m4); \
|
||||
t3 = _mm_set_epi64x(0x452821E638D01377ULL, 0x13198A2E03707344ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_12_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m7, m3); \
|
||||
t1 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x2FFD72DBD01ADFB7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_alignr_epi8(m2, m0, 8); \
|
||||
t3 = _mm_set_epi64x(0xD1310BA698DFB5ACULL, 0x3F84D5B5B5470917ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_13_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m3, m1); \
|
||||
t1 = _mm_set_epi64x(0x13198A2E03707344ULL, 0xD1310BA698DFB5ACULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m6, m5); \
|
||||
t3 = _mm_set_epi64x(0x801F2E2858EFC16ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_13_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m4, m0); \
|
||||
t1 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0x3F84D5B5B5470917ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m6, m7); \
|
||||
t3 = _mm_set_epi64x(0xB8E1AFED6A267E96ULL, 0x24A19947B3916CF7ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_13_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m1, m2, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0xC0AC29B7C97C50DDULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x243F6A8885A308D3ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_13_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m3, m5); \
|
||||
t1 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0xA4093822299F31D0ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m0, m4); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0x452821E638D01377ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_14_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m4, m2); \
|
||||
t1 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0x243F6A8885A308D3ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m1, m5); \
|
||||
t3 = _mm_set_epi64x(0x636920D871574E69ULL, 0x452821E638D01377ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_14_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m0, m3, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0xD1310BA698DFB5ACULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m2, m7, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0xA4093822299F31D0ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_14_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m7, m5, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xBA7C9045F12C7F99ULL, 0x13198A2E03707344ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m3, m1, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x24A19947B3916CF7ULL, 0x9216D5D98979FB1BULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_14_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_alignr_epi8(m6, m0, 8); \
|
||||
t1 = _mm_set_epi64x(0xB8E1AFED6A267E96ULL, 0x801F2E2858EFC16ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m4, m6, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0xC0AC29B7C97C50DDULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_15_1(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m1, m3); \
|
||||
t1 = _mm_set_epi64x(0x2FFD72DBD01ADFB7ULL, 0xBA7C9045F12C7F99ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpacklo_epi64(m0, m4); \
|
||||
t3 = _mm_set_epi64x(0x82EFA98EC4E6C89ULL, 0xB8E1AFED6A267E96ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_15_2(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpacklo_epi64(m6, m5); \
|
||||
t1 = _mm_set_epi64x(0xC0AC29B7C97C50DDULL, 0xA4093822299F31D0ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m5, m1); \
|
||||
t3 = _mm_set_epi64x(0x9216D5D98979FB1BULL, 0x243F6A8885A308D3ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_15_3(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_blend_epi16(m2, m3, 0xF0); \
|
||||
t1 = _mm_set_epi64x(0xBE5466CF34E90C6CULL, 0x24A19947B3916CF7ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_unpackhi_epi64(m7, m0); \
|
||||
t3 = _mm_set_epi64x(0xD1310BA698DFB5ACULL, 0x801F2E2858EFC16ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LOAD_MSG_15_4(b0, b1) \
|
||||
do \
|
||||
{ \
|
||||
t0 = _mm_unpackhi_epi64(m6, m2); \
|
||||
t1 = _mm_set_epi64x(0x3F84D5B5B5470917ULL, 0x452821E638D01377ULL); \
|
||||
b0 = _mm_xor_si128(t0, t1); \
|
||||
t2 = _mm_blend_epi16(m7, m4, 0xF0); \
|
||||
t3 = _mm_set_epi64x(0x13198A2E03707344ULL, 0x636920D871574E69ULL); \
|
||||
b1 = _mm_xor_si128(t2, t3); \
|
||||
} while(0)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||
\
|
||||
row4l = _mm_xor_si128(row4l, row1l); \
|
||||
row4h = _mm_xor_si128(row4h, row1h); \
|
||||
\
|
||||
row4l = _mm_roti_epi64(row4l, -32); \
|
||||
row4h = _mm_roti_epi64(row4h, -32); \
|
||||
\
|
||||
row3l = _mm_add_epi64(row3l, row4l); \
|
||||
row3h = _mm_add_epi64(row3h, row4h); \
|
||||
\
|
||||
row2l = _mm_xor_si128(row2l, row3l); \
|
||||
row2h = _mm_xor_si128(row2h, row3h); \
|
||||
\
|
||||
row2l = _mm_roti_epi64(row2l, -25); \
|
||||
row2h = _mm_roti_epi64(row2h, -25); \
|
||||
|
||||
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||
\
|
||||
row4l = _mm_xor_si128(row4l, row1l); \
|
||||
row4h = _mm_xor_si128(row4h, row1h); \
|
||||
\
|
||||
row4l = _mm_roti_epi64(row4l, -16); \
|
||||
row4h = _mm_roti_epi64(row4h, -16); \
|
||||
\
|
||||
row3l = _mm_add_epi64(row3l, row4l); \
|
||||
row3h = _mm_add_epi64(row3h, row4h); \
|
||||
\
|
||||
row2l = _mm_xor_si128(row2l, row3l); \
|
||||
row2h = _mm_xor_si128(row2h, row3h); \
|
||||
\
|
||||
row2l = _mm_roti_epi64(row2l, -11); \
|
||||
row2h = _mm_roti_epi64(row2h, -11); \
|
||||
|
||||
|
||||
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||
row2l = t0; \
|
||||
row2h = t1; \
|
||||
\
|
||||
t0 = row3l; \
|
||||
row3l = row3h; \
|
||||
row3h = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||
row4l = t1; \
|
||||
row4h = t0;
|
||||
|
||||
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||
row2l = t0; \
|
||||
row2h = t1; \
|
||||
\
|
||||
t0 = row3l; \
|
||||
row3l = row3h; \
|
||||
row3h = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||
row4l = t1; \
|
||||
row4h = t0;
|
||||
|
||||
#define ROUND(r) \
|
||||
LOAD_MSG_ ##r ##_1(b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG_ ##r ##_2(b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
|
||||
LOAD_MSG_ ##r ##_3(b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG_ ##r ##_4(b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
|
||||
|
||||
#endif
|
||||
|
||||
0
algo/bmw/.dirstamp
Normal file
66
algo/bmw/bmw256.c
Normal file
@@ -0,0 +1,66 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sph_bmw.h"
|
||||
|
||||
void bmwhash(void *output, const void *input)
|
||||
{
|
||||
/*
|
||||
uint32_t hash[16];
|
||||
sph_bmw256_context ctx;
|
||||
|
||||
sph_bmw256_init(&ctx);
|
||||
sph_bmw256(&ctx, input, 80);
|
||||
sph_bmw256_close(&ctx, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_bmw(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
for (int k = 0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
bmwhash(hash64, endiandata);
|
||||
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_bmw256_algo( algo_gate_t* gate )
|
||||
{
|
||||
algo_not_implemented();
|
||||
return false;
|
||||
// gate->scanhash = (void*)&scanhash_bmw;
|
||||
// gate->hash = (void*)&bmwhash;
|
||||
return true;
|
||||
};
|
||||
|
||||
965
algo/bmw/sph_bmw.c
Normal file
@@ -0,0 +1,965 @@
|
||||
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* BMW implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include "sph_bmw.h"
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
|
||||
#define SPH_SMALL_FOOTPRINT_BMW 1
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0x00010203), SPH_C32(0x04050607),
|
||||
SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F),
|
||||
SPH_C32(0x10111213), SPH_C32(0x14151617),
|
||||
SPH_C32(0x18191A1B), SPH_C32(0x1C1D1E1F),
|
||||
SPH_C32(0x20212223), SPH_C32(0x24252627),
|
||||
SPH_C32(0x28292A2B), SPH_C32(0x2C2D2E2F),
|
||||
SPH_C32(0x30313233), SPH_C32(0x34353637),
|
||||
SPH_C32(0x38393A3B), SPH_C32(0x3C3D3E3F)
|
||||
};
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x40414243), SPH_C32(0x44454647),
|
||||
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
|
||||
SPH_C32(0x50515253), SPH_C32(0x54555657),
|
||||
SPH_C32(0x58595A5B), SPH_C32(0x5C5D5E5F),
|
||||
SPH_C32(0x60616263), SPH_C32(0x64656667),
|
||||
SPH_C32(0x68696A6B), SPH_C32(0x6C6D6E6F),
|
||||
SPH_C32(0x70717273), SPH_C32(0x74757677),
|
||||
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
|
||||
};
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static const sph_u64 IV384[] = {
|
||||
SPH_C64(0x0001020304050607), SPH_C64(0x08090A0B0C0D0E0F),
|
||||
SPH_C64(0x1011121314151617), SPH_C64(0x18191A1B1C1D1E1F),
|
||||
SPH_C64(0x2021222324252627), SPH_C64(0x28292A2B2C2D2E2F),
|
||||
SPH_C64(0x3031323334353637), SPH_C64(0x38393A3B3C3D3E3F),
|
||||
SPH_C64(0x4041424344454647), SPH_C64(0x48494A4B4C4D4E4F),
|
||||
SPH_C64(0x5051525354555657), SPH_C64(0x58595A5B5C5D5E5F),
|
||||
SPH_C64(0x6061626364656667), SPH_C64(0x68696A6B6C6D6E6F),
|
||||
SPH_C64(0x7071727374757677), SPH_C64(0x78797A7B7C7D7E7F)
|
||||
};
|
||||
|
||||
static const sph_u64 IV512[] = {
|
||||
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
|
||||
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
|
||||
SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
|
||||
SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
|
||||
SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
|
||||
SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
|
||||
SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
|
||||
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#define XCAT(x, y) XCAT_(x, y)
|
||||
#define XCAT_(x, y) x ## y
|
||||
|
||||
#define LPAR (
|
||||
|
||||
#define I16_16 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
#define I16_17 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
#define I16_18 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
#define I16_19 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
|
||||
#define I16_20 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
|
||||
#define I16_21 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
|
||||
#define I16_22 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
#define I16_23 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
|
||||
#define I16_24 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
|
||||
#define I16_25 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
|
||||
#define I16_26 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
|
||||
#define I16_27 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
|
||||
#define I16_28 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
|
||||
#define I16_29 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
|
||||
#define I16_30 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
|
||||
#define I16_31 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
|
||||
|
||||
#define M16_16 0, 1, 3, 4, 7, 10, 11
|
||||
#define M16_17 1, 2, 4, 5, 8, 11, 12
|
||||
#define M16_18 2, 3, 5, 6, 9, 12, 13
|
||||
#define M16_19 3, 4, 6, 7, 10, 13, 14
|
||||
#define M16_20 4, 5, 7, 8, 11, 14, 15
|
||||
#define M16_21 5, 6, 8, 9, 12, 15, 16
|
||||
#define M16_22 6, 7, 9, 10, 13, 0, 1
|
||||
#define M16_23 7, 8, 10, 11, 14, 1, 2
|
||||
#define M16_24 8, 9, 11, 12, 15, 2, 3
|
||||
#define M16_25 9, 10, 12, 13, 0, 3, 4
|
||||
#define M16_26 10, 11, 13, 14, 1, 4, 5
|
||||
#define M16_27 11, 12, 14, 15, 2, 5, 6
|
||||
#define M16_28 12, 13, 15, 16, 3, 6, 7
|
||||
#define M16_29 13, 14, 0, 1, 4, 7, 8
|
||||
#define M16_30 14, 15, 1, 2, 5, 8, 9
|
||||
#define M16_31 15, 16, 2, 3, 6, 9, 10
|
||||
|
||||
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
|
||||
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
|
||||
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
|
||||
#define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \
|
||||
^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
|
||||
#define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
|
||||
#define ss4(x) (((x) >> 1) ^ (x))
|
||||
#define ss5(x) (((x) >> 2) ^ (x))
|
||||
#define rs1(x) SPH_ROTL32(x, 3)
|
||||
#define rs2(x) SPH_ROTL32(x, 7)
|
||||
#define rs3(x) SPH_ROTL32(x, 13)
|
||||
#define rs4(x) SPH_ROTL32(x, 16)
|
||||
#define rs5(x) SPH_ROTL32(x, 19)
|
||||
#define rs6(x) SPH_ROTL32(x, 23)
|
||||
#define rs7(x) SPH_ROTL32(x, 27)
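/*
 * The ss0..ss5 and rs1..rs7 macros above implement the s_i and r_i
 * mixing functions of the BMW specification for the 32-bit (small)
 * pipe: each ss* combines right shifts, truncated left shifts and
 * rotations of a word, while each rs* is a fixed-distance rotation.
 */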
|
||||
|
||||
#define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
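/* For example, Ks(16) = SPH_C32(0x55555550) and Ks(17) = SPH_C32(0x5AAAAAA5). */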
|
||||
|
||||
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
|
||||
(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
|
||||
- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
|
||||
|
||||
#define expand1s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
|
||||
+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
|
||||
+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
|
||||
+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand1s(qf, mf, hf, i16) \
|
||||
expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand1s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#define expand2s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
|
||||
+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
|
||||
+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
|
||||
+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand2s(qf, mf, hf, i16) \
|
||||
expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand2s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \
|
||||
^ SPH_ROTL64(x, 4) ^ SPH_ROTL64(x, 37))
|
||||
#define sb1(x) (((x) >> 1) ^ SPH_T64((x) << 2) \
|
||||
^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
|
||||
#define sb2(x) (((x) >> 2) ^ SPH_T64((x) << 1) \
|
||||
^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
|
||||
#define sb3(x) (((x) >> 2) ^ SPH_T64((x) << 2) \
|
||||
^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
|
||||
#define sb4(x) (((x) >> 1) ^ (x))
|
||||
#define sb5(x) (((x) >> 2) ^ (x))
|
||||
#define rb1(x) SPH_ROTL64(x, 5)
|
||||
#define rb2(x) SPH_ROTL64(x, 11)
|
||||
#define rb3(x) SPH_ROTL64(x, 27)
|
||||
#define rb4(x) SPH_ROTL64(x, 32)
|
||||
#define rb5(x) SPH_ROTL64(x, 37)
|
||||
#define rb6(x) SPH_ROTL64(x, 43)
|
||||
#define rb7(x) SPH_ROTL64(x, 53)
|
||||
|
||||
#define Kb(j) SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
static const sph_u64 Kb_tab[] = {
|
||||
Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
|
||||
Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
|
||||
};
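/*
 * Kb_tab[] pre-evaluates the round constants Kb(16)..Kb(31), i.e.
 * j * 0x0555555555555555 truncated to 64 bits, so the small-footprint
 * expansion below can index them by (i - 16) instead of recomputing
 * the multiplication for every expanded word.
 */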
|
||||
|
||||
#define rol_off(mf, j, off) \
|
||||
SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)
|
||||
|
||||
#define add_elt_b(mf, hf, j) \
|
||||
(SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \
|
||||
- rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15))
|
||||
|
||||
#define expand1b(qf, mf, hf, i) \
|
||||
SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
|
||||
+ sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
|
||||
+ sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
|
||||
+ sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \
|
||||
+ sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \
|
||||
+ sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \
|
||||
+ sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \
|
||||
+ sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#define expand2b(qf, mf, hf, i) \
|
||||
SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
|
||||
+ qf((i) - 14) + rb2(qf((i) - 13)) \
|
||||
+ qf((i) - 12) + rb3(qf((i) - 11)) \
|
||||
+ qf((i) - 10) + rb4(qf((i) - 9)) \
|
||||
+ qf((i) - 8) + rb5(qf((i) - 7)) \
|
||||
+ qf((i) - 6) + rb6(qf((i) - 5)) \
|
||||
+ qf((i) - 4) + rb7(qf((i) - 3)) \
|
||||
+ sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#else
|
||||
|
||||
#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
|
||||
(SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
|
||||
- SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))
|
||||
|
||||
#define expand1b_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
|
||||
+ sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
|
||||
+ sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
|
||||
+ sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
|
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand1b(qf, mf, hf, i16) \
|
||||
expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand1b_(qf, mf, hf, i16, ix, iy) \
|
||||
expand1b_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#define expand2b_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
|
||||
+ qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
|
||||
+ qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
|
||||
+ qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
|
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand2b(qf, mf, hf, i16) \
|
||||
expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand2b_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2b_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
|
||||
tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
|
||||
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
|
||||
|
||||
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
#define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13)
|
||||
#define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14)
|
||||
#define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15)
|
||||
#define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13)
|
||||
#define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14)
|
||||
#define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15)
|
||||
#define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14)
|
||||
#define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15)
|
||||
#define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9)
|
||||
#define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10)
|
||||
#define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11)
|
||||
#define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12)
|
||||
#define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13)
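/*
 * Worked expansion of the first of these: with MAKE_W as defined above,
 * Ws0 evaluates to
 *   SPH_T32((M(5) ^ H(5)) - (M(7) ^ H(7)) + (M(10) ^ H(10))
 *           + (M(13) ^ H(13)) + (M(14) ^ H(14)))
 * so each Ws* value mixes five message words XORed with the matching
 * chaining-value words, using the +/- pattern given in the arguments.
 */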
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
#define MAKE_Qas do { \
|
||||
unsigned u; \
|
||||
sph_u32 Ws[16]; \
|
||||
Ws[ 0] = Ws0; \
|
||||
Ws[ 1] = Ws1; \
|
||||
Ws[ 2] = Ws2; \
|
||||
Ws[ 3] = Ws3; \
|
||||
Ws[ 4] = Ws4; \
|
||||
Ws[ 5] = Ws5; \
|
||||
Ws[ 6] = Ws6; \
|
||||
Ws[ 7] = Ws7; \
|
||||
Ws[ 8] = Ws8; \
|
||||
Ws[ 9] = Ws9; \
|
||||
Ws[10] = Ws10; \
|
||||
Ws[11] = Ws11; \
|
||||
Ws[12] = Ws12; \
|
||||
Ws[13] = Ws13; \
|
||||
Ws[14] = Ws14; \
|
||||
Ws[15] = Ws15; \
|
||||
for (u = 0; u < 15; u += 5) { \
|
||||
qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
|
||||
qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
|
||||
qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
|
||||
qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
|
||||
qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
|
||||
} \
|
||||
qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbs do { \
|
||||
qt[16] = expand1s(Qs, M, H, 16); \
|
||||
qt[17] = expand1s(Qs, M, H, 17); \
|
||||
qt[18] = expand2s(Qs, M, H, 18); \
|
||||
qt[19] = expand2s(Qs, M, H, 19); \
|
||||
qt[20] = expand2s(Qs, M, H, 20); \
|
||||
qt[21] = expand2s(Qs, M, H, 21); \
|
||||
qt[22] = expand2s(Qs, M, H, 22); \
|
||||
qt[23] = expand2s(Qs, M, H, 23); \
|
||||
qt[24] = expand2s(Qs, M, H, 24); \
|
||||
qt[25] = expand2s(Qs, M, H, 25); \
|
||||
qt[26] = expand2s(Qs, M, H, 26); \
|
||||
qt[27] = expand2s(Qs, M, H, 27); \
|
||||
qt[28] = expand2s(Qs, M, H, 28); \
|
||||
qt[29] = expand2s(Qs, M, H, 29); \
|
||||
qt[30] = expand2s(Qs, M, H, 30); \
|
||||
qt[31] = expand2s(Qs, M, H, 31); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define MAKE_Qas do { \
|
||||
qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
|
||||
qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
|
||||
qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
|
||||
qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
|
||||
qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
|
||||
qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
|
||||
qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
|
||||
qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
|
||||
qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
|
||||
qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
|
||||
qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
|
||||
qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
|
||||
qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
|
||||
qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
|
||||
qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
|
||||
qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbs do { \
|
||||
qt[16] = expand1s(Qs, M, H, 16); \
|
||||
qt[17] = expand1s(Qs, M, H, 17); \
|
||||
qt[18] = expand2s(Qs, M, H, 18); \
|
||||
qt[19] = expand2s(Qs, M, H, 19); \
|
||||
qt[20] = expand2s(Qs, M, H, 20); \
|
||||
qt[21] = expand2s(Qs, M, H, 21); \
|
||||
qt[22] = expand2s(Qs, M, H, 22); \
|
||||
qt[23] = expand2s(Qs, M, H, 23); \
|
||||
qt[24] = expand2s(Qs, M, H, 24); \
|
||||
qt[25] = expand2s(Qs, M, H, 25); \
|
||||
qt[26] = expand2s(Qs, M, H, 26); \
|
||||
qt[27] = expand2s(Qs, M, H, 27); \
|
||||
qt[28] = expand2s(Qs, M, H, 28); \
|
||||
qt[29] = expand2s(Qs, M, H, 29); \
|
||||
qt[30] = expand2s(Qs, M, H, 30); \
|
||||
qt[31] = expand2s(Qs, M, H, 31); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#define MAKE_Qs do { \
|
||||
MAKE_Qas; \
|
||||
MAKE_Qbs; \
|
||||
} while (0)
|
||||
|
||||
#define Qs(j) (qt[j])
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Wb1 MAKE_W(SPH_T64, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Wb2 MAKE_W(SPH_T64, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
#define Wb3 MAKE_W(SPH_T64, 0, -, 1, +, 8, -, 10, +, 13)
|
||||
#define Wb4 MAKE_W(SPH_T64, 1, +, 2, +, 9, -, 11, -, 14)
|
||||
#define Wb5 MAKE_W(SPH_T64, 3, -, 2, +, 10, -, 12, +, 15)
|
||||
#define Wb6 MAKE_W(SPH_T64, 4, -, 0, -, 3, -, 11, +, 13)
|
||||
#define Wb7 MAKE_W(SPH_T64, 1, -, 4, -, 5, -, 12, -, 14)
|
||||
#define Wb8 MAKE_W(SPH_T64, 2, -, 5, -, 6, +, 13, -, 15)
|
||||
#define Wb9 MAKE_W(SPH_T64, 0, -, 3, +, 6, -, 7, +, 14)
|
||||
#define Wb10 MAKE_W(SPH_T64, 8, -, 1, -, 4, -, 7, +, 15)
|
||||
#define Wb11 MAKE_W(SPH_T64, 8, -, 0, -, 2, -, 5, +, 9)
|
||||
#define Wb12 MAKE_W(SPH_T64, 1, +, 3, -, 6, -, 9, +, 10)
|
||||
#define Wb13 MAKE_W(SPH_T64, 2, +, 4, +, 7, +, 10, +, 11)
|
||||
#define Wb14 MAKE_W(SPH_T64, 3, -, 5, +, 8, -, 11, -, 12)
|
||||
#define Wb15 MAKE_W(SPH_T64, 12, -, 4, -, 6, -, 9, +, 13)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
#define MAKE_Qab do { \
|
||||
unsigned u; \
|
||||
sph_u64 Wb[16]; \
|
||||
Wb[ 0] = Wb0; \
|
||||
Wb[ 1] = Wb1; \
|
||||
Wb[ 2] = Wb2; \
|
||||
Wb[ 3] = Wb3; \
|
||||
Wb[ 4] = Wb4; \
|
||||
Wb[ 5] = Wb5; \
|
||||
Wb[ 6] = Wb6; \
|
||||
Wb[ 7] = Wb7; \
|
||||
Wb[ 8] = Wb8; \
|
||||
Wb[ 9] = Wb9; \
|
||||
Wb[10] = Wb10; \
|
||||
Wb[11] = Wb11; \
|
||||
Wb[12] = Wb12; \
|
||||
Wb[13] = Wb13; \
|
||||
Wb[14] = Wb14; \
|
||||
Wb[15] = Wb15; \
|
||||
for (u = 0; u < 15; u += 5) { \
|
||||
qt[u + 0] = SPH_T64(sb0(Wb[u + 0]) + H(u + 1)); \
|
||||
qt[u + 1] = SPH_T64(sb1(Wb[u + 1]) + H(u + 2)); \
|
||||
qt[u + 2] = SPH_T64(sb2(Wb[u + 2]) + H(u + 3)); \
|
||||
qt[u + 3] = SPH_T64(sb3(Wb[u + 3]) + H(u + 4)); \
|
||||
qt[u + 4] = SPH_T64(sb4(Wb[u + 4]) + H(u + 5)); \
|
||||
} \
|
||||
qt[15] = SPH_T64(sb0(Wb[15]) + H(0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbb do { \
|
||||
unsigned u; \
|
||||
for (u = 16; u < 18; u ++) \
|
||||
qt[u] = expand1b(Qb, M, H, u); \
|
||||
for (u = 18; u < 32; u ++) \
|
||||
qt[u] = expand2b(Qb, M, H, u); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define MAKE_Qab do { \
|
||||
qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
|
||||
qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
|
||||
qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
|
||||
qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
|
||||
qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
|
||||
qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
|
||||
qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
|
||||
qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
|
||||
qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
|
||||
qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
|
||||
qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
|
||||
qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
|
||||
qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
|
||||
qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
|
||||
qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
|
||||
qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbb do { \
|
||||
qt[16] = expand1b(Qb, M, H, 16); \
|
||||
qt[17] = expand1b(Qb, M, H, 17); \
|
||||
qt[18] = expand2b(Qb, M, H, 18); \
|
||||
qt[19] = expand2b(Qb, M, H, 19); \
|
||||
qt[20] = expand2b(Qb, M, H, 20); \
|
||||
qt[21] = expand2b(Qb, M, H, 21); \
|
||||
qt[22] = expand2b(Qb, M, H, 22); \
|
||||
qt[23] = expand2b(Qb, M, H, 23); \
|
||||
qt[24] = expand2b(Qb, M, H, 24); \
|
||||
qt[25] = expand2b(Qb, M, H, 25); \
|
||||
qt[26] = expand2b(Qb, M, H, 26); \
|
||||
qt[27] = expand2b(Qb, M, H, 27); \
|
||||
qt[28] = expand2b(Qb, M, H, 28); \
|
||||
qt[29] = expand2b(Qb, M, H, 29); \
|
||||
qt[30] = expand2b(Qb, M, H, 30); \
|
||||
qt[31] = expand2b(Qb, M, H, 31); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#define MAKE_Qb do { \
|
||||
MAKE_Qab; \
|
||||
MAKE_Qbb; \
|
||||
} while (0)
|
||||
|
||||
#define Qb(j) (qt[j])
|
||||
|
||||
#endif
|
||||
|
||||
#define FOLD(type, mkQ, tt, rol, mf, qf, dhf) do { \
|
||||
type qt[32], xl, xh; \
|
||||
mkQ; \
|
||||
xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
|
||||
^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
|
||||
xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
|
||||
^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
|
||||
dhf( 0) = tt(((xh << 5) ^ (qf(16) >> 5) ^ mf( 0)) \
|
||||
+ (xl ^ qf(24) ^ qf( 0))); \
|
||||
dhf( 1) = tt(((xh >> 7) ^ (qf(17) << 8) ^ mf( 1)) \
|
||||
+ (xl ^ qf(25) ^ qf( 1))); \
|
||||
dhf( 2) = tt(((xh >> 5) ^ (qf(18) << 5) ^ mf( 2)) \
|
||||
+ (xl ^ qf(26) ^ qf( 2))); \
|
||||
dhf( 3) = tt(((xh >> 1) ^ (qf(19) << 5) ^ mf( 3)) \
|
||||
+ (xl ^ qf(27) ^ qf( 3))); \
|
||||
dhf( 4) = tt(((xh >> 3) ^ (qf(20) << 0) ^ mf( 4)) \
|
||||
+ (xl ^ qf(28) ^ qf( 4))); \
|
||||
dhf( 5) = tt(((xh << 6) ^ (qf(21) >> 6) ^ mf( 5)) \
|
||||
+ (xl ^ qf(29) ^ qf( 5))); \
|
||||
dhf( 6) = tt(((xh >> 4) ^ (qf(22) << 6) ^ mf( 6)) \
|
||||
+ (xl ^ qf(30) ^ qf( 6))); \
|
||||
dhf( 7) = tt(((xh >> 11) ^ (qf(23) << 2) ^ mf( 7)) \
|
||||
+ (xl ^ qf(31) ^ qf( 7))); \
|
||||
dhf( 8) = tt(rol(dhf(4), 9) + (xh ^ qf(24) ^ mf( 8)) \
|
||||
+ ((xl << 8) ^ qf(23) ^ qf( 8))); \
|
||||
dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
|
||||
+ ((xl >> 6) ^ qf(16) ^ qf( 9))); \
|
||||
dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
|
||||
+ ((xl << 6) ^ qf(17) ^ qf(10))); \
|
||||
dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
|
||||
+ ((xl << 4) ^ qf(18) ^ qf(11))); \
|
||||
dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
|
||||
+ ((xl >> 3) ^ qf(19) ^ qf(12))); \
|
||||
dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
|
||||
+ ((xl >> 4) ^ qf(20) ^ qf(13))); \
|
||||
dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
|
||||
+ ((xl >> 7) ^ qf(21) ^ qf(14))); \
|
||||
dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
|
||||
+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
|
||||
} while (0)
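/*
 * FOLD ties the pieces together for one compression: mkQ fills the
 * 32-word quadruple-pipe array qt[], xl and xh fold qt[16..23] and
 * qt[16..31] by XOR, and the sixteen dhf() assignments derive the new
 * chaining value from xh, xl, selected qt[] words and the message.
 * FOLDs and FOLDb below instantiate it for the 32-bit and 64-bit pipes.
 */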
|
||||
|
||||
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define FOLDb FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
#define M(x) sph_dec32le_aligned(data + 4 * (x))
|
||||
#else
|
||||
sph_u32 mv[16];
|
||||
|
||||
mv[ 0] = sph_dec32le_aligned(data + 0);
|
||||
mv[ 1] = sph_dec32le_aligned(data + 4);
|
||||
mv[ 2] = sph_dec32le_aligned(data + 8);
|
||||
mv[ 3] = sph_dec32le_aligned(data + 12);
|
||||
mv[ 4] = sph_dec32le_aligned(data + 16);
|
||||
mv[ 5] = sph_dec32le_aligned(data + 20);
|
||||
mv[ 6] = sph_dec32le_aligned(data + 24);
|
||||
mv[ 7] = sph_dec32le_aligned(data + 28);
|
||||
mv[ 8] = sph_dec32le_aligned(data + 32);
|
||||
mv[ 9] = sph_dec32le_aligned(data + 36);
|
||||
mv[10] = sph_dec32le_aligned(data + 40);
|
||||
mv[11] = sph_dec32le_aligned(data + 44);
|
||||
mv[12] = sph_dec32le_aligned(data + 48);
|
||||
mv[13] = sph_dec32le_aligned(data + 52);
|
||||
mv[14] = sph_dec32le_aligned(data + 56);
|
||||
mv[15] = sph_dec32le_aligned(data + 60);
|
||||
#define M(x) (mv[x])
|
||||
#endif
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDs;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u32 final_s[16] = {
|
||||
SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
|
||||
SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
|
||||
SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
|
||||
SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
|
||||
SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
|
||||
SPH_C32(0xaaaaaaaf)
|
||||
};
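/*
 * final_s is the constant chaining value of the output transformation:
 * bmw32_close() serializes the last compression result back into the
 * block buffer, compresses it once more with final_s as the incoming
 * state, and takes the digest from the trailing words of that result.
 */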
|
||||
|
||||
static void
|
||||
bmw32_init(sph_bmw_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
memcpy(sc->H, iv, sizeof sc->H);
|
||||
sc->ptr = 0;
|
||||
#if SPH_64
|
||||
sc->bit_count = 0;
|
||||
#else
|
||||
sc->bit_count_high = 0;
|
||||
sc->bit_count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32(sph_bmw_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
sph_u32 htmp[16];
|
||||
sph_u32 *h1, *h2;
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
#else
|
||||
tmp = sc->bit_count_low;
|
||||
sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3));
|
||||
if (sc->bit_count_low < tmp)
|
||||
sc->bit_count_high ++;
|
||||
sc->bit_count_high += len >> 29;
|
||||
#endif
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
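/* h1 is the chaining value read by compress_small() and h2 the buffer
   it writes; the two are swapped after each full block, and the final
   state is copied back into sc->H below if it ended up in htmp. */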
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
sph_u32 *ht;
|
||||
|
||||
compress_small(buf, h1, h2);
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if (h1 != sc->H)
|
||||
memcpy(sc->H, h1, sizeof sc->H);
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
sph_u32 h1[16], h2[16], *h;
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
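/* keep the top n extra bits of ub, set the padding '1' bit just below
   them and clear everything after it */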
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
h = sc->H;
|
||||
if (ptr > (sizeof sc->buf) - 8) {
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
compress_small(buf, h, h1);
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
|
||||
#if SPH_64
|
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
SPH_T64(sc->bit_count + n));
|
||||
#else
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
sc->bit_count_low + n);
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 4,
|
||||
SPH_T32(sc->bit_count_high));
|
||||
#endif
|
||||
compress_small(buf, h, h2);
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc32le_aligned(buf + 4 * u, h2[u]);
|
||||
compress_small(buf, final_s, h1);
|
||||
out = dst;
|
||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
||||
sph_enc32le(out + 4 * u, h1[v]);
|
||||
}
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static void
|
||||
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#else
|
||||
sph_u64 mv[16];
|
||||
|
||||
mv[ 0] = sph_dec64le_aligned(data + 0);
|
||||
mv[ 1] = sph_dec64le_aligned(data + 8);
|
||||
mv[ 2] = sph_dec64le_aligned(data + 16);
|
||||
mv[ 3] = sph_dec64le_aligned(data + 24);
|
||||
mv[ 4] = sph_dec64le_aligned(data + 32);
|
||||
mv[ 5] = sph_dec64le_aligned(data + 40);
|
||||
mv[ 6] = sph_dec64le_aligned(data + 48);
|
||||
mv[ 7] = sph_dec64le_aligned(data + 56);
|
||||
mv[ 8] = sph_dec64le_aligned(data + 64);
|
||||
mv[ 9] = sph_dec64le_aligned(data + 72);
|
||||
mv[10] = sph_dec64le_aligned(data + 80);
|
||||
mv[11] = sph_dec64le_aligned(data + 88);
|
||||
mv[12] = sph_dec64le_aligned(data + 96);
|
||||
mv[13] = sph_dec64le_aligned(data + 104);
|
||||
mv[14] = sph_dec64le_aligned(data + 112);
|
||||
mv[15] = sph_dec64le_aligned(data + 120);
|
||||
#define M(x) (mv[x])
|
||||
#endif
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDb;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u64 final_b[16] = {
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
|
||||
};
|
||||
|
||||
static void
|
||||
bmw64_init(sph_bmw_big_context *sc, const sph_u64 *iv)
|
||||
{
|
||||
memcpy(sc->H, iv, sizeof sc->H);
|
||||
sc->ptr = 0;
|
||||
sc->bit_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
bmw64(sph_bmw_big_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
sph_u64 htmp[16];
|
||||
sph_u64 *h1, *h2;
|
||||
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
sph_u64 *ht;
|
||||
|
||||
compress_big(buf, h1, h2);
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if (h1 != sc->H)
|
||||
memcpy(sc->H, h1, sizeof sc->H);
|
||||
}
|
||||
|
||||
static void
|
||||
bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w64)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
sph_u64 h1[16], h2[16], *h;
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
h = sc->H;
|
||||
if (ptr > (sizeof sc->buf) - 8) {
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
compress_big(buf, h, h1);
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
|
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
SPH_T64(sc->bit_count + n));
|
||||
compress_big(buf, h, h2);
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc64le_aligned(buf + 8 * u, h2[u]);
|
||||
compress_big(buf, final_b, h1);
|
||||
out = dst;
|
||||
for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)
|
||||
sph_enc64le(out + 8 * u, h1[v]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224_init(void *cc)
|
||||
{
|
||||
bmw32_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw32(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224_close(void *cc, void *dst)
|
||||
{
|
||||
sph_bmw224_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw32_close(cc, ub, n, dst, 7);
|
||||
// sph_bmw224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256_init(void *cc)
|
||||
{
|
||||
bmw32_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw32(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256_close(void *cc, void *dst)
|
||||
{
|
||||
sph_bmw256_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw32_close(cc, ub, n, dst, 8);
|
||||
// sph_bmw256_init(cc);
|
||||
}
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384_init(void *cc)
|
||||
{
|
||||
bmw64_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw64(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384_close(void *cc, void *dst)
|
||||
{
|
||||
sph_bmw384_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw64_close(cc, ub, n, dst, 6);
|
||||
// sph_bmw384_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512_init(void *cc)
|
||||
{
|
||||
bmw64_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw64(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512_close(void *cc, void *dst)
|
||||
{
|
||||
sph_bmw512_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw64_close(cc, ub, n, dst, 8);
|
||||
// sph_bmw512_init(cc);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
328
algo/bmw/sph_bmw.h
Normal file
@@ -0,0 +1,328 @@
|
||||
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
|
||||
* functions which differ by their output size; this implementation
|
||||
* defines BMW for output sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_bmw.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_BMW_H__
|
||||
#define SPH_BMW_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha3/sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-224.
|
||||
*/
|
||||
#define SPH_SIZE_bmw224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-256.
|
||||
*/
|
||||
#define SPH_SIZE_bmw256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-384.
|
||||
*/
|
||||
#define SPH_SIZE_bmw384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-512.
|
||||
*/
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 and BMW-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 H[16];
|
||||
#if SPH_64
|
||||
sph_u64 bit_count;
|
||||
#else
|
||||
sph_u32 bit_count_high, bit_count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_bmw_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 computations. It is
|
||||
* identical to the common <code>sph_bmw_small_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_small_context sph_bmw224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-256 computations. It is
|
||||
* identical to the common <code>sph_bmw_small_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_small_context sph_bmw256_context;
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 and BMW-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u64 H[16];
|
||||
sph_u64 bit_count;
|
||||
#endif
|
||||
} sph_bmw_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 computations. It is
|
||||
* identical to the common <code>sph_bmw_big_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_big_context sph_bmw384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-512 computations. It is
|
||||
* identical to the common <code>sph_bmw_big_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_big_context sph_bmw512_context;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialize a BMW-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-224 context (pointer to a
|
||||
* <code>sph_bmw224_context</code>)
|
||||
*/
|
||||
void sph_bmw224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
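/*
 * Example of the convention described above: to append the three extra
 * bits 1, 0, 1 (most significant first) before closing, pass ub = 0xA0
 * and n = 3, i.e. the extra bits occupy the top n bits of ub.
 */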
|
||||
|
||||
/**
|
||||
* Initialize a BMW-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-256 context (pointer to a
|
||||
* <code>sph_bmw256_context</code>)
|
||||
*/
|
||||
void sph_bmw256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Initialize a BMW-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-384 context (pointer to a
|
||||
* <code>sph_bmw384_context</code>)
|
||||
*/
|
||||
void sph_bmw384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BMW-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-512 context (pointer to a
|
||||
* <code>sph_bmw512_context</code>)
|
||||
*/
|
||||
void sph_bmw512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
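A minimal usage sketch of the streaming interface declared above, hashing a short message with BMW-512; the include path is illustrative and depends on how the tree is built, everything else is taken from this header:

#include <stdio.h>
#include "sph_bmw.h"   /* algo/bmw/sph_bmw.h in this tree */

int main(void)
{
    sph_bmw512_context ctx;
    unsigned char digest[64];
    size_t u;

    sph_bmw512_init(&ctx);
    sph_bmw512(&ctx, "abc", 3);      /* may be called repeatedly to stream data */
    sph_bmw512_close(&ctx, digest);  /* writes the 64-byte BMW-512 digest */

    for (u = 0; u < sizeof digest; u ++)
        printf("%02x", digest[u]);
    printf("\n");
    return 0;
}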
|
||||
517
algo/bmw/sse2/bmw.c
Normal file
@@ -0,0 +1,517 @@
|
||||
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* BMW implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include "../sph_bmw.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u64 bmwIV512[] = {
|
||||
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
|
||||
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
|
||||
SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
|
||||
SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
|
||||
SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
|
||||
SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
|
||||
SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
|
||||
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
|
||||
};
|
||||
|
||||
#define XCAT(x, y) XCAT_(x, y)
|
||||
#define XCAT_(x, y) x ## y
|
||||
|
||||
#define LPAR (
|
||||
|
||||
#define I16_16 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
#define I16_17 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
#define I16_18 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
#define I16_19 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
|
||||
#define I16_20 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
|
||||
#define I16_21 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
|
||||
#define I16_22 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
#define I16_23 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
|
||||
#define I16_24 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
|
||||
#define I16_25 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
|
||||
#define I16_26 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
|
||||
#define I16_27 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
|
||||
#define I16_28 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
|
||||
#define I16_29 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
|
||||
#define I16_30 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
|
||||
#define I16_31 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
|
||||
|
||||
#define M16_16 0, 1, 3, 4, 7, 10, 11
|
||||
#define M16_17 1, 2, 4, 5, 8, 11, 12
|
||||
#define M16_18 2, 3, 5, 6, 9, 12, 13
|
||||
#define M16_19 3, 4, 6, 7, 10, 13, 14
|
||||
#define M16_20 4, 5, 7, 8, 11, 14, 15
|
||||
#define M16_21 5, 6, 8, 9, 12, 15, 16
|
||||
#define M16_22 6, 7, 9, 10, 13, 0, 1
|
||||
#define M16_23 7, 8, 10, 11, 14, 1, 2
|
||||
#define M16_24 8, 9, 11, 12, 15, 2, 3
|
||||
#define M16_25 9, 10, 12, 13, 0, 3, 4
|
||||
#define M16_26 10, 11, 13, 14, 1, 4, 5
|
||||
#define M16_27 11, 12, 14, 15, 2, 5, 6
|
||||
#define M16_28 12, 13, 15, 16, 3, 6, 7
|
||||
#define M16_29 13, 14, 0, 1, 4, 7, 8
|
||||
#define M16_30 14, 15, 1, 2, 5, 8, 9
|
||||
#define M16_31 15, 16, 2, 3, 6, 9, 10
|
||||
|
||||
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
|
||||
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
|
||||
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
|
||||
#define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \
|
||||
^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
|
||||
#define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
|
||||
#define ss4(x) (((x) >> 1) ^ (x))
|
||||
#define ss5(x) (((x) >> 2) ^ (x))
|
||||
#define rs1(x) SPH_ROTL32(x, 3)
|
||||
#define rs2(x) SPH_ROTL32(x, 7)
|
||||
#define rs3(x) SPH_ROTL32(x, 13)
|
||||
#define rs4(x) SPH_ROTL32(x, 16)
|
||||
#define rs5(x) SPH_ROTL32(x, 19)
|
||||
#define rs6(x) SPH_ROTL32(x, 23)
|
||||
#define rs7(x) SPH_ROTL32(x, 27)
|
||||
|
||||
#define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
|
||||
|
||||
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
|
||||
(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
|
||||
- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
|
||||
|
||||
#define expand1s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
|
||||
+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
|
||||
+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
|
||||
+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand1s(qf, mf, hf, i16) \
|
||||
expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand1s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#define expand2s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
|
||||
+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
|
||||
+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
|
||||
+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand2s(qf, mf, hf, i16) \
|
||||
expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand2s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \
|
||||
^ SPH_ROTL64(x, 4) ^ SPH_ROTL64(x, 37))
|
||||
#define sb1(x) (((x) >> 1) ^ SPH_T64((x) << 2) \
|
||||
^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
|
||||
#define sb2(x) (((x) >> 2) ^ SPH_T64((x) << 1) \
|
||||
^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
|
||||
#define sb3(x) (((x) >> 2) ^ SPH_T64((x) << 2) \
|
||||
^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
|
||||
#define sb4(x) (((x) >> 1) ^ (x))
|
||||
#define sb5(x) (((x) >> 2) ^ (x))
|
||||
#define rb1(x) SPH_ROTL64(x, 5)
|
||||
#define rb2(x) SPH_ROTL64(x, 11)
|
||||
#define rb3(x) SPH_ROTL64(x, 27)
|
||||
#define rb4(x) SPH_ROTL64(x, 32)
|
||||
#define rb5(x) SPH_ROTL64(x, 37)
|
||||
#define rb6(x) SPH_ROTL64(x, 43)
|
||||
#define rb7(x) SPH_ROTL64(x, 53)
|
||||
|
||||
#define Kb(j) SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
|
||||
|
||||
#if 0
|
||||
|
||||
static const sph_u64 Kb_tab[] = {
|
||||
Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
|
||||
Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
|
||||
};
|
||||
|
||||
#define rol_off(mf, j, off) \
|
||||
SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)
|
||||
|
||||
#define add_elt_b(mf, hf, j) \
|
||||
(SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \
|
||||
- rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15))
|
||||
|
||||
#define expand1b(qf, mf, hf, i) \
|
||||
SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
|
||||
+ sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
|
||||
+ sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
|
||||
+ sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \
|
||||
+ sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \
|
||||
+ sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \
|
||||
+ sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \
|
||||
+ sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#define expand2b(qf, mf, hf, i) \
|
||||
SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
|
||||
+ qf((i) - 14) + rb2(qf((i) - 13)) \
|
||||
+ qf((i) - 12) + rb3(qf((i) - 11)) \
|
||||
+ qf((i) - 10) + rb4(qf((i) - 9)) \
|
||||
+ qf((i) - 8) + rb5(qf((i) - 7)) \
|
||||
+ qf((i) - 6) + rb6(qf((i) - 5)) \
|
||||
+ qf((i) - 4) + rb7(qf((i) - 3)) \
|
||||
+ sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#else
|
||||
|
||||
#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
|
||||
(SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
|
||||
- SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))
|
||||
|
||||
#define expand1b_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
|
||||
+ sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
|
||||
+ sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
|
||||
+ sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
|
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand1b(qf, mf, hf, i16) \
|
||||
expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand1b_(qf, mf, hf, i16, ix, iy) \
|
||||
expand1b_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#define expand2b_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
|
||||
+ qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
|
||||
+ qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
|
||||
+ qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
|
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand2b(qf, mf, hf, i16) \
|
||||
expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand2b_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2b_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
|
||||
tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
|
||||
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
|
||||
|
||||
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
#define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13)
|
||||
#define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14)
|
||||
#define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15)
|
||||
#define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13)
|
||||
#define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14)
|
||||
#define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15)
|
||||
#define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14)
|
||||
#define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15)
|
||||
#define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9)
|
||||
#define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10)
|
||||
#define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11)
|
||||
#define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12)
|
||||
#define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13)
|
||||
|
||||
#define MAKE_Qas do { \
|
||||
qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
|
||||
qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
|
||||
qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
|
||||
qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
|
||||
qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
|
||||
qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
|
||||
qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
|
||||
qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
|
||||
qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
|
||||
qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
|
||||
qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
|
||||
qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
|
||||
qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
|
||||
qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
|
||||
qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
|
||||
qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbs do { \
|
||||
qt[16] = expand1s(Qs, M, H, 16); \
|
||||
qt[17] = expand1s(Qs, M, H, 17); \
|
||||
qt[18] = expand2s(Qs, M, H, 18); \
|
||||
qt[19] = expand2s(Qs, M, H, 19); \
|
||||
qt[20] = expand2s(Qs, M, H, 20); \
|
||||
qt[21] = expand2s(Qs, M, H, 21); \
|
||||
qt[22] = expand2s(Qs, M, H, 22); \
|
||||
qt[23] = expand2s(Qs, M, H, 23); \
|
||||
qt[24] = expand2s(Qs, M, H, 24); \
|
||||
qt[25] = expand2s(Qs, M, H, 25); \
|
||||
qt[26] = expand2s(Qs, M, H, 26); \
|
||||
qt[27] = expand2s(Qs, M, H, 27); \
|
||||
qt[28] = expand2s(Qs, M, H, 28); \
|
||||
qt[29] = expand2s(Qs, M, H, 29); \
|
||||
qt[30] = expand2s(Qs, M, H, 30); \
|
||||
qt[31] = expand2s(Qs, M, H, 31); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qs do { \
|
||||
MAKE_Qas; \
|
||||
MAKE_Qbs; \
|
||||
} while (0)
|
||||
|
||||
#define Qs(j) (qt[j])
|
||||
|
||||
#define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Wb1 MAKE_W(SPH_T64, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Wb2 MAKE_W(SPH_T64, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
#define Wb3 MAKE_W(SPH_T64, 0, -, 1, +, 8, -, 10, +, 13)
|
||||
#define Wb4 MAKE_W(SPH_T64, 1, +, 2, +, 9, -, 11, -, 14)
|
||||
#define Wb5 MAKE_W(SPH_T64, 3, -, 2, +, 10, -, 12, +, 15)
|
||||
#define Wb6 MAKE_W(SPH_T64, 4, -, 0, -, 3, -, 11, +, 13)
|
||||
#define Wb7 MAKE_W(SPH_T64, 1, -, 4, -, 5, -, 12, -, 14)
|
||||
#define Wb8 MAKE_W(SPH_T64, 2, -, 5, -, 6, +, 13, -, 15)
|
||||
#define Wb9 MAKE_W(SPH_T64, 0, -, 3, +, 6, -, 7, +, 14)
|
||||
#define Wb10 MAKE_W(SPH_T64, 8, -, 1, -, 4, -, 7, +, 15)
|
||||
#define Wb11 MAKE_W(SPH_T64, 8, -, 0, -, 2, -, 5, +, 9)
|
||||
#define Wb12 MAKE_W(SPH_T64, 1, +, 3, -, 6, -, 9, +, 10)
|
||||
#define Wb13 MAKE_W(SPH_T64, 2, +, 4, +, 7, +, 10, +, 11)
|
||||
#define Wb14 MAKE_W(SPH_T64, 3, -, 5, +, 8, -, 11, -, 12)
|
||||
#define Wb15 MAKE_W(SPH_T64, 12, -, 4, -, 6, -, 9, +, 13)
|
||||
|
||||
#define MAKE_Qab do { \
|
||||
qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
|
||||
qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
|
||||
qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
|
||||
qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
|
||||
qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
|
||||
qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
|
||||
qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
|
||||
qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
|
||||
qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
|
||||
qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
|
||||
qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
|
||||
qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
|
||||
qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
|
||||
qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
|
||||
qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
|
||||
qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbb do { \
|
||||
qt[16] = expand1b(Qb, M, H, 16); \
|
||||
qt[17] = expand1b(Qb, M, H, 17); \
|
||||
qt[18] = expand2b(Qb, M, H, 18); \
|
||||
qt[19] = expand2b(Qb, M, H, 19); \
|
||||
qt[20] = expand2b(Qb, M, H, 20); \
|
||||
qt[21] = expand2b(Qb, M, H, 21); \
|
||||
qt[22] = expand2b(Qb, M, H, 22); \
|
||||
qt[23] = expand2b(Qb, M, H, 23); \
|
||||
qt[24] = expand2b(Qb, M, H, 24); \
|
||||
qt[25] = expand2b(Qb, M, H, 25); \
|
||||
qt[26] = expand2b(Qb, M, H, 26); \
|
||||
qt[27] = expand2b(Qb, M, H, 27); \
|
||||
qt[28] = expand2b(Qb, M, H, 28); \
|
||||
qt[29] = expand2b(Qb, M, H, 29); \
|
||||
qt[30] = expand2b(Qb, M, H, 30); \
|
||||
qt[31] = expand2b(Qb, M, H, 31); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qb do { \
|
||||
MAKE_Qab; \
|
||||
MAKE_Qbb; \
|
||||
} while (0)
|
||||
|
||||
#define Qb(j) (qt[j])
|
||||
|
||||
#define FOLD(type, mkQ, tt, rol, mf, qf, dhf) do { \
|
||||
type qt[32], xl, xh; \
|
||||
mkQ; \
|
||||
xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
|
||||
^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
|
||||
xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
|
||||
^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
|
||||
dhf( 0) = tt(((xh << 5) ^ (qf(16) >> 5) ^ mf( 0)) \
|
||||
+ (xl ^ qf(24) ^ qf( 0))); \
|
||||
dhf( 1) = tt(((xh >> 7) ^ (qf(17) << 8) ^ mf( 1)) \
|
||||
+ (xl ^ qf(25) ^ qf( 1))); \
|
||||
dhf( 2) = tt(((xh >> 5) ^ (qf(18) << 5) ^ mf( 2)) \
|
||||
+ (xl ^ qf(26) ^ qf( 2))); \
|
||||
dhf( 3) = tt(((xh >> 1) ^ (qf(19) << 5) ^ mf( 3)) \
|
||||
+ (xl ^ qf(27) ^ qf( 3))); \
|
||||
dhf( 4) = tt(((xh >> 3) ^ (qf(20) << 0) ^ mf( 4)) \
|
||||
+ (xl ^ qf(28) ^ qf( 4))); \
|
||||
dhf( 5) = tt(((xh << 6) ^ (qf(21) >> 6) ^ mf( 5)) \
|
||||
+ (xl ^ qf(29) ^ qf( 5))); \
|
||||
dhf( 6) = tt(((xh >> 4) ^ (qf(22) << 6) ^ mf( 6)) \
|
||||
+ (xl ^ qf(30) ^ qf( 6))); \
|
||||
dhf( 7) = tt(((xh >> 11) ^ (qf(23) << 2) ^ mf( 7)) \
|
||||
+ (xl ^ qf(31) ^ qf( 7))); \
|
||||
dhf( 8) = tt(rol(dhf(4), 9) + (xh ^ qf(24) ^ mf( 8)) \
|
||||
+ ((xl << 8) ^ qf(23) ^ qf( 8))); \
|
||||
dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
|
||||
+ ((xl >> 6) ^ qf(16) ^ qf( 9))); \
|
||||
dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
|
||||
+ ((xl << 6) ^ qf(17) ^ qf(10))); \
|
||||
dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
|
||||
+ ((xl << 4) ^ qf(18) ^ qf(11))); \
|
||||
dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
|
||||
+ ((xl >> 3) ^ qf(19) ^ qf(12))); \
|
||||
dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
|
||||
+ ((xl >> 4) ^ qf(20) ^ qf(13))); \
|
||||
dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
|
||||
+ ((xl >> 7) ^ qf(21) ^ qf(14))); \
|
||||
dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
|
||||
+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
|
||||
} while (0)
|
||||
|
||||
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
#define FOLDb FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
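/*
 * FOLD is the BMW compression step: MAKE_Q* expands the 16 message words
 * into the 32-entry quad-pipe qt[0..31] (the first 16 via the Ws/Wb mixing
 * plus the s/r functions, the rest via expand1/expand2), then the xl/xh
 * folding collapses qt back into the 16 output words dhf(0..15).  FOLDs is
 * the 32-bit instance, FOLDb the 64-bit BMW-512 instance used below.
 */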
|
||||
|
||||
#define DECL_BMW \
|
||||
sph_u64 bmwH[16]; \
|
||||
|
||||
/* load initial constants */
|
||||
#define BMW_I \
|
||||
do { \
|
||||
memcpy(bmwH, bmwIV512, sizeof bmwH); \
|
||||
hashptr = 0; \
|
||||
hashctA = 0; \
|
||||
} while (0)
|
||||
|
||||
/* load hash for loop */
|
||||
#define BMW_U \
|
||||
do { \
|
||||
const void *data = hash; \
|
||||
size_t len = 64; \
|
||||
unsigned char *buf; \
|
||||
\
|
||||
hashctA += (sph_u64)len << 3; \
|
||||
buf = hashbuf; \
|
||||
memcpy(buf, data, 64); \
|
||||
hashptr = 64; \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* bmw512 hash loaded */
|
||||
/* hash = bmw512(loaded) */
|
||||
#define BMW_C \
|
||||
do { \
|
||||
void *dst = hash; \
|
||||
size_t out_size_w64 = 8; \
|
||||
unsigned char *data; \
|
||||
sph_u64 *dh; \
|
||||
unsigned char *out; \
|
||||
size_t ptr, u, v; \
|
||||
unsigned z; \
|
||||
sph_u64 h1[16], h2[16], *h; \
|
||||
data = hashbuf; \
|
||||
ptr = hashptr; \
|
||||
z = 0x80 >> 0; \
|
||||
data[ptr ++] = ((0 & -z) | z) & 0xFF; \
|
||||
memset(data + ptr, 0, (sizeof(char)*128) - 8 - ptr); \
|
||||
sph_enc64le_aligned(data + (sizeof(char)*128) - 8, \
|
||||
SPH_T64(hashctA + 0)); \
|
||||
/* for break loop */ \
|
||||
/* one copy of inline FOLD */ \
|
||||
/* FOLD uses, */ \
|
||||
/* uint64 *h, data */ \
|
||||
/* uint64 dh, state */ \
|
||||
h = bmwH; \
|
||||
dh = h2; \
|
||||
for (;;) { \
|
||||
FOLDb; \
|
||||
/* dh gets changed for 2nd run */ \
|
||||
if (dh == h1) break; \
|
||||
for (u = 0; u < 16; u ++) \
|
||||
sph_enc64le_aligned(data + 8 * u, h2[u]); \
|
||||
dh = h1; \
|
||||
h = final_b; \
|
||||
} \
|
||||
/* end wrapped for break loop */ \
|
||||
out = dst; \
|
||||
for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++) \
|
||||
sph_enc64le(out + 8 * u, h1[v]); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
|
||||
{
|
||||
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDb;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
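/*
 * Illustrative sketch (not from the original source; the function and
 * buffer names are made up): one compress_big() call folds a 128-byte
 * block into the 16-word chaining value, so hashing two blocks simply
 * chains the calls.  Padding and the final_b pass are omitted here; see
 * BMW_C above for those.
 */
#if 0
static void bmw_compress_two_blocks(const unsigned char block0[128],
                                    const unsigned char block1[128],
                                    sph_u64 out[16])
{
	sph_u64 h0[16], h1[16];
	memcpy(h0, bmwIV512, sizeof h0);   /* start from the BMW-512 IV */
	compress_big(block0, h0, h1);      /* h1  = compress(h0, block0) */
	compress_big(block1, h1, out);     /* out = compress(h1, block1) */
}
#endif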
|
||||
|
||||
static const sph_u64 final_b[16] = {
	SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
	SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
	SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
	SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
	SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
	SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
	SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
	SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
};
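/*
 * final_b is the constant chaining vector of BMW's final transform
 * (0xaaaaaaaaaaaaaaa0 + i): after the last data block, the compression
 * runs once more with final_b as the chaining input and the previous
 * output as the message, and the upper eight words of that result form
 * the digest.  That is the second pass of the for (;;) loop in BMW_C.
 */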
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
525
algo/bmw/sse2/bmw.c.new
Normal file
@@ -0,0 +1,525 @@
|
||||
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* BMW implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include "../sph_bmw.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u64 bmwIV512[] = {
	SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
	SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
	SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
	SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
	SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
	SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
	SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
	SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
};
|
||||
|
||||
#define XCAT(x, y) XCAT_(x, y)
|
||||
#define XCAT_(x, y) x ## y
|
||||
|
||||
#define LPAR (
|
||||
|
||||
#define I16_16 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
#define I16_17 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
#define I16_18 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
#define I16_19 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
|
||||
#define I16_20 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
|
||||
#define I16_21 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
|
||||
#define I16_22 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
#define I16_23 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
|
||||
#define I16_24 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
|
||||
#define I16_25 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
|
||||
#define I16_26 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
|
||||
#define I16_27 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
|
||||
#define I16_28 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
|
||||
#define I16_29 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
|
||||
#define I16_30 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
|
||||
#define I16_31 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
|
||||
|
||||
#define M16_16 0, 1, 3, 4, 7, 10, 11
|
||||
#define M16_17 1, 2, 4, 5, 8, 11, 12
|
||||
#define M16_18 2, 3, 5, 6, 9, 12, 13
|
||||
#define M16_19 3, 4, 6, 7, 10, 13, 14
|
||||
#define M16_20 4, 5, 7, 8, 11, 14, 15
|
||||
#define M16_21 5, 6, 8, 9, 12, 15, 16
|
||||
#define M16_22 6, 7, 9, 10, 13, 0, 1
|
||||
#define M16_23 7, 8, 10, 11, 14, 1, 2
|
||||
#define M16_24 8, 9, 11, 12, 15, 2, 3
|
||||
#define M16_25 9, 10, 12, 13, 0, 3, 4
|
||||
#define M16_26 10, 11, 13, 14, 1, 4, 5
|
||||
#define M16_27 11, 12, 14, 15, 2, 5, 6
|
||||
#define M16_28 12, 13, 15, 16, 3, 6, 7
|
||||
#define M16_29 13, 14, 0, 1, 4, 7, 8
|
||||
#define M16_30 14, 15, 1, 2, 5, 8, 9
|
||||
#define M16_31 15, 16, 2, 3, 6, 9, 10
|
||||
|
||||
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
|
||||
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
|
||||
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
|
||||
#define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \
|
||||
^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
|
||||
#define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \
|
||||
^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
|
||||
#define ss4(x) (((x) >> 1) ^ (x))
|
||||
#define ss5(x) (((x) >> 2) ^ (x))
|
||||
#define rs1(x) SPH_ROTL32(x, 3)
|
||||
#define rs2(x) SPH_ROTL32(x, 7)
|
||||
#define rs3(x) SPH_ROTL32(x, 13)
|
||||
#define rs4(x) SPH_ROTL32(x, 16)
|
||||
#define rs5(x) SPH_ROTL32(x, 19)
|
||||
#define rs6(x) SPH_ROTL32(x, 23)
|
||||
#define rs7(x) SPH_ROTL32(x, 27)
|
||||
|
||||
#define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
|
||||
|
||||
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
|
||||
(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
|
||||
- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
|
||||
|
||||
#define expand1s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
|
||||
+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
|
||||
+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
|
||||
+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand1s(qf, mf, hf, i16) \
|
||||
expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand1s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#define expand2s_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
|
||||
+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
|
||||
+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
|
||||
+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
|
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand2s(qf, mf, hf, i16) \
|
||||
expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand2s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \
|
||||
^ SPH_ROTL64(x, 4) ^ SPH_ROTL64(x, 37))
|
||||
#define sb1(x) (((x) >> 1) ^ SPH_T64((x) << 2) \
|
||||
^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
|
||||
#define sb2(x) (((x) >> 2) ^ SPH_T64((x) << 1) \
|
||||
^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
|
||||
#define sb3(x) (((x) >> 2) ^ SPH_T64((x) << 2) \
|
||||
^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
|
||||
#define sb4(x) (((x) >> 1) ^ (x))
|
||||
#define sb5(x) (((x) >> 2) ^ (x))
|
||||
#define rb1(x) SPH_ROTL64(x, 5)
|
||||
#define rb2(x) SPH_ROTL64(x, 11)
|
||||
#define rb3(x) SPH_ROTL64(x, 27)
|
||||
#define rb4(x) SPH_ROTL64(x, 32)
|
||||
#define rb5(x) SPH_ROTL64(x, 37)
|
||||
#define rb6(x) SPH_ROTL64(x, 43)
|
||||
#define rb7(x) SPH_ROTL64(x, 53)
|
||||
|
||||
#define Kb(j) SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
|
||||
|
||||
#if 0
|
||||
|
||||
static const sph_u64 Kb_tab[] = {
|
||||
Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
|
||||
Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
|
||||
};
|
||||
|
||||
#define rol_off(mf, j, off) \
|
||||
SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)
|
||||
|
||||
#define add_elt_b(mf, hf, j) \
|
||||
(SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \
|
||||
- rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15))
|
||||
|
||||
#define expand1b(qf, mf, hf, i) \
|
||||
SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
|
||||
+ sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
|
||||
+ sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
|
||||
+ sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \
|
||||
+ sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \
|
||||
+ sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \
|
||||
+ sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \
|
||||
+ sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#define expand2b(qf, mf, hf, i) \
|
||||
SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
|
||||
+ qf((i) - 14) + rb2(qf((i) - 13)) \
|
||||
+ qf((i) - 12) + rb3(qf((i) - 11)) \
|
||||
+ qf((i) - 10) + rb4(qf((i) - 9)) \
|
||||
+ qf((i) - 8) + rb5(qf((i) - 7)) \
|
||||
+ qf((i) - 6) + rb6(qf((i) - 5)) \
|
||||
+ qf((i) - 4) + rb7(qf((i) - 3)) \
|
||||
+ sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#else
|
||||
|
||||
#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
|
||||
(SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
|
||||
- SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))
|
||||
|
||||
#define expand1b_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
|
||||
+ sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
|
||||
+ sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
|
||||
+ sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
|
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand1b(qf, mf, hf, i16) \
|
||||
expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand1b_(qf, mf, hf, i16, ix, iy) \
|
||||
expand1b_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#define expand2b_inner(qf, mf, hf, i16, \
|
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
|
||||
i9, i10, i11, i12, i13, i14, i15, \
|
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
|
||||
SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
|
||||
+ qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
|
||||
+ qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
|
||||
+ qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
|
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
|
||||
|
||||
#define expand2b(qf, mf, hf, i16) \
|
||||
expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
#define expand2b_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2b_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
|
||||
tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
|
||||
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
|
||||
|
||||
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
#define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13)
|
||||
#define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14)
|
||||
#define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15)
|
||||
#define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13)
|
||||
#define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14)
|
||||
#define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15)
|
||||
#define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14)
|
||||
#define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15)
|
||||
#define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9)
|
||||
#define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10)
|
||||
#define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11)
|
||||
#define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12)
|
||||
#define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13)
|
||||
|
||||
#define MAKE_Qas do { \
|
||||
qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
|
||||
qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
|
||||
qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
|
||||
qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
|
||||
qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
|
||||
qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
|
||||
qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
|
||||
qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
|
||||
qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
|
||||
qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
|
||||
qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
|
||||
qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
|
||||
qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
|
||||
qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
|
||||
qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
|
||||
qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbs do { \
|
||||
qt[16] = expand1s(Qs, M, H, 16); \
|
||||
qt[17] = expand1s(Qs, M, H, 17); \
|
||||
qt[18] = expand2s(Qs, M, H, 18); \
|
||||
qt[19] = expand2s(Qs, M, H, 19); \
|
||||
qt[20] = expand2s(Qs, M, H, 20); \
|
||||
qt[21] = expand2s(Qs, M, H, 21); \
|
||||
qt[22] = expand2s(Qs, M, H, 22); \
|
||||
qt[23] = expand2s(Qs, M, H, 23); \
|
||||
qt[24] = expand2s(Qs, M, H, 24); \
|
||||
qt[25] = expand2s(Qs, M, H, 25); \
|
||||
qt[26] = expand2s(Qs, M, H, 26); \
|
||||
qt[27] = expand2s(Qs, M, H, 27); \
|
||||
qt[28] = expand2s(Qs, M, H, 28); \
|
||||
qt[29] = expand2s(Qs, M, H, 29); \
|
||||
qt[30] = expand2s(Qs, M, H, 30); \
|
||||
qt[31] = expand2s(Qs, M, H, 31); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qs do { \
|
||||
MAKE_Qas; \
|
||||
MAKE_Qbs; \
|
||||
} while (0)
|
||||
|
||||
#define Qs(j) (qt[j])
|
||||
|
||||
#define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Wb1 MAKE_W(SPH_T64, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Wb2 MAKE_W(SPH_T64, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
#define Wb3 MAKE_W(SPH_T64, 0, -, 1, +, 8, -, 10, +, 13)
|
||||
#define Wb4 MAKE_W(SPH_T64, 1, +, 2, +, 9, -, 11, -, 14)
|
||||
#define Wb5 MAKE_W(SPH_T64, 3, -, 2, +, 10, -, 12, +, 15)
|
||||
#define Wb6 MAKE_W(SPH_T64, 4, -, 0, -, 3, -, 11, +, 13)
|
||||
#define Wb7 MAKE_W(SPH_T64, 1, -, 4, -, 5, -, 12, -, 14)
|
||||
#define Wb8 MAKE_W(SPH_T64, 2, -, 5, -, 6, +, 13, -, 15)
|
||||
#define Wb9 MAKE_W(SPH_T64, 0, -, 3, +, 6, -, 7, +, 14)
|
||||
#define Wb10 MAKE_W(SPH_T64, 8, -, 1, -, 4, -, 7, +, 15)
|
||||
#define Wb11 MAKE_W(SPH_T64, 8, -, 0, -, 2, -, 5, +, 9)
|
||||
#define Wb12 MAKE_W(SPH_T64, 1, +, 3, -, 6, -, 9, +, 10)
|
||||
#define Wb13 MAKE_W(SPH_T64, 2, +, 4, +, 7, +, 10, +, 11)
|
||||
#define Wb14 MAKE_W(SPH_T64, 3, -, 5, +, 8, -, 11, -, 12)
|
||||
#define Wb15 MAKE_W(SPH_T64, 12, -, 4, -, 6, -, 9, +, 13)
|
||||
|
||||
#define MAKE_Qab do { \
|
||||
qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
|
||||
qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
|
||||
qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
|
||||
qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
|
||||
qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
|
||||
qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
|
||||
qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
|
||||
qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
|
||||
qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
|
||||
qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
|
||||
qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
|
||||
qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
|
||||
qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
|
||||
qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
|
||||
qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
|
||||
qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qbb do { \
|
||||
qt[16] = expand1b(Qb, M, H, 16); \
|
||||
qt[17] = expand1b(Qb, M, H, 17); \
|
||||
qt[18] = expand2b(Qb, M, H, 18); \
|
||||
qt[19] = expand2b(Qb, M, H, 19); \
|
||||
qt[20] = expand2b(Qb, M, H, 20); \
|
||||
qt[21] = expand2b(Qb, M, H, 21); \
|
||||
qt[22] = expand2b(Qb, M, H, 22); \
|
||||
qt[23] = expand2b(Qb, M, H, 23); \
|
||||
qt[24] = expand2b(Qb, M, H, 24); \
|
||||
qt[25] = expand2b(Qb, M, H, 25); \
|
||||
qt[26] = expand2b(Qb, M, H, 26); \
|
||||
qt[27] = expand2b(Qb, M, H, 27); \
|
||||
qt[28] = expand2b(Qb, M, H, 28); \
|
||||
qt[29] = expand2b(Qb, M, H, 29); \
|
||||
qt[30] = expand2b(Qb, M, H, 30); \
|
||||
qt[31] = expand2b(Qb, M, H, 31); \
|
||||
} while (0)
|
||||
|
||||
#define MAKE_Qb do { \
|
||||
MAKE_Qab; \
|
||||
MAKE_Qbb; \
|
||||
} while (0)
|
||||
|
||||
#define Qb(j) (qt[j])
|
||||
|
||||
#define FOLD(type, mkQ, tt, rol, mf, qf, dhf) do { \
|
||||
type qt[32], xl, xh; \
|
||||
mkQ; \
|
||||
xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
|
||||
^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
|
||||
xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
|
||||
^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
|
||||
dhf( 0) = tt(((xh << 5) ^ (qf(16) >> 5) ^ mf( 0)) \
|
||||
+ (xl ^ qf(24) ^ qf( 0))); \
|
||||
dhf( 1) = tt(((xh >> 7) ^ (qf(17) << 8) ^ mf( 1)) \
|
||||
+ (xl ^ qf(25) ^ qf( 1))); \
|
||||
dhf( 2) = tt(((xh >> 5) ^ (qf(18) << 5) ^ mf( 2)) \
|
||||
+ (xl ^ qf(26) ^ qf( 2))); \
|
||||
dhf( 3) = tt(((xh >> 1) ^ (qf(19) << 5) ^ mf( 3)) \
|
||||
+ (xl ^ qf(27) ^ qf( 3))); \
|
||||
dhf( 4) = tt(((xh >> 3) ^ (qf(20) << 0) ^ mf( 4)) \
|
||||
+ (xl ^ qf(28) ^ qf( 4))); \
|
||||
dhf( 5) = tt(((xh << 6) ^ (qf(21) >> 6) ^ mf( 5)) \
|
||||
+ (xl ^ qf(29) ^ qf( 5))); \
|
||||
dhf( 6) = tt(((xh >> 4) ^ (qf(22) << 6) ^ mf( 6)) \
|
||||
+ (xl ^ qf(30) ^ qf( 6))); \
|
||||
dhf( 7) = tt(((xh >> 11) ^ (qf(23) << 2) ^ mf( 7)) \
|
||||
+ (xl ^ qf(31) ^ qf( 7))); \
|
||||
dhf( 8) = tt(rol(dhf(4), 9) + (xh ^ qf(24) ^ mf( 8)) \
|
||||
+ ((xl << 8) ^ qf(23) ^ qf( 8))); \
|
||||
dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
|
||||
+ ((xl >> 6) ^ qf(16) ^ qf( 9))); \
|
||||
dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
|
||||
+ ((xl << 6) ^ qf(17) ^ qf(10))); \
|
||||
dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
|
||||
+ ((xl << 4) ^ qf(18) ^ qf(11))); \
|
||||
dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
|
||||
+ ((xl >> 3) ^ qf(19) ^ qf(12))); \
|
||||
dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
|
||||
+ ((xl >> 4) ^ qf(20) ^ qf(13))); \
|
||||
dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
|
||||
+ ((xl >> 7) ^ qf(21) ^ qf(14))); \
|
||||
dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
|
||||
+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
|
||||
} while (0)
|
||||
|
||||
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
#define FOLDb FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
|
||||
|
||||
#define DECL_BMW \
|
||||
sph_u64 bmwH[16]; \
|
||||
|
||||
/* load initial constants */
|
||||
#define BMW_I \
|
||||
do { \
|
||||
memcpy(bmwH, bmwIV512, sizeof bmwH); \
|
||||
hashptr = 0; \
|
||||
hashctA = 0; \
|
||||
} while (0)
|
||||
|
||||
/* load hash for loop */
|
||||
#define BMW_U \
|
||||
do { \
|
||||
const void *data = hash; \
|
||||
size_t len = 64; \
|
||||
unsigned char *buf; \
|
||||
\
|
||||
hashctA += (sph_u64)len << 3; \
|
||||
buf = hashbuf; \
|
||||
memcpy(buf, data, 64); \
|
||||
hashptr = 64; \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* bmw512 hash loaded */
|
||||
/* hash = bmw512(loaded) */
|
||||
#define BMW_C \
|
||||
do { \
|
||||
void *dst = hash; \
|
||||
size_t out_size_w64 = 8; \
|
||||
unsigned char *data; \
|
||||
sph_u64 *dh; \
|
||||
unsigned char *out; \
|
||||
size_t ptr, u, v; \
|
||||
unsigned z; \
|
||||
sph_u64 h1[16], h2[16], *h; \
|
||||
data = hashbuf; \
|
||||
ptr = hashptr; \
|
||||
z = 0x80 >> 0; \
|
||||
data[ptr ++] = ((0 & -z) | z) & 0xFF; \
|
||||
memset(data + ptr, 0, (sizeof(char)*128) - 8 - ptr); \
|
||||
sph_enc64le_aligned(data + (sizeof(char)*128) - 8, \
|
||||
SPH_T64(hashctA + 0)); \
|
||||
/* for break loop */ \
|
||||
/* one copy of inline FOLD */ \
|
||||
/* FOLD uses, */ \
|
||||
/* uint64 *h, data */ \
|
||||
/* uint64 dh, state */ \
|
||||
h = bmwH; \
|
||||
dh = h2; \
|
||||
for (;;) { \
|
||||
FOLDb; \
|
||||
/* dh gets changed for 2nd run */ \
|
||||
if (dh == h1) break; \
|
||||
for (u = 0; u < 16; u ++) \
|
||||
sph_enc64le_aligned(data + 8 * u, h2[u]); \
|
||||
dh = h1; \
|
||||
h = final_b; \
|
||||
} \
|
||||
/* end wrapped for break loop */ \
|
||||
out = dst; \
|
||||
sph_enc64le(out, h1[8]); \
|
||||
sph_enc64le(out + 8, h1[9]); \
|
||||
sph_enc64le(out + 16, h1[10]); \
|
||||
sph_enc64le(out + 24, h1[11]); \
|
||||
sph_enc64le(out + 32, h1[12]); \
|
||||
sph_enc64le(out + 40, h1[13]); \
|
||||
sph_enc64le(out + 48, h1[14]); \
|
||||
sph_enc64le(out + 56, h1[15]); \
|
||||
/* for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)*/ \
|
||||
/* sph_enc64le(out + 8 * u, h1[v]);*/ \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
|
||||
{
|
||||
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDb;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u64 final_b[16] = {
	SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
	SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
	SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
	SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
	SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
	SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
	SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
	SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
};
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
61
algo/bmw/sse2/sph_bmw.h
Normal file
@@ -0,0 +1,61 @@
|
||||
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
|
||||
* functions which differ by their output size; this implementation
|
||||
* defines BMW for output sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_bmw.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_BMW_H__
|
||||
#define SPH_BMW_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
sph_u64 bmwH[16];
|
||||
#endif
|
||||
} sph_bmw_big_context;
|
||||
|
||||
typedef sph_bmw_big_context sph_bmw512_context;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
0
algo/cryptonight/.dirstamp
Normal file
365
algo/cryptonight/cryptolight.c
Normal file
@@ -0,0 +1,365 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
#include "crypto/hash-ops.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 1
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#endif
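/* With LITE set, cryptonight-light uses a 1 MiB scratchpad and 2^19
   iterations, i.e. half of standard cryptonight on both axes.  The main
   loop below runs ITER / 4 passes, each with two AES and two MUL steps. */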
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
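/*
 * Illustrative self-check (not part of the original code): on compilers
 * with a native 128-bit integer type, the schoolbook decomposition above
 * can be verified directly against a widening multiply.
 */
#if 0
static int mul128_selfcheck(uint64_t x, uint64_t y)
{
	uint64_t hi, lo;
	unsigned __int128 p = (unsigned __int128)x * y;
	lo = mul128(x, y, &hi);
	return lo == (uint64_t)p && hi == (uint64_t)(p >> 64);
}
#endif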
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
	do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
};
|
||||
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
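/* e2i maps an AES block to a scratchpad offset: the low 32 bits of the
   block, masked to a 16-byte-aligned index within MEMORY (0xFFFF0 for the
   1 MiB light scratchpad, 0x1FFFF0 for the full 2 MiB one). */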
|
||||
|
||||
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = lo;
|
||||
}
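/* mul_sum_xor_dst is cryptonight's multiply step: the 128-bit product
   a[0] * dst[0] gets c added into it half-wise (hi += c[0], lo += c[1],
   with no carry across halves), the sum is written back to the scratchpad
   block dst, and c receives old_dst XOR sum for the next round. */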
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
struct cryptonight_ctx {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
};
|
||||
|
||||
static void cryptolight_hash_ctx(void* output, const void* input, int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
len = 76;
|
||||
hash_process(&ctx->state.hs, (const uint8_t*) input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
void cryptolight_hash(void* output, const void* input, int len) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
cryptolight_hash_ctx(output, input, len, ctx);
|
||||
free(ctx);
|
||||
}
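/*
 * Illustrative usage sketch (buffer names are made up, not from the
 * original code): cryptolight_hash() reads a 76-byte hashing blob and
 * writes a 32-byte digest; the len argument is effectively fixed to 76
 * by cryptolight_hash_ctx().
 */
#if 0
static void cryptolight_hash_example(void)
{
	uint8_t blob[76] = { 0 };
	uint8_t digest[32];
	cryptolight_hash(digest, blob, sizeof(blob));
}
#endif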
|
||||
|
||||
static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
hash_process(&ctx->state.hs, (const uint8_t*)input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
int scanhash_cryptolight(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#else
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#endif
|
||||
free(ctx);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptolight_algo( algo_gate_t* gate )
|
||||
{
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptolight;
|
||||
gate->hash = (void*)&cryptolight_hash;
|
||||
gate->hash_suw = (void*)&cryptolight_hash;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
}
|
||||
|
||||
244
algo/cryptonight/cryptonight-aesni.c
Normal file
@@ -0,0 +1,244 @@
|
||||
#include <x86intrin.h>
|
||||
#include <memory.h>
|
||||
#include "cryptonight.h"
|
||||
#include "miner.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
void aesni_parallel_noxor(uint8_t *long_state, uint8_t *text, uint8_t *ExpandedKey);
|
||||
void aesni_parallel_xor(uint8_t *text, uint8_t *ExpandedKey, uint8_t *long_state);
|
||||
void that_fucking_loop(uint8_t a[16], uint8_t b[16], uint8_t *long_state);
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
{
|
||||
__m128i tmp4;
|
||||
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
|
||||
tmp4 = _mm_slli_si128(*tmp1, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
|
||||
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
|
||||
// with ExpandAESKey256() and its subroutines
|
||||
static inline void ExpandAESKey256(char *keybuf)
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
__m128i tmp1, tmp2, tmp3, *keys;
|
||||
|
||||
keys = (__m128i *)keybuf;
|
||||
|
||||
tmp1 = _mm_load_si128((__m128i *)keybuf);
|
||||
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[2] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[3] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[4] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[5] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[6] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[7] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[8] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[9] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[10] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[11] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[12] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[13] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[14] = tmp1;
|
||||
#endif
|
||||
}
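/* ExpandAESKey256 expands the 32-byte key already placed in keybuf into
   the AES-256 key schedule in-place: keys[0..1] hold the original key,
   keys[2..14] the derived round keys.  The cryptonight rounds below only
   consume the first ten 16-byte round keys (expkey[0..9]). */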
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t long_state[MEMORY] __attribute((aligned(16)));
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16)));
|
||||
uint64_t a[AES_BLOCK_SIZE >> 3] __attribute__((aligned(16)));
|
||||
uint64_t b[AES_BLOCK_SIZE >> 3] __attribute__((aligned(16)));
|
||||
uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(16)));
|
||||
// oaes_ctx* aes_ctx;
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
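/* One scratchpad per thread: keeping the context in thread-local storage
   avoids allocating and freeing the multi-megabyte long_state on every
   hash call (contrast with the heap-allocated context in cryptolight.c). */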
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
uint8_t ExpandedKey[256];
|
||||
size_t i, j;
|
||||
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
longoutput = (__m128i *)ctx.long_state;
|
||||
expkey = (__m128i *)ExpandedKey;
|
||||
xmminput = (__m128i *)ctx.text;
|
||||
|
||||
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
||||
// aesni_parallel_noxor(&ctx->long_state[i], ctx->text, ExpandedKey);
|
||||
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
||||
{
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
|
||||
}
|
||||
|
||||
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
||||
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
||||
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
||||
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
||||
|
||||
// for (i = 0; i < 2; i++)
|
||||
// {
|
||||
// ctx.a[i] = ((uint64_t *)ctx.state.k)[i] ^ ((uint64_t *)ctx.state.k)[i+4];
|
||||
// ctx.b[i] = ((uint64_t *)ctx.state.k)[i+2] ^ ((uint64_t *)ctx.state.k)[i+6];
|
||||
// }
|
||||
|
||||
__m128i b_x = _mm_load_si128((__m128i *)ctx.b);
|
||||
uint64_t a[2] __attribute((aligned(16))), b[2] __attribute((aligned(16)));
|
||||
a[0] = ctx.a[0];
|
||||
a[1] = ctx.a[1];
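	/* Memory-hard main loop: 0x80000 iterations.  Each pass AES-encrypts
	   the scratchpad block addressed by 'a' with 'a' as the round key,
	   writes b ^ c back to that block, then multiplies the AES result
	   with the block it addresses, stores the accumulated product there
	   through 'a', and XORs that block into 'a' to pick the next address,
	   so every step depends on the one before it. */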
|
||||
|
||||
for(i = 0; __builtin_expect(i < 0x80000, 1); i++)
|
||||
{
|
||||
__m128i c_x = _mm_load_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0]);
|
||||
__m128i a_x = _mm_load_si128((__m128i *)a);
|
||||
uint64_t c[2];
|
||||
c_x = _mm_aesenc_si128(c_x, a_x);
|
||||
|
||||
_mm_store_si128((__m128i *)c, c_x);
|
||||
__builtin_prefetch(&ctx.long_state[c[0] & 0x1FFFF0], 0, 1);
|
||||
|
||||
b_x = _mm_xor_si128(b_x, c_x);
|
||||
_mm_store_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0], b_x);
|
||||
|
||||
uint64_t *nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
uint64_t b[2];
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
{
|
||||
uint64_t hi, lo;
|
||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||
|
||||
__asm__("mulq %3\n\t"
|
||||
: "=d" (hi),
|
||||
"=a" (lo)
|
||||
: "%a" (c[0]),
|
||||
"rm" (b[0])
|
||||
: "cc" );
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
}
|
||||
uint64_t *dst = (uint64_t*)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
dst[0] = a[0];
|
||||
dst[1] = a[1];
|
||||
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
b_x = c_x;
|
||||
__builtin_prefetch(&ctx.long_state[a[0] & 0x1FFFF0], 0, 3);
|
||||
}
|
||||
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
||||
// aesni_parallel_xor(&ctx->text, ExpandedKey, &ctx->long_state[i]);
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
|
||||
{
|
||||
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
|
||||
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
|
||||
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
|
||||
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
|
||||
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
|
||||
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
|
||||
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
|
||||
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
|
||||
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
#endif
|
||||
}
|
||||
110
algo/cryptonight/cryptonight-common.c
Normal file
@@ -0,0 +1,110 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
//#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#endif
|
||||
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
/*
|
||||
#if defined __unix__ && (!defined __APPLE__)
|
||||
#include <sys/mman.h>
|
||||
#elif defined _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
#ifdef NO_AES_NI
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#else
|
||||
hashState_groestl256 ctx;
|
||||
init_groestl256( &ctx );
|
||||
update_groestl256( &ctx, input, len * 8 );
|
||||
final_groestl256( &ctx, output );
|
||||
#endif
|
||||
}
|
||||
|
||||
void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void (* const extra_hashes[4])( const void *, size_t, char *) =
|
||||
{ do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash };
|
||||
|
||||
void cryptonight_hash( void *restrict output, const void *input, int len )
|
||||
{
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#else
|
||||
cryptonight_hash_aes( output, input, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
void cryptonight_hash_suw( void *restrict output, const void *input )
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#else
|
||||
cryptonight_hash_aes( output, input, 76 );
|
||||
#endif
|
||||
}
|
||||
|
||||
int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
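   /* the 32-bit nonce sits at byte offset 39 of the CryptoNote hashing blob */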
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[32 / 4] __attribute__((aligned(32)));
|
||||
do
|
||||
{
|
||||
*nonceptr = ++n;
|
||||
cryptonight_hash( hash, pdata, 76 );
|
||||
if (unlikely( hash[7] < Htarg ))
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
{
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
}
|
||||
|
||||
242
algo/cryptonight/cryptonight.c
Normal file
@@ -0,0 +1,242 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
#include "crypto/hash-ops.h"
|
||||
//#include "cryptonight.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 0
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
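/* ITER counts single mixing steps; the main loop below runs ITER / 4
   passes of four steps (AES, MUL, AES, MUL) each. */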
|
||||
#endif
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
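/*
 * Worked example of the limb decomposition above: with
 *   multiplier   = 0x0000000200000003  (a = 2, b = 3) and
 *   multiplicand = 0x0000000400000005  (c = 4, d = 5),
 * ac = 8, ad = 10, bc = 12, bd = 15 and adbc = 22 with no carry, so
 *   product_lo = 15 + (22 << 32) = 0x000000160000000F and
 *   product_hi = 8,
 * i.e. the full 128-bit product 8 * 2^64 + 22 * 2^32 + 15.
 */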
|
||||
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
{
|
||||
hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx.aes_ctx, ctx.state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 0], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 1], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 2], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 3], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 4], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 5], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 6], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 7], ctx.aes_ctx->key->exp_data);
|
||||
memcpy(&ctx.long_state[i], ctx.text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a);
|
||||
xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx.aes_ctx, &ctx.state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx.text[0 * AES_BLOCK_SIZE], &ctx.long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[0 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[1 * AES_BLOCK_SIZE], &ctx.long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[1 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[2 * AES_BLOCK_SIZE], &ctx.long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[2 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[3 * AES_BLOCK_SIZE], &ctx.long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[3 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[4 * AES_BLOCK_SIZE], &ctx.long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[4 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[5 * AES_BLOCK_SIZE], &ctx.long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[5 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[6 * AES_BLOCK_SIZE], &ctx.long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[6 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[7 * AES_BLOCK_SIZE], &ctx.long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx.state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx.aes_ctx);
|
||||
}
|
||||
|
||||
47
algo/cryptonight/cryptonight.h
Normal file
@@ -0,0 +1,47 @@
|
||||
#ifndef __CRYPTONIGHT_H_INCLUDED
|
||||
#define __CRYPTONIGHT_H_INCLUDED
|
||||
|
||||
#include <stddef.h>
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "miner.h"
|
||||
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
|
||||
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union hash_state {
|
||||
uint8_t b[200];
|
||||
uint64_t w[25];
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output);
|
||||
void do_groestl_hash(const void* input, size_t len, char* output);
|
||||
void do_jh_hash(const void* input, size_t len, char* output);
|
||||
void do_skein_hash(const void* input, size_t len, char* output);
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len);
|
||||
void keccakf(uint64_t st[25], int rounds);
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
|
||||
|
||||
#endif
|
||||
|
||||
0
algo/cubehash/.dirstamp
Normal file
723
algo/cubehash/sph_cubehash.c
Normal file
@@ -0,0 +1,723 @@
|
||||
/* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* CubeHash implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "sph_cubehash.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_CUBEHASH
|
||||
#define SPH_SMALL_FOOTPRINT_CUBEHASH 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit
|
||||
* mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302).
|
||||
* It appears that the optimal settings are:
|
||||
* -- full unroll, no state copy on the "big" systems (x86, PowerPC)
|
||||
* -- unroll to 4 or 8, state copy on the "small" system (MIPS)
|
||||
*/
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_CUBEHASH
|
||||
|
||||
#if !defined SPH_CUBEHASH_UNROLL
|
||||
#define SPH_CUBEHASH_UNROLL 4
|
||||
#endif
|
||||
#if !defined SPH_CUBEHASH_NOCOPY
|
||||
#define SPH_CUBEHASH_NOCOPY 1
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if !defined SPH_CUBEHASH_UNROLL
|
||||
#define SPH_CUBEHASH_UNROLL 0
|
||||
#endif
|
||||
#if !defined SPH_CUBEHASH_NOCOPY
|
||||
#define SPH_CUBEHASH_NOCOPY 0
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0xB0FC8217), SPH_C32(0x1BEE1A90), SPH_C32(0x829E1A22),
|
||||
SPH_C32(0x6362C342), SPH_C32(0x24D91C30), SPH_C32(0x03A7AA24),
|
||||
SPH_C32(0xA63721C8), SPH_C32(0x85B0E2EF), SPH_C32(0xF35D13F3),
|
||||
SPH_C32(0x41DA807D), SPH_C32(0x21A70CA6), SPH_C32(0x1F4E9774),
|
||||
SPH_C32(0xB3E1C932), SPH_C32(0xEB0A79A8), SPH_C32(0xCDDAAA66),
|
||||
SPH_C32(0xE2F6ECAA), SPH_C32(0x0A713362), SPH_C32(0xAA3080E0),
|
||||
SPH_C32(0xD8F23A32), SPH_C32(0xCEF15E28), SPH_C32(0xDB086314),
|
||||
SPH_C32(0x7F709DF7), SPH_C32(0xACD228A4), SPH_C32(0x704D6ECE),
|
||||
SPH_C32(0xAA3EC95F), SPH_C32(0xE387C214), SPH_C32(0x3A6445FF),
|
||||
SPH_C32(0x9CAB81C3), SPH_C32(0xC73D4B98), SPH_C32(0xD277AEBE),
|
||||
SPH_C32(0xFD20151C), SPH_C32(0x00CB573E)
|
||||
};
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0xEA2BD4B4), SPH_C32(0xCCD6F29F), SPH_C32(0x63117E71),
|
||||
SPH_C32(0x35481EAE), SPH_C32(0x22512D5B), SPH_C32(0xE5D94E63),
|
||||
SPH_C32(0x7E624131), SPH_C32(0xF4CC12BE), SPH_C32(0xC2D0B696),
|
||||
SPH_C32(0x42AF2070), SPH_C32(0xD0720C35), SPH_C32(0x3361DA8C),
|
||||
SPH_C32(0x28CCECA4), SPH_C32(0x8EF8AD83), SPH_C32(0x4680AC00),
|
||||
SPH_C32(0x40E5FBAB), SPH_C32(0xD89041C3), SPH_C32(0x6107FBD5),
|
||||
SPH_C32(0x6C859D41), SPH_C32(0xF0B26679), SPH_C32(0x09392549),
|
||||
SPH_C32(0x5FA25603), SPH_C32(0x65C892FD), SPH_C32(0x93CB6285),
|
||||
SPH_C32(0x2AF2B5AE), SPH_C32(0x9E4B4E60), SPH_C32(0x774ABFDD),
|
||||
SPH_C32(0x85254725), SPH_C32(0x15815AEB), SPH_C32(0x4AB6AAD6),
|
||||
SPH_C32(0x9CDAF8AF), SPH_C32(0xD6032C0A)
|
||||
};
|
||||
|
||||
static const sph_u32 IV384[] = {
|
||||
SPH_C32(0xE623087E), SPH_C32(0x04C00C87), SPH_C32(0x5EF46453),
|
||||
SPH_C32(0x69524B13), SPH_C32(0x1A05C7A9), SPH_C32(0x3528DF88),
|
||||
SPH_C32(0x6BDD01B5), SPH_C32(0x5057B792), SPH_C32(0x6AA7A922),
|
||||
SPH_C32(0x649C7EEE), SPH_C32(0xF426309F), SPH_C32(0xCB629052),
|
||||
SPH_C32(0xFC8E20ED), SPH_C32(0xB3482BAB), SPH_C32(0xF89E5E7E),
|
||||
SPH_C32(0xD83D4DE4), SPH_C32(0x44BFC10D), SPH_C32(0x5FC1E63D),
|
||||
SPH_C32(0x2104E6CB), SPH_C32(0x17958F7F), SPH_C32(0xDBEAEF70),
|
||||
SPH_C32(0xB4B97E1E), SPH_C32(0x32C195F6), SPH_C32(0x6184A8E4),
|
||||
SPH_C32(0x796C2543), SPH_C32(0x23DE176D), SPH_C32(0xD33BBAEC),
|
||||
SPH_C32(0x0C12E5D2), SPH_C32(0x4EB95A7B), SPH_C32(0x2D18BA01),
|
||||
SPH_C32(0x04EE475F), SPH_C32(0x1FC5F22E)
|
||||
};
|
||||
|
||||
static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
|
||||
SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
|
||||
SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
|
||||
SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
|
||||
SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
|
||||
SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
|
||||
SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
|
||||
SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
|
||||
SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
|
||||
SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
|
||||
SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
|
||||
};
|
||||
|
||||
#define T32 SPH_T32
|
||||
#define ROTL32 SPH_ROTL32
|
||||
|
||||
#if SPH_CUBEHASH_NOCOPY
|
||||
|
||||
#define DECL_STATE
|
||||
#define READ_STATE(cc)
|
||||
#define WRITE_STATE(cc)
|
||||
|
||||
#define x0 ((sc)->state[ 0])
|
||||
#define x1 ((sc)->state[ 1])
|
||||
#define x2 ((sc)->state[ 2])
|
||||
#define x3 ((sc)->state[ 3])
|
||||
#define x4 ((sc)->state[ 4])
|
||||
#define x5 ((sc)->state[ 5])
|
||||
#define x6 ((sc)->state[ 6])
|
||||
#define x7 ((sc)->state[ 7])
|
||||
#define x8 ((sc)->state[ 8])
|
||||
#define x9 ((sc)->state[ 9])
|
||||
#define xa ((sc)->state[10])
|
||||
#define xb ((sc)->state[11])
|
||||
#define xc ((sc)->state[12])
|
||||
#define xd ((sc)->state[13])
|
||||
#define xe ((sc)->state[14])
|
||||
#define xf ((sc)->state[15])
|
||||
#define xg ((sc)->state[16])
|
||||
#define xh ((sc)->state[17])
|
||||
#define xi ((sc)->state[18])
|
||||
#define xj ((sc)->state[19])
|
||||
#define xk ((sc)->state[20])
|
||||
#define xl ((sc)->state[21])
|
||||
#define xm ((sc)->state[22])
|
||||
#define xn ((sc)->state[23])
|
||||
#define xo ((sc)->state[24])
|
||||
#define xp ((sc)->state[25])
|
||||
#define xq ((sc)->state[26])
|
||||
#define xr ((sc)->state[27])
|
||||
#define xs ((sc)->state[28])
|
||||
#define xt ((sc)->state[29])
|
||||
#define xu ((sc)->state[30])
|
||||
#define xv ((sc)->state[31])
|
||||
|
||||
#else
|
||||
|
||||
#define DECL_STATE \
|
||||
sph_u32 x0, x1, x2, x3, x4, x5, x6, x7; \
|
||||
sph_u32 x8, x9, xa, xb, xc, xd, xe, xf; \
|
||||
sph_u32 xg, xh, xi, xj, xk, xl, xm, xn; \
|
||||
sph_u32 xo, xp, xq, xr, xs, xt, xu, xv;
|
||||
|
||||
#define READ_STATE(cc) do { \
|
||||
x0 = (cc)->state[ 0]; \
|
||||
x1 = (cc)->state[ 1]; \
|
||||
x2 = (cc)->state[ 2]; \
|
||||
x3 = (cc)->state[ 3]; \
|
||||
x4 = (cc)->state[ 4]; \
|
||||
x5 = (cc)->state[ 5]; \
|
||||
x6 = (cc)->state[ 6]; \
|
||||
x7 = (cc)->state[ 7]; \
|
||||
x8 = (cc)->state[ 8]; \
|
||||
x9 = (cc)->state[ 9]; \
|
||||
xa = (cc)->state[10]; \
|
||||
xb = (cc)->state[11]; \
|
||||
xc = (cc)->state[12]; \
|
||||
xd = (cc)->state[13]; \
|
||||
xe = (cc)->state[14]; \
|
||||
xf = (cc)->state[15]; \
|
||||
xg = (cc)->state[16]; \
|
||||
xh = (cc)->state[17]; \
|
||||
xi = (cc)->state[18]; \
|
||||
xj = (cc)->state[19]; \
|
||||
xk = (cc)->state[20]; \
|
||||
xl = (cc)->state[21]; \
|
||||
xm = (cc)->state[22]; \
|
||||
xn = (cc)->state[23]; \
|
||||
xo = (cc)->state[24]; \
|
||||
xp = (cc)->state[25]; \
|
||||
xq = (cc)->state[26]; \
|
||||
xr = (cc)->state[27]; \
|
||||
xs = (cc)->state[28]; \
|
||||
xt = (cc)->state[29]; \
|
||||
xu = (cc)->state[30]; \
|
||||
xv = (cc)->state[31]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE(cc) do { \
|
||||
(cc)->state[ 0] = x0; \
|
||||
(cc)->state[ 1] = x1; \
|
||||
(cc)->state[ 2] = x2; \
|
||||
(cc)->state[ 3] = x3; \
|
||||
(cc)->state[ 4] = x4; \
|
||||
(cc)->state[ 5] = x5; \
|
||||
(cc)->state[ 6] = x6; \
|
||||
(cc)->state[ 7] = x7; \
|
||||
(cc)->state[ 8] = x8; \
|
||||
(cc)->state[ 9] = x9; \
|
||||
(cc)->state[10] = xa; \
|
||||
(cc)->state[11] = xb; \
|
||||
(cc)->state[12] = xc; \
|
||||
(cc)->state[13] = xd; \
|
||||
(cc)->state[14] = xe; \
|
||||
(cc)->state[15] = xf; \
|
||||
(cc)->state[16] = xg; \
|
||||
(cc)->state[17] = xh; \
|
||||
(cc)->state[18] = xi; \
|
||||
(cc)->state[19] = xj; \
|
||||
(cc)->state[20] = xk; \
|
||||
(cc)->state[21] = xl; \
|
||||
(cc)->state[22] = xm; \
|
||||
(cc)->state[23] = xn; \
|
||||
(cc)->state[24] = xo; \
|
||||
(cc)->state[25] = xp; \
|
||||
(cc)->state[26] = xq; \
|
||||
(cc)->state[27] = xr; \
|
||||
(cc)->state[28] = xs; \
|
||||
(cc)->state[29] = xt; \
|
||||
(cc)->state[30] = xu; \
|
||||
(cc)->state[31] = xv; \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#define INPUT_BLOCK do { \
|
||||
x0 ^= sph_dec32le_aligned(buf + 0); \
|
||||
x1 ^= sph_dec32le_aligned(buf + 4); \
|
||||
x2 ^= sph_dec32le_aligned(buf + 8); \
|
||||
x3 ^= sph_dec32le_aligned(buf + 12); \
|
||||
x4 ^= sph_dec32le_aligned(buf + 16); \
|
||||
x5 ^= sph_dec32le_aligned(buf + 20); \
|
||||
x6 ^= sph_dec32le_aligned(buf + 24); \
|
||||
x7 ^= sph_dec32le_aligned(buf + 28); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_EVEN do { \
|
||||
xg = T32(x0 + xg); \
|
||||
x0 = ROTL32(x0, 7); \
|
||||
xh = T32(x1 + xh); \
|
||||
x1 = ROTL32(x1, 7); \
|
||||
xi = T32(x2 + xi); \
|
||||
x2 = ROTL32(x2, 7); \
|
||||
xj = T32(x3 + xj); \
|
||||
x3 = ROTL32(x3, 7); \
|
||||
xk = T32(x4 + xk); \
|
||||
x4 = ROTL32(x4, 7); \
|
||||
xl = T32(x5 + xl); \
|
||||
x5 = ROTL32(x5, 7); \
|
||||
xm = T32(x6 + xm); \
|
||||
x6 = ROTL32(x6, 7); \
|
||||
xn = T32(x7 + xn); \
|
||||
x7 = ROTL32(x7, 7); \
|
||||
xo = T32(x8 + xo); \
|
||||
x8 = ROTL32(x8, 7); \
|
||||
xp = T32(x9 + xp); \
|
||||
x9 = ROTL32(x9, 7); \
|
||||
xq = T32(xa + xq); \
|
||||
xa = ROTL32(xa, 7); \
|
||||
xr = T32(xb + xr); \
|
||||
xb = ROTL32(xb, 7); \
|
||||
xs = T32(xc + xs); \
|
||||
xc = ROTL32(xc, 7); \
|
||||
xt = T32(xd + xt); \
|
||||
xd = ROTL32(xd, 7); \
|
||||
xu = T32(xe + xu); \
|
||||
xe = ROTL32(xe, 7); \
|
||||
xv = T32(xf + xv); \
|
||||
xf = ROTL32(xf, 7); \
|
||||
x8 ^= xg; \
|
||||
x9 ^= xh; \
|
||||
xa ^= xi; \
|
||||
xb ^= xj; \
|
||||
xc ^= xk; \
|
||||
xd ^= xl; \
|
||||
xe ^= xm; \
|
||||
xf ^= xn; \
|
||||
x0 ^= xo; \
|
||||
x1 ^= xp; \
|
||||
x2 ^= xq; \
|
||||
x3 ^= xr; \
|
||||
x4 ^= xs; \
|
||||
x5 ^= xt; \
|
||||
x6 ^= xu; \
|
||||
x7 ^= xv; \
|
||||
xi = T32(x8 + xi); \
|
||||
x8 = ROTL32(x8, 11); \
|
||||
xj = T32(x9 + xj); \
|
||||
x9 = ROTL32(x9, 11); \
|
||||
xg = T32(xa + xg); \
|
||||
xa = ROTL32(xa, 11); \
|
||||
xh = T32(xb + xh); \
|
||||
xb = ROTL32(xb, 11); \
|
||||
xm = T32(xc + xm); \
|
||||
xc = ROTL32(xc, 11); \
|
||||
xn = T32(xd + xn); \
|
||||
xd = ROTL32(xd, 11); \
|
||||
xk = T32(xe + xk); \
|
||||
xe = ROTL32(xe, 11); \
|
||||
xl = T32(xf + xl); \
|
||||
xf = ROTL32(xf, 11); \
|
||||
xq = T32(x0 + xq); \
|
||||
x0 = ROTL32(x0, 11); \
|
||||
xr = T32(x1 + xr); \
|
||||
x1 = ROTL32(x1, 11); \
|
||||
xo = T32(x2 + xo); \
|
||||
x2 = ROTL32(x2, 11); \
|
||||
xp = T32(x3 + xp); \
|
||||
x3 = ROTL32(x3, 11); \
|
||||
xu = T32(x4 + xu); \
|
||||
x4 = ROTL32(x4, 11); \
|
||||
xv = T32(x5 + xv); \
|
||||
x5 = ROTL32(x5, 11); \
|
||||
xs = T32(x6 + xs); \
|
||||
x6 = ROTL32(x6, 11); \
|
||||
xt = T32(x7 + xt); \
|
||||
x7 = ROTL32(x7, 11); \
|
||||
xc ^= xi; \
|
||||
xd ^= xj; \
|
||||
xe ^= xg; \
|
||||
xf ^= xh; \
|
||||
x8 ^= xm; \
|
||||
x9 ^= xn; \
|
||||
xa ^= xk; \
|
||||
xb ^= xl; \
|
||||
x4 ^= xq; \
|
||||
x5 ^= xr; \
|
||||
x6 ^= xo; \
|
||||
x7 ^= xp; \
|
||||
x0 ^= xu; \
|
||||
x1 ^= xv; \
|
||||
x2 ^= xs; \
|
||||
x3 ^= xt; \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_ODD do { \
|
||||
xj = T32(xc + xj); \
|
||||
xc = ROTL32(xc, 7); \
|
||||
xi = T32(xd + xi); \
|
||||
xd = ROTL32(xd, 7); \
|
||||
xh = T32(xe + xh); \
|
||||
xe = ROTL32(xe, 7); \
|
||||
xg = T32(xf + xg); \
|
||||
xf = ROTL32(xf, 7); \
|
||||
xn = T32(x8 + xn); \
|
||||
x8 = ROTL32(x8, 7); \
|
||||
xm = T32(x9 + xm); \
|
||||
x9 = ROTL32(x9, 7); \
|
||||
xl = T32(xa + xl); \
|
||||
xa = ROTL32(xa, 7); \
|
||||
xk = T32(xb + xk); \
|
||||
xb = ROTL32(xb, 7); \
|
||||
xr = T32(x4 + xr); \
|
||||
x4 = ROTL32(x4, 7); \
|
||||
xq = T32(x5 + xq); \
|
||||
x5 = ROTL32(x5, 7); \
|
||||
xp = T32(x6 + xp); \
|
||||
x6 = ROTL32(x6, 7); \
|
||||
xo = T32(x7 + xo); \
|
||||
x7 = ROTL32(x7, 7); \
|
||||
xv = T32(x0 + xv); \
|
||||
x0 = ROTL32(x0, 7); \
|
||||
xu = T32(x1 + xu); \
|
||||
x1 = ROTL32(x1, 7); \
|
||||
xt = T32(x2 + xt); \
|
||||
x2 = ROTL32(x2, 7); \
|
||||
xs = T32(x3 + xs); \
|
||||
x3 = ROTL32(x3, 7); \
|
||||
x4 ^= xj; \
|
||||
x5 ^= xi; \
|
||||
x6 ^= xh; \
|
||||
x7 ^= xg; \
|
||||
x0 ^= xn; \
|
||||
x1 ^= xm; \
|
||||
x2 ^= xl; \
|
||||
x3 ^= xk; \
|
||||
xc ^= xr; \
|
||||
xd ^= xq; \
|
||||
xe ^= xp; \
|
||||
xf ^= xo; \
|
||||
x8 ^= xv; \
|
||||
x9 ^= xu; \
|
||||
xa ^= xt; \
|
||||
xb ^= xs; \
|
||||
xh = T32(x4 + xh); \
|
||||
x4 = ROTL32(x4, 11); \
|
||||
xg = T32(x5 + xg); \
|
||||
x5 = ROTL32(x5, 11); \
|
||||
xj = T32(x6 + xj); \
|
||||
x6 = ROTL32(x6, 11); \
|
||||
xi = T32(x7 + xi); \
|
||||
x7 = ROTL32(x7, 11); \
|
||||
xl = T32(x0 + xl); \
|
||||
x0 = ROTL32(x0, 11); \
|
||||
xk = T32(x1 + xk); \
|
||||
x1 = ROTL32(x1, 11); \
|
||||
xn = T32(x2 + xn); \
|
||||
x2 = ROTL32(x2, 11); \
|
||||
xm = T32(x3 + xm); \
|
||||
x3 = ROTL32(x3, 11); \
|
||||
xp = T32(xc + xp); \
|
||||
xc = ROTL32(xc, 11); \
|
||||
xo = T32(xd + xo); \
|
||||
xd = ROTL32(xd, 11); \
|
||||
xr = T32(xe + xr); \
|
||||
xe = ROTL32(xe, 11); \
|
||||
xq = T32(xf + xq); \
|
||||
xf = ROTL32(xf, 11); \
|
||||
xt = T32(x8 + xt); \
|
||||
x8 = ROTL32(x8, 11); \
|
||||
xs = T32(x9 + xs); \
|
||||
x9 = ROTL32(x9, 11); \
|
||||
xv = T32(xa + xv); \
|
||||
xa = ROTL32(xa, 11); \
|
||||
xu = T32(xb + xu); \
|
||||
xb = ROTL32(xb, 11); \
|
||||
x0 ^= xh; \
|
||||
x1 ^= xg; \
|
||||
x2 ^= xj; \
|
||||
x3 ^= xi; \
|
||||
x4 ^= xl; \
|
||||
x5 ^= xk; \
|
||||
x6 ^= xn; \
|
||||
x7 ^= xm; \
|
||||
x8 ^= xp; \
|
||||
x9 ^= xo; \
|
||||
xa ^= xr; \
|
||||
xb ^= xq; \
|
||||
xc ^= xt; \
|
||||
xd ^= xs; \
|
||||
xe ^= xv; \
|
||||
xf ^= xu; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* There is no need to unroll all 16 rounds. The word-swapping permutation
|
||||
* is an involution, so we need to unroll an even number of rounds. On
|
||||
* "big" systems, unrolling 4 rounds yields about 97% of the speed
|
||||
* achieved with full unrolling; and it keeps the code more compact
|
||||
* for small architectures.
|
||||
*/
|
||||
|
||||
#if SPH_CUBEHASH_UNROLL == 2
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
int j; \
|
||||
for (j = 0; j < 8; j ++) { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#elif SPH_CUBEHASH_UNROLL == 4
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
int j; \
|
||||
for (j = 0; j < 4; j ++) { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#elif SPH_CUBEHASH_UNROLL == 8
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
int j; \
|
||||
for (j = 0; j < 2; j ++) { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
cubehash_init(sph_cubehash_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
memcpy(sc->state, iv, sizeof sc->state);
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
cubehash_core(sph_cubehash_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE(sc);
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
ptr += clen;
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
INPUT_BLOCK;
|
||||
SIXTEEN_ROUNDS;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
cubehash_close(sph_cubehash_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr;
|
||||
unsigned z;
|
||||
int i;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
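	/* append the n extra message bits (from ub) followed by a single 1 bit;
	   the remainder of the block is zero-padded below */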
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
READ_STATE(sc);
|
||||
INPUT_BLOCK;
|
||||
for (i = 0; i < 11; i ++) {
|
||||
SIXTEEN_ROUNDS;
|
||||
if (i == 0)
|
||||
xv ^= SPH_C32(1);
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
out = dst;
|
||||
for (z = 0; z < out_size_w32; z ++)
|
||||
sph_enc32le(out + (z << 2), sc->state[z]);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash224_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash224_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash224_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 7);
|
||||
sph_cubehash224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash256_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash256_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash256_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 8);
|
||||
sph_cubehash256_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash384_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash384_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash384_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 12);
|
||||
sph_cubehash384_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash512_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash512_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash512_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 16);
|
||||
sph_cubehash512_init(cc);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
292
algo/cubehash/sph_cubehash.h
Normal file
@@ -0,0 +1,292 @@
|
||||
/* $Id: sph_cubehash.h 180 2010-05-08 02:29:25Z tp $ */
|
||||
/**
|
||||
* CubeHash interface. CubeHash is a family of functions which differ by
|
||||
* their output size; this implementation defines CubeHash for output
|
||||
* sizes 224, 256, 384 and 512 bits, with the "standard parameters"
|
||||
* (CubeHash16/32 with the CubeHash specification notations).
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_cubehash.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_CUBEHASH_H__
|
||||
#define SPH_CUBEHASH_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha3/sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-224.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-256.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-384.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-512.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for CubeHash computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a CubeHash computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running CubeHash computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[32]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 state[32];
|
||||
#endif
|
||||
} sph_cubehash_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash224_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash256_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash384_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash512_context;
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-224 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-224 context (pointer to a
|
||||
* <code>sph_cubehash224_context</code>)
|
||||
*/
|
||||
void sph_cubehash224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
 * accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
 * be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-256 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-256 context (pointer to a
|
||||
* <code>sph_cubehash256_context</code>)
|
||||
*/
|
||||
void sph_cubehash256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
 * accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
 * be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-384 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-384 context (pointer to a
|
||||
* <code>sph_cubehash384_context</code>)
|
||||
*/
|
||||
void sph_cubehash384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
 * accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
 * be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-512 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-512 context (pointer to a
|
||||
* <code>sph_cubehash512_context</code>)
|
||||
*/
|
||||
void sph_cubehash512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
 * accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
 * be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
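/**
 * Typical call sequence (usage sketch; buffer names are illustrative):
 *
 *   sph_cubehash512_context cc;
 *   unsigned char out[64];
 *
 *   sph_cubehash512_init(&cc);
 *   sph_cubehash512(&cc, msg, msg_len);
 *   sph_cubehash512_close(&cc, out);
 */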
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
0
algo/cubehash/sse2/.dirstamp
Normal file
268
algo/cubehash/sse2/cubehash_sse2.c
Normal file
@@ -0,0 +1,268 @@
|
||||
/* CubeHash 16/32 is recommended for SHA-3 "normal", 16/1 for "formal" */
|
||||
#define CUBEHASH_ROUNDS 16
|
||||
#define CUBEHASH_BLOCKBYTES 32
|
||||
#define OPTIMIZE_SSE2
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#ifdef __AVX2__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#include "cubehash_sse2.h"
|
||||
#include "algo/sha3/sha3-defs.h"
|
||||
|
||||
//enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2 };
|
||||
|
||||
//#if defined(OPTIMIZE_SSE2)
|
||||
|
||||
static void transform( cubehashParam *sp )
|
||||
{
|
||||
int r;
|
||||
const int rounds = sp->rounds;
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
__m256i x0, x1, x2, x3, y0, y1;
|
||||
#ifdef UNUSED
|
||||
__m256i y2, y3;
|
||||
#endif
|
||||
|
||||
x0 = _mm256_load_si256( 0 + sp->x );
|
||||
x1 = _mm256_load_si256( 2 + sp->x );
|
||||
x2 = _mm256_load_si256( 4 + sp->x );
|
||||
x3 = _mm256_load_si256( 6 + sp->x );
|
||||
|
||||
for ( r = 0; r < rounds; ++r )
|
||||
{
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
|
||||
_mm256_srli_epi32( y1, 25 ) );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0x4e );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0x4e );
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = _mm256_permute2f128_si256( x0, x0, 1 );
|
||||
y1 = _mm256_permute2f128_si256( x1, x1, 1 );
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
_mm256_srli_epi32( y1, 21 ) );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0xb1 );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0xb1 );
|
||||
}
|
||||
|
||||
_mm256_store_si256( 0 + sp->x, x0 );
|
||||
_mm256_store_si256( 2 + sp->x, x1 );
|
||||
_mm256_store_si256( 4 + sp->x, x2 );
|
||||
_mm256_store_si256( 6 + sp->x, x3 );
|
||||
|
||||
#elif defined OPTIMIZE_SSE2
|
||||
|
||||
__m128i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
|
||||
#ifdef UNUSED
|
||||
__m128i y4, y5, y6, y7;
|
||||
#endif
|
||||
|
||||
x0 = _mm_load_si128(0 + sp->x);
|
||||
x1 = _mm_load_si128(1 + sp->x);
|
||||
x2 = _mm_load_si128(2 + sp->x);
|
||||
x3 = _mm_load_si128(3 + sp->x);
|
||||
x4 = _mm_load_si128(4 + sp->x);
|
||||
x5 = _mm_load_si128(5 + sp->x);
|
||||
x6 = _mm_load_si128(6 + sp->x);
|
||||
x7 = _mm_load_si128(7 + sp->x);
|
||||
|
||||
for (r = 0; r < rounds; ++r) {
|
||||
x4 = _mm_add_epi32(x0, x4);
|
||||
x5 = _mm_add_epi32(x1, x5);
|
||||
x6 = _mm_add_epi32(x2, x6);
|
||||
x7 = _mm_add_epi32(x3, x7);
|
||||
y0 = x2;
|
||||
y1 = x3;
|
||||
y2 = x0;
|
||||
y3 = x1;
|
||||
x0 = _mm_xor_si128(_mm_slli_epi32(y0, 7), _mm_srli_epi32(y0, 25));
|
||||
x1 = _mm_xor_si128(_mm_slli_epi32(y1, 7), _mm_srli_epi32(y1, 25));
|
||||
x2 = _mm_xor_si128(_mm_slli_epi32(y2, 7), _mm_srli_epi32(y2, 25));
|
||||
x3 = _mm_xor_si128(_mm_slli_epi32(y3, 7), _mm_srli_epi32(y3, 25));
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x2 = _mm_xor_si128(x2, x6);
|
||||
x3 = _mm_xor_si128(x3, x7);
|
||||
x4 = _mm_shuffle_epi32(x4, 0x4e);
|
||||
x5 = _mm_shuffle_epi32(x5, 0x4e);
|
||||
x6 = _mm_shuffle_epi32(x6, 0x4e);
|
||||
x7 = _mm_shuffle_epi32(x7, 0x4e);
|
||||
x4 = _mm_add_epi32(x0, x4);
|
||||
x5 = _mm_add_epi32(x1, x5);
|
||||
x6 = _mm_add_epi32(x2, x6);
|
||||
x7 = _mm_add_epi32(x3, x7);
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
y2 = x3;
|
||||
y3 = x2;
|
||||
x0 = _mm_xor_si128(_mm_slli_epi32(y0, 11), _mm_srli_epi32(y0, 21));
|
||||
x1 = _mm_xor_si128(_mm_slli_epi32(y1, 11), _mm_srli_epi32(y1, 21));
|
||||
x2 = _mm_xor_si128(_mm_slli_epi32(y2, 11), _mm_srli_epi32(y2, 21));
|
||||
x3 = _mm_xor_si128(_mm_slli_epi32(y3, 11), _mm_srli_epi32(y3, 21));
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x2 = _mm_xor_si128(x2, x6);
|
||||
x3 = _mm_xor_si128(x3, x7);
|
||||
x4 = _mm_shuffle_epi32(x4, 0xb1);
|
||||
x5 = _mm_shuffle_epi32(x5, 0xb1);
|
||||
x6 = _mm_shuffle_epi32(x6, 0xb1);
|
||||
x7 = _mm_shuffle_epi32(x7, 0xb1);
|
||||
}
|
||||
|
||||
_mm_store_si128(0 + sp->x, x0);
|
||||
_mm_store_si128(1 + sp->x, x1);
|
||||
_mm_store_si128(2 + sp->x, x2);
|
||||
_mm_store_si128(3 + sp->x, x3);
|
||||
_mm_store_si128(4 + sp->x, x4);
|
||||
_mm_store_si128(5 + sp->x, x5);
|
||||
_mm_store_si128(6 + sp->x, x6);
|
||||
_mm_store_si128(7 + sp->x, x7);
|
||||
|
||||
#else /* OPTIMIZE_SSE2 */
|
||||
// This code is probably not used; the sph implementation is used instead for unoptimized mining.
|
||||
|
||||
#define ROTATE(a,b) (((a) << (b)) | ((a) >> (32 - b)))
|
||||
|
||||
uint32_t y[16];
|
||||
int i;
|
||||
|
||||
for (r = 0; r < rounds; ++r) {
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] += sp->x[i];
|
||||
|
||||
	for (i = 0; i < 16; ++i) y[i ^ 8] = sp->x[i];
	for (i = 0; i < 16; ++i) sp->x[i] = ROTATE(y[i],7);
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i] ^= sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) y[i ^ 2] = sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] = y[i];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] += sp->x[i];
|
||||
|
||||
for (i = 0; i < 16; ++i) y[i ^ 4] = sp->x[i];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i] = ROTATE(y[i],11);
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i] ^= sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) y[i ^ 1] = sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] = y[i];
|
||||
|
||||
}
|
||||
#endif
|
||||
} // transform
|
||||
|
||||
int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (hashbitlen < 8) return BAD_HASHBITLEN;
|
||||
if (hashbitlen > 512) return BAD_HASHBITLEN;
|
||||
if (hashbitlen != 8 * (hashbitlen / 8)) return BAD_HASHBITLEN;
|
||||
|
||||
/* Sanity checks */
|
||||
if (rounds <= 0 || rounds > 32) rounds = CUBEHASH_ROUNDS;
|
||||
if (blockbytes <= 0 || blockbytes >= 256) blockbytes = CUBEHASH_BLOCKBYTES;
|
||||
|
||||
sp->hashbitlen = hashbitlen;
|
||||
sp->rounds = rounds;
|
||||
sp->blockbytes = blockbytes;
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
for (i = 0; i < 8; ++i) sp->x[i] = _mm_set_epi32(0, 0, 0, 0);
|
||||
sp->x[0] = _mm_set_epi32(0, sp->rounds, sp->blockbytes, hashbitlen / 8);
|
||||
#else
|
||||
for (i = 0; i < 32; ++i) sp->x[i] = 0;
|
||||
sp->x[0] = hashbitlen / 8;
|
||||
sp->x[1] = sp->blockbytes;
|
||||
sp->x[2] = sp->rounds;
|
||||
#endif
|
||||
for (i = 0; i < 10; ++i) transform(sp);
|
||||
sp->pos = 0;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
cubehashReset(cubehashParam *sp)
|
||||
{
|
||||
return cubehashInit(sp, sp->hashbitlen, sp->rounds, sp->blockbytes);
|
||||
}
|
||||
|
||||
int cubehashUpdate(cubehashParam *sp, const byte *data, size_t size)
|
||||
{
|
||||
uint64_t databitlen = 8 * size;
|
||||
|
||||
/* caller promises us that previous data had integral number of bytes */
|
||||
/* so sp->pos is a multiple of 8 */
|
||||
|
||||
while (databitlen >= 8) {
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= *data;
|
||||
#else
|
||||
uint32_t u = *data;
|
||||
u <<= 8 * ((sp->pos / 8) % 4);
|
||||
sp->x[sp->pos / 32] ^= u;
|
||||
#endif
|
||||
data += 1;
|
||||
databitlen -= 8;
|
||||
sp->pos += 8;
|
||||
if (sp->pos == 8 * sp->blockbytes) {
|
||||
transform(sp);
|
||||
sp->pos = 0;
|
||||
}
|
||||
}
|
||||
if (databitlen > 0) {
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= *data;
|
||||
#else
|
||||
uint32_t u = *data;
|
||||
u <<= 8 * ((sp->pos / 8) % 4);
|
||||
sp->x[sp->pos / 32] ^= u;
|
||||
#endif
|
||||
sp->pos += databitlen;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int cubehashDigest(cubehashParam *sp, byte *digest)
|
||||
{
|
||||
int i;
|
||||
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= (128 >> (sp->pos % 8));
|
||||
transform(sp);
|
||||
sp->x[7] = _mm_xor_si128(sp->x[7], _mm_set_epi32(1, 0, 0, 0));
|
||||
for (i = 0; i < 10; ++i) transform(sp);
|
||||
for (i = 0; i < sp->hashbitlen / 8; ++i)
|
||||
digest[i] = ((unsigned char *) sp->x)[i];
|
||||
#else
|
||||
uint32_t u;
|
||||
|
||||
u = (128 >> (sp->pos % 8));
|
||||
u <<= 8 * ((sp->pos / 8) % 4);
|
||||
sp->x[sp->pos / 32] ^= u;
|
||||
transform(sp);
|
||||
sp->x[31] ^= 1;
|
||||
for (i = 0; i < 10; ++i) transform(sp);
|
||||
for (i = 0; i < sp->hashbitlen / 8; ++i)
|
||||
digest[i] = sp->x[i / 4] >> (8 * (i % 4));
|
||||
#endif
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
292 algo/cubehash/sse2/cubehash_sse2.c.broke Normal file
@@ -0,0 +1,292 @@
/* CubeHash 16/32 is recommended for SHA-3 "normal", 16/1 for "formal" */
|
||||
#define CUBEHASH_ROUNDS 16
|
||||
#define CUBEHASH_BLOCKBYTES 32
|
||||
#define OPTIMIZE_SSE2
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#ifdef __AVX2__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#include "cubehash_sse2.h"
|
||||
#include "algo/sha3/sha3-defs.h"
|
||||
|
||||
//enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2 };
|
||||
|
||||
//#if defined(OPTIMIZE_SSE2)
|
||||
|
||||
static inline void transform( cubehashParam *sp )
|
||||
{
|
||||
int r;
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
__m256i x0, x1, x2, x3, y0, y1;
|
||||
#ifdef UNUSED
|
||||
__m256i y2, y3;
|
||||
#endif
|
||||
|
||||
x0 = _mm256_loadu_si256( 0 + sp->x );
|
||||
x1 = _mm256_loadu_si256( 2 + sp->x );
|
||||
x2 = _mm256_loadu_si256( 4 + sp->x );
|
||||
x3 = _mm256_loadu_si256( 6 + sp->x );
|
||||
|
||||
for ( r = 0; r < sp->rounds; ++r )
|
||||
{
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
|
||||
_mm256_srli_epi32( y1, 25 ) );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0x4e );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0x4e );
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = _mm256_permute2f128_si256( x0, x0, 1 );
|
||||
y1 = _mm256_permute2f128_si256( x1, x1, 1 );
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
_mm256_srli_epi32( y1, 21 ) );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0xb1 );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0xb1 );
|
||||
}
|
||||
|
||||
_mm256_storeu_si256( 0 + sp->x, x0 );
|
||||
_mm256_storeu_si256( 2 + sp->x, x1 );
|
||||
_mm256_storeu_si256( 4 + sp->x, x2 );
|
||||
_mm256_storeu_si256( 6 + sp->x, x3 );
|
||||
|
||||
#elif defined OPTIMIZE_SSE2
|
||||
|
||||
__m128i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
|
||||
#ifdef UNUSED
|
||||
__m128i y4, y5, y6, y7;
|
||||
#endif
|
||||
|
||||
x0 = _mm_load_si128(0 + sp->x);
|
||||
x1 = _mm_load_si128(1 + sp->x);
|
||||
x2 = _mm_load_si128(2 + sp->x);
|
||||
x3 = _mm_load_si128(3 + sp->x);
|
||||
x4 = _mm_load_si128(4 + sp->x);
|
||||
x5 = _mm_load_si128(5 + sp->x);
|
||||
x6 = _mm_load_si128(6 + sp->x);
|
||||
x7 = _mm_load_si128(7 + sp->x);
|
||||
|
||||
for (r = 0; r < sp->rounds; ++r) {
|
||||
x4 = _mm_add_epi32(x0, x4);
|
||||
x5 = _mm_add_epi32(x1, x5);
|
||||
x6 = _mm_add_epi32(x2, x6);
|
||||
x7 = _mm_add_epi32(x3, x7);
|
||||
y0 = x2;
|
||||
y1 = x3;
|
||||
y2 = x0;
|
||||
y3 = x1;
|
||||
x0 = _mm_xor_si128(_mm_slli_epi32(y0, 7), _mm_srli_epi32(y0, 25));
|
||||
x1 = _mm_xor_si128(_mm_slli_epi32(y1, 7), _mm_srli_epi32(y1, 25));
|
||||
x2 = _mm_xor_si128(_mm_slli_epi32(y2, 7), _mm_srli_epi32(y2, 25));
|
||||
x3 = _mm_xor_si128(_mm_slli_epi32(y3, 7), _mm_srli_epi32(y3, 25));
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x2 = _mm_xor_si128(x2, x6);
|
||||
x3 = _mm_xor_si128(x3, x7);
|
||||
x4 = _mm_shuffle_epi32(x4, 0x4e);
|
||||
x5 = _mm_shuffle_epi32(x5, 0x4e);
|
||||
x6 = _mm_shuffle_epi32(x6, 0x4e);
|
||||
x7 = _mm_shuffle_epi32(x7, 0x4e);
|
||||
x4 = _mm_add_epi32(x0, x4);
|
||||
x5 = _mm_add_epi32(x1, x5);
|
||||
x6 = _mm_add_epi32(x2, x6);
|
||||
x7 = _mm_add_epi32(x3, x7);
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
y2 = x3;
|
||||
y3 = x2;
|
||||
x0 = _mm_xor_si128(_mm_slli_epi32(y0, 11), _mm_srli_epi32(y0, 21));
|
||||
x1 = _mm_xor_si128(_mm_slli_epi32(y1, 11), _mm_srli_epi32(y1, 21));
|
||||
x2 = _mm_xor_si128(_mm_slli_epi32(y2, 11), _mm_srli_epi32(y2, 21));
|
||||
x3 = _mm_xor_si128(_mm_slli_epi32(y3, 11), _mm_srli_epi32(y3, 21));
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x2 = _mm_xor_si128(x2, x6);
|
||||
x3 = _mm_xor_si128(x3, x7);
|
||||
x4 = _mm_shuffle_epi32(x4, 0xb1);
|
||||
x5 = _mm_shuffle_epi32(x5, 0xb1);
|
||||
x6 = _mm_shuffle_epi32(x6, 0xb1);
|
||||
x7 = _mm_shuffle_epi32(x7, 0xb1);
|
||||
}
|
||||
|
||||
_mm_store_si128(0 + sp->x, x0);
|
||||
_mm_store_si128(1 + sp->x, x1);
|
||||
_mm_store_si128(2 + sp->x, x2);
|
||||
_mm_store_si128(3 + sp->x, x3);
|
||||
_mm_store_si128(4 + sp->x, x4);
|
||||
_mm_store_si128(5 + sp->x, x5);
|
||||
_mm_store_si128(6 + sp->x, x6);
|
||||
_mm_store_si128(7 + sp->x, x7);
|
||||
|
||||
#else /* OPTIMIZE_SSE2 */
|
||||
// Tis code probably not used, sph used instead for uniptoimized mining.
|
||||
|
||||
#define ROTATE(a,b) (((a) << (b)) | ((a) >> (32 - b)))
|
||||
|
||||
uint32_t y[16];
|
||||
int i;
|
||||
|
||||
for (r = 0; r < sp->rounds; ++r) {
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] += sp->x[i];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i] = ROTATE(y[i],7);
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i] ^= sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) y[i ^ 2] = sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] = y[i];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] += sp->x[i];
|
||||
|
||||
for (i = 0; i < 16; ++i) y[i ^ 4] = sp->x[i];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i] = ROTATE(y[i],11);
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i] ^= sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) y[i ^ 1] = sp->x[i + 16];
|
||||
|
||||
for (i = 0; i < 16; ++i) sp->x[i + 16] = y[i];
|
||||
|
||||
}
|
||||
#endif
|
||||
} // transform
|
||||
|
||||
int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (hashbitlen < 8) return BAD_HASHBITLEN;
|
||||
if (hashbitlen > 512) return BAD_HASHBITLEN;
|
||||
if (hashbitlen != 8 * (hashbitlen / 8)) return BAD_HASHBITLEN;
|
||||
|
||||
/* Sanity checks */
|
||||
if (rounds <= 0 || rounds > 32) rounds = CUBEHASH_ROUNDS;
|
||||
if (blockbytes <= 0 || blockbytes >= 256) blockbytes = CUBEHASH_BLOCKBYTES;
|
||||
|
||||
sp->hashbitlen = hashbitlen;
|
||||
sp->rounds = rounds;
|
||||
sp->blockbytes = blockbytes;
|
||||
#if defined __AVX2__
|
||||
for (i = 0; i < 4; ++i) sp->x[i] = _mm256_set_epi64x( 0, 0, 0, 0 );
|
||||
// try swapping
|
||||
sp->x[0] = _mm256_set_epi32( 0, sp->rounds, sp->blockbytes, hashbitlen / 8,
|
||||
0, 0, 0, 0);
|
||||
// sp->x[0] = _mm256_set_epi32( 0, 0, 0, 0,
|
||||
// 0, sp->rounds, sp->blockbytes, hashbitlen / 8 );
|
||||
#elif defined(OPTIMIZE_SSE2)
|
||||
for (i = 0; i < 8; ++i) sp->x[i] = _mm_set_epi32(0, 0, 0, 0);
|
||||
sp->x[0] = _mm_set_epi32(0, sp->rounds, sp->blockbytes, hashbitlen / 8);
|
||||
#else
|
||||
for (i = 0; i < 32; ++i) sp->x[i] = 0;
|
||||
sp->x[0] = hashbitlen / 8;
|
||||
sp->x[1] = sp->blockbytes;
|
||||
sp->x[2] = sp->rounds;
|
||||
#endif
|
||||
for (i = 0; i < 10; ++i) transform(sp);
|
||||
sp->pos = 0;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
cubehashReset(cubehashParam *sp)
|
||||
{
|
||||
return cubehashInit(sp, sp->hashbitlen, sp->rounds, sp->blockbytes);
|
||||
}
|
||||
|
||||
int cubehashUpdate(cubehashParam *sp, const byte *data, size_t size)
|
||||
{
|
||||
uint64_t databitlen = 8 * size;
|
||||
|
||||
/* caller promises us that previous data had integral number of bytes */
|
||||
/* so sp->pos is a multiple of 8 */
|
||||
|
||||
while (databitlen >= 8) {
|
||||
#if defined __AVX2__
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= *data;
|
||||
#elif defined(OPTIMIZE_SSE2)
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= *data;
|
||||
#else
|
||||
uint32_t u = *data;
|
||||
u <<= 8 * ((sp->pos / 8) % 4);
|
||||
sp->x[sp->pos / 32] ^= u;
|
||||
#endif
|
||||
data += 1;
|
||||
databitlen -= 8;
|
||||
sp->pos += 8;
|
||||
if (sp->pos == 8 * sp->blockbytes) {
|
||||
transform(sp);
|
||||
sp->pos = 0;
|
||||
}
|
||||
}
|
||||
if (databitlen > 0) {
|
||||
#if defined __AVX2__
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= *data;
|
||||
#elif defined(OPTIMIZE_SSE2)
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= *data;
|
||||
#else
|
||||
uint32_t u = *data;
|
||||
u <<= 8 * ((sp->pos / 8) % 4);
|
||||
sp->x[sp->pos / 32] ^= u;
|
||||
#endif
|
||||
sp->pos += databitlen;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
int cubehashDigest(cubehashParam *sp, byte *digest)
|
||||
{
|
||||
int i;
|
||||
#if defined __AVX2__
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= (128 >> (sp->pos % 8));
|
||||
__m128i t;
|
||||
transform(sp);
|
||||
// try control 0
|
||||
// t = _mm256_extracti128_si256( sp->x[7], 1 );
|
||||
t = _mm256_extracti128_si256( sp->x[7], 0 );
|
||||
t = _mm_xor_si128( t, _mm_set_epi32(1, 0, 0, 0) );
|
||||
// _mm256_inserti128_si256( sp->x[7], t, 1 );
|
||||
_mm256_inserti128_si256( sp->x[7], t, 0 );
|
||||
|
||||
for (i = 0; i < 10; ++i) transform(sp);
|
||||
for (i = 0; i < sp->hashbitlen / 8; ++i)
|
||||
digest[i] = ((unsigned char *) sp->x)[i];
|
||||
|
||||
#elif defined(OPTIMIZE_SSE2)
|
||||
((unsigned char *) sp->x)[sp->pos / 8] ^= (128 >> (sp->pos % 8));
|
||||
transform(sp);
|
||||
sp->x[7] = _mm_xor_si128(sp->x[7], _mm_set_epi32(1, 0, 0, 0));
|
||||
for (i = 0; i < 10; ++i) transform(sp);
|
||||
for (i = 0; i < sp->hashbitlen / 8; ++i)
|
||||
digest[i] = ((unsigned char *) sp->x)[i];
|
||||
#else
|
||||
uint32_t u;
|
||||
|
||||
u = (128 >> (sp->pos % 8));
|
||||
u <<= 8 * ((sp->pos / 8) % 4);
|
||||
sp->x[sp->pos / 32] ^= u;
|
||||
transform(sp);
|
||||
sp->x[31] ^= 1;
|
||||
for (i = 0; i < 10; ++i) transform(sp);
|
||||
for (i = 0; i < sp->hashbitlen / 8; ++i)
|
||||
digest[i] = sp->x[i / 4] >> (8 * (i % 4));
|
||||
#endif
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
64 algo/cubehash/sse2/cubehash_sse2.h Normal file
@@ -0,0 +1,64 @@
#ifndef CUBEHASH_SSE2_H__
|
||||
#define CUBEHASH_SSE2_H__
|
||||
|
||||
#include "compat.h"
|
||||
#include <stdint.h>
|
||||
#include "algo/sha3/sha3-defs.h"
|
||||
//#include <beecrypt/beecrypt.h>
|
||||
|
||||
//#if defined(__SSE2__)
|
||||
#define OPTIMIZE_SSE2
|
||||
//#endif
|
||||
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
/*!\brief Holds all the parameters necessary for the CUBEHASH algorithm.
|
||||
* \ingroup HASH_cubehash_m
|
||||
*/
|
||||
|
||||
struct _cubehashParam
|
||||
//#endif
|
||||
{
|
||||
int hashbitlen;
|
||||
int rounds;
|
||||
int blockbytes;
|
||||
int pos; /* number of bits read into x from current block */
|
||||
#if defined(OPTIMIZE_SSE2)
|
||||
__m128i _ALIGN(256) x[8];
|
||||
#else
|
||||
uint32_t x[32];
|
||||
#endif
|
||||
};
|
||||
|
||||
//#ifndef __cplusplus
|
||||
typedef struct _cubehashParam cubehashParam;
|
||||
//#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!\var cubehash256
|
||||
* \brief Holds the full API description of the CUBEHASH algorithm.
|
||||
*/
|
||||
//extern BEECRYPTAPI const hashFunction cubehash256;
|
||||
|
||||
//BEECRYPTAPI
|
||||
int cubehashInit(cubehashParam* sp, int hashbitlen, int rounds, int blockbytes);
|
||||
|
||||
//BEECRYPTAPI
|
||||
int cubehashReset(cubehashParam* sp);
|
||||
|
||||
//BEECRYPTAPI
|
||||
int cubehashUpdate(cubehashParam* sp, const byte *data, size_t size);
|
||||
|
||||
//BEECRYPTAPI
|
||||
int cubehashDigest(cubehashParam* sp, byte *digest);
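/*
 * Illustrative usage sketch (not part of the original header). The values 16
 * and 32 are the CUBEHASH_ROUNDS / CUBEHASH_BLOCKBYTES defaults defined in
 * cubehash_sse2.c; a 512-bit digest of an 80-byte block header would be:
 *
 *   cubehashParam ctx;
 *   unsigned char out[64];
 *   cubehashInit( &ctx, 512, 16, 32 );
 *   cubehashUpdate( &ctx, (const byte*)header, 80 );
 *   cubehashDigest( &ctx, out );
 *
 * cubehashReset() re-runs cubehashInit() with the parameters saved in the
 * context, so the same context can be reused for the next nonce.
 */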
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* H_CUBEHASH */
|
||||
264 algo/drop.c Normal file
@@ -0,0 +1,264 @@
/**
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2015 kernels10, tpruvot
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file drop.c
|
||||
* @author kernels10 <kernels10@gmail.com.com>
|
||||
* @author tpruvot <tpruvot@github>
|
||||
*/
|
||||
|
||||
#define POK_BOOL_MASK 0x00008000
|
||||
#define POK_DATA_MASK 0xFFFF0000
|
||||
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/fugue//sph_fugue.h"
|
||||
#include "algo/luffa/sph_luffa.h"
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
|
||||
static void shiftr_lp(const uint32_t *input, uint32_t *output, unsigned int shift)
|
||||
{
|
||||
if(!shift) {
|
||||
memcpy(output, input, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(output, 0, 64);
|
||||
for(int i = 0; i < 15; ++i) {
|
||||
output[i + 1] |= (input[i] >> (32 - shift));
|
||||
output[i] |= (input[i] << shift);
|
||||
}
|
||||
|
||||
output[15] |= (input[15] << shift);
|
||||
return;
|
||||
}
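/* Added note: despite the "r" in the name, shiftr_lp() shifts every 32-bit
   word left by `shift` bits and carries the spilled high bits into the next
   word, so with little-endian word order it is a left shift of the whole
   512-bit block (top bits discarded). shift is expected to be in 1..31;
   shift == 0 is handled by the memcpy fast path above. */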
|
||||
|
||||
static void switchHash(const void *input, void *output, int id)
|
||||
{
|
||||
/*
|
||||
sph_keccak512_context ctx_keccak;
|
||||
sph_blake512_context ctx_blake;
|
||||
sph_groestl512_context ctx_groestl;
|
||||
sph_skein512_context ctx_skein;
|
||||
sph_luffa512_context ctx_luffa;
|
||||
sph_echo512_context ctx_echo;
|
||||
sph_simd512_context ctx_simd;
|
||||
sph_cubehash512_context ctx_cubehash;
|
||||
sph_fugue512_context ctx_fugue;
|
||||
sph_shavite512_context ctx_shavite;
|
||||
|
||||
switch(id) {
|
||||
case 0:
|
||||
sph_keccak512_init(&ctx_keccak); sph_keccak512(&ctx_keccak, input, 64); sph_keccak512_close(&ctx_keccak, output);
|
||||
break;
|
||||
case 1:
|
||||
sph_blake512_init(&ctx_blake); sph_blake512(&ctx_blake, input, 64); sph_blake512_close(&ctx_blake, output);
|
||||
break;
|
||||
case 2:
|
||||
sph_groestl512_init(&ctx_groestl); sph_groestl512(&ctx_groestl, input, 64); sph_groestl512_close(&ctx_groestl, output);
|
||||
break;
|
||||
case 3:
|
||||
sph_skein512_init(&ctx_skein); sph_skein512(&ctx_skein, input, 64); sph_skein512_close(&ctx_skein, output);
|
||||
break;
|
||||
case 4:
|
||||
sph_luffa512_init(&ctx_luffa); sph_luffa512(&ctx_luffa, input, 64); sph_luffa512_close(&ctx_luffa, output);
|
||||
break;
|
||||
case 5:
|
||||
sph_echo512_init(&ctx_echo); sph_echo512(&ctx_echo, input, 64); sph_echo512_close(&ctx_echo, output);
|
||||
break;
|
||||
case 6:
|
||||
sph_shavite512_init(&ctx_shavite); sph_shavite512(&ctx_shavite, input, 64); sph_shavite512_close(&ctx_shavite, output);
|
||||
break;
|
||||
case 7:
|
||||
sph_fugue512_init(&ctx_fugue); sph_fugue512(&ctx_fugue, input, 64); sph_fugue512_close(&ctx_fugue, output);
|
||||
break;
|
||||
case 8:
|
||||
sph_simd512_init(&ctx_simd); sph_simd512(&ctx_simd, input, 64); sph_simd512_close(&ctx_simd, output);
|
||||
break;
|
||||
case 9:
|
||||
sph_cubehash512_init(&ctx_cubehash); sph_cubehash512(&ctx_cubehash, input, 64); sph_cubehash512_close(&ctx_cubehash, output);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void droplp_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[2][16];
|
||||
sph_jh512_context ctx_jh;
|
||||
uint32_t *hashA = hash[0];
|
||||
uint32_t *hashB = hash[1];
|
||||
|
||||
sph_jh512_init(&ctx_jh);
|
||||
sph_jh512(&ctx_jh, input, 80);
|
||||
sph_jh512_close(&ctx_jh, (void*)(hashA));
|
||||
|
||||
unsigned int startPosition = hashA[0] % 31;
|
||||
unsigned int i = 0;
|
||||
int j = 0;
|
||||
int start = 0;
|
||||
|
||||
for (i = startPosition; i < 31; i+=9) {
|
||||
start = i % 10;
|
||||
for (j = start; j < 10; j++) {
|
||||
shiftr_lp(hashA, hashB, (i & 3));
|
||||
switchHash((const void*)hashB, (void*)hashA, j);
|
||||
}
|
||||
for (j = 0; j < start; j++) {
|
||||
shiftr_lp(hashA, hashB, (i & 3));
|
||||
switchHash((const void*)hashB, (void*)hashA, j);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < startPosition; i += 9) {
|
||||
start = i % 10;
|
||||
for (j = start; j < 10; j++) {
|
||||
shiftr_lp(hashA, hashB, (i & 3));
|
||||
switchHash((const void*)hashB, (void*)hashA, j);
|
||||
}
|
||||
for (j = 0; j < start; j++) {
|
||||
shiftr_lp(hashA, hashB, (i & 3));
|
||||
switchHash((const void*)hashB, (void*)hashA, j);
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(state, hashA, 32);
|
||||
}
|
||||
|
||||
static void droplp_hash_pok(void *output, uint32_t *pdata, const uint32_t version)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t pok;
|
||||
|
||||
pdata[0] = version;
|
||||
droplp_hash(hash, pdata);
|
||||
|
||||
// fill PoK
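// (added note) the upper 16 bits of hash[0] (POK_DATA_MASK) are folded back
// into the header's version word; if that changes the header, the hash is
// recomputed below so the final hash commits to the embedded PoK data.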
|
||||
pok = version | (hash[0] & POK_DATA_MASK);
|
||||
if (pdata[0] != pok) {
|
||||
pdata[0] = pok;
|
||||
droplp_hash(hash, pdata);
|
||||
}
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_drop(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t version = pdata[0] & (~POK_DATA_MASK);
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
#define tmpdata pdata
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x07ff;
|
||||
|
||||
const uint32_t htarg = ptarget[7];
|
||||
|
||||
do {
|
||||
tmpdata[19] = nonce;
|
||||
droplp_hash_pok(hash, tmpdata, version);
|
||||
|
||||
if (hash[7] <= htarg && fulltest(hash, ptarget)) {
|
||||
pdata[0] = tmpdata[0];
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
if (opt_debug)
|
||||
applog(LOG_INFO, "found nonce %x", nonce);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void drop_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr, bool clean_job )
|
||||
{
|
||||
// ignore POK in first word
|
||||
// const int nonce_i = 19;
|
||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|
||||
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) )
|
||||
{
|
||||
work_free( work );
|
||||
work_copy( work, g_work );
|
||||
*nonceptr = ( 0xffffffffU / opt_n_threads ) * thr_id;
|
||||
if ( opt_randomize )
|
||||
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
|
||||
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
|
||||
}
|
||||
else
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
void drop_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (65536.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
void drop_display_pok( struct work* work )
|
||||
{
|
||||
if ( work->data[0] & 0x00008000 )
|
||||
applog(LOG_BLUE, "POK received: %08xx", work->data[0] );
|
||||
}
|
||||
|
||||
// Need to fix POK offset problems like zr5
|
||||
bool register_drop_algo( algo_gate_t* gate )
|
||||
{
|
||||
algo_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_drop;
|
||||
gate->hash = (void*)&droplp_hash_pok;
|
||||
gate->hash_alt = (void*)&droplp_hash_pok;
|
||||
gate->hash_suw = (void*)&droplp_hash_pok;
|
||||
gate->get_new_work = (void*)&drop_get_new_work;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->set_work_data_endian = (void*)&swab_work_data;
|
||||
gate->display_extra_data = (void*)&drop_display_pok;
|
||||
gate->work_data_size = 80;
|
||||
gate->work_cmp_size = 72;
|
||||
return true;
|
||||
};
|
||||
|
||||
0 algo/echo/.dirstamp Normal file
0 algo/echo/aes_ni/.dirstamp Normal file
2 algo/echo/aes_ni/api.h Normal file
@@ -0,0 +1,2 @@
#define CRYPTO_BYTES 64
#define CRYPTO_VERSION "1.208"
2 algo/echo/aes_ni/architectures Normal file
@@ -0,0 +1,2 @@
amd64
x86
623 algo/echo/aes_ni/hash.c Normal file
@@ -0,0 +1,623 @@
/*
|
||||
* file : echo_vperm.c
|
||||
* version : 1.0.208
|
||||
* date : 14.12.2010
|
||||
*
|
||||
* - vperm and aes_ni implementations of hash function ECHO
|
||||
* - implements NIST hash api
|
||||
* - assumes that message length is a multiple of 8 bits
|
||||
* - _ECHO_VPERM_ must be defined if compiling with ../main.c
|
||||
* - define NO_AES_NI for aes_ni version
|
||||
*
|
||||
* Cagdas Calik
|
||||
* ccalik@metu.edu.tr
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <memory.h>
|
||||
#include "miner.h"
|
||||
#include "hash_api.h"
|
||||
#include "vperm.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#include <wmmintrin.h>
|
||||
#else
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
|
||||
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
|
||||
MYALIGN const unsigned int _k_opt[] = {0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121, 0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1};
|
||||
MYALIGN const unsigned int _k_inv[] = {0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309, 0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C};
|
||||
MYALIGN const unsigned int _k_sb1[] = {0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E, 0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1};
|
||||
MYALIGN const unsigned int _k_sb2[] = {0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955, 0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8};
|
||||
MYALIGN const unsigned int _k_sb3[] = {0xC0211A00, 0x53E17249, 0xA8B2DA89, 0xFB68933B, 0xF0030A00, 0x5FF35C55, 0xA6ACFAA5, 0xF956AF09};
|
||||
MYALIGN const unsigned int _k_sb4[] = {0x3FD64100, 0xE1E937A0, 0x49087E9F, 0xA876DE97, 0xC393EA00, 0x3D50AED7, 0x876D2914, 0xBA44FE79};
|
||||
MYALIGN const unsigned int _k_sb5[] = {0xF4867F00, 0x5072D62F, 0x5D228BDB, 0x0DA9A4F9, 0x3971C900, 0x0B487AC2, 0x8A43F0FB, 0x81B332B8};
|
||||
MYALIGN const unsigned int _k_sb7[] = {0xFFF75B00, 0xB20845E9, 0xE1BAA416, 0x531E4DAC, 0x3390E000, 0x62A3F282, 0x21C1D3B1, 0x43125170};
|
||||
MYALIGN const unsigned int _k_sbo[] = {0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A, 0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1};
|
||||
MYALIGN const unsigned int _k_h63[] = {0x63636363, 0x63636363, 0x63636363, 0x63636363};
|
||||
MYALIGN const unsigned int _k_hc6[] = {0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6};
|
||||
MYALIGN const unsigned int _k_h5b[] = {0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b};
|
||||
MYALIGN const unsigned int _k_h4e[] = {0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e};
|
||||
MYALIGN const unsigned int _k_h0e[] = {0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e};
|
||||
MYALIGN const unsigned int _k_h15[] = {0x15151515, 0x15151515, 0x15151515, 0x15151515};
|
||||
MYALIGN const unsigned int _k_aesmix1[] = {0x0f0a0500, 0x030e0904, 0x07020d08, 0x0b06010c};
|
||||
MYALIGN const unsigned int _k_aesmix2[] = {0x000f0a05, 0x04030e09, 0x0807020d, 0x0c0b0601};
|
||||
MYALIGN const unsigned int _k_aesmix3[] = {0x05000f0a, 0x0904030e, 0x0d080702, 0x010c0b06};
|
||||
MYALIGN const unsigned int _k_aesmix4[] = {0x0a05000f, 0x0e090403, 0x020d0807, 0x06010c0b};
|
||||
|
||||
|
||||
MYALIGN const unsigned int const1[] = {0x00000001, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int mul2mask[] = {0x00001b00, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int lsbmask[] = {0x01010101, 0x01010101, 0x01010101, 0x01010101};
|
||||
MYALIGN const unsigned int invshiftrows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
|
||||
MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};
|
||||
|
||||
|
||||
//#include "crypto_hash.h"
|
||||
|
||||
int crypto_hash(
|
||||
unsigned char *out,
|
||||
const unsigned char *in,
|
||||
unsigned long long inlen
|
||||
)
|
||||
{
|
||||
|
||||
if(hash_echo(512, in, inlen * 8, out) == SUCCESS)
|
||||
return 0;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
int main()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#if 0
|
||||
void DumpState(__m128i *ps)
|
||||
{
|
||||
int i, j, k;
|
||||
unsigned int ucol;
|
||||
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
printf("row %d,col %d : ", i, j);
|
||||
for(k = 0; k < 4; k++)
|
||||
{
|
||||
ucol = *((int*)ps + 16 * i + 4 * j + k);
|
||||
printf("%02x%02x%02x%02x ", (ucol >> 0) & 0xff, (ucol >> 8) & 0xff, (ucol >> 16) & 0xff, (ucol >> 24) & 0xff);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#define ECHO_SUBBYTES(state, i, j) \
|
||||
state[i][j] = _mm_aesenc_si128(state[i][j], k1);\
|
||||
state[i][j] = _mm_aesenc_si128(state[i][j], M128(zero));\
|
||||
k1 = _mm_add_epi32(k1, M128(const1))
|
||||
#else
|
||||
#define ECHO_SUBBYTES(state, i, j) \
|
||||
AES_ROUND_VPERM(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
state[i][j] = _mm_xor_si128(state[i][j], k1);\
|
||||
AES_ROUND_VPERM(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
k1 = _mm_add_epi32(k1, M128(const1))
|
||||
|
||||
#define ECHO_SUB_AND_MIX(state, i, j, state2, c, r1, r2, r3, r4) \
|
||||
AES_ROUND_VPERM_CORE(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
ktemp = k1;\
|
||||
TRANSFORM(ktemp, _k_ipt, t1, t4);\
|
||||
state[i][j] = _mm_xor_si128(state[i][j], ktemp);\
|
||||
AES_ROUND_VPERM_CORE(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
k1 = _mm_add_epi32(k1, M128(const1));\
|
||||
s1 = state[i][j];\
|
||||
s2 = s1;\
|
||||
TRANSFORM(s2, mul2ipt, t1, t2);\
|
||||
s3 = _mm_xor_si128(s1, s2);\
|
||||
state2[r1][c] = _mm_xor_si128(state2[r1][c], s2);\
|
||||
state2[r2][c] = _mm_xor_si128(state2[r2][c], s1);\
|
||||
state2[r3][c] = _mm_xor_si128(state2[r3][c], s1);\
|
||||
state2[r4][c] = _mm_xor_si128(state2[r4][c], s3)
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define ECHO_MIXBYTES(state1, state2, j, t1, t2, s2) \
|
||||
s2 = _mm_add_epi8(state1[0][j], state1[0][j]);\
|
||||
t1 = _mm_srli_epi16(state1[0][j], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = s2;\
|
||||
state2[1][j] = state1[0][j];\
|
||||
state2[2][j] = state1[0][j];\
|
||||
state2[3][j] = _mm_xor_si128(s2, state1[0][j]);\
|
||||
s2 = _mm_add_epi8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[1][(j + 1) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], _mm_xor_si128(s2, state1[1][(j + 1) & 3]));\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], s2);\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], state1[1][(j + 1) & 3]);\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], state1[1][(j + 1) & 3]);\
|
||||
s2 = _mm_add_epi8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[2][(j + 2) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], state1[2][(j + 2) & 3]);\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], _mm_xor_si128(s2, state1[2][(j + 2) & 3]));\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], s2);\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], state1[2][(j + 2) & 3]);\
|
||||
s2 = _mm_add_epi8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[3][(j + 3) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], state1[3][(j + 3) & 3]);\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], state1[3][(j + 3) & 3]);\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], _mm_xor_si128(s2, state1[3][(j + 3) & 3]));\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], s2)
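/* Added note: ECHO_MIXBYTES is the MixColumns-style column mix done on whole
   128-bit words. Multiplication by 2 in GF(2^8) is branch-free: _mm_add_epi8
   doubles every byte, the old MSB of each byte is isolated via
   _mm_srli_epi16 + lsbmask, and _mm_shuffle_epi8 with mul2mask turns that bit
   into the 0x1b reduction constant which is XORed back in; 3*x is then formed
   as (2*x) ^ x. */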
|
||||
|
||||
|
||||
#define ECHO_ROUND_UNROLL2 \
|
||||
ECHO_SUBBYTES(_state, 0, 0);\
|
||||
ECHO_SUBBYTES(_state, 1, 0);\
|
||||
ECHO_SUBBYTES(_state, 2, 0);\
|
||||
ECHO_SUBBYTES(_state, 3, 0);\
|
||||
ECHO_SUBBYTES(_state, 0, 1);\
|
||||
ECHO_SUBBYTES(_state, 1, 1);\
|
||||
ECHO_SUBBYTES(_state, 2, 1);\
|
||||
ECHO_SUBBYTES(_state, 3, 1);\
|
||||
ECHO_SUBBYTES(_state, 0, 2);\
|
||||
ECHO_SUBBYTES(_state, 1, 2);\
|
||||
ECHO_SUBBYTES(_state, 2, 2);\
|
||||
ECHO_SUBBYTES(_state, 3, 2);\
|
||||
ECHO_SUBBYTES(_state, 0, 3);\
|
||||
ECHO_SUBBYTES(_state, 1, 3);\
|
||||
ECHO_SUBBYTES(_state, 2, 3);\
|
||||
ECHO_SUBBYTES(_state, 3, 3);\
|
||||
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 0);\
|
||||
ECHO_SUBBYTES(_state2, 1, 0);\
|
||||
ECHO_SUBBYTES(_state2, 2, 0);\
|
||||
ECHO_SUBBYTES(_state2, 3, 0);\
|
||||
ECHO_SUBBYTES(_state2, 0, 1);\
|
||||
ECHO_SUBBYTES(_state2, 1, 1);\
|
||||
ECHO_SUBBYTES(_state2, 2, 1);\
|
||||
ECHO_SUBBYTES(_state2, 3, 1);\
|
||||
ECHO_SUBBYTES(_state2, 0, 2);\
|
||||
ECHO_SUBBYTES(_state2, 1, 2);\
|
||||
ECHO_SUBBYTES(_state2, 2, 2);\
|
||||
ECHO_SUBBYTES(_state2, 3, 2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 3);\
|
||||
ECHO_SUBBYTES(_state2, 1, 3);\
|
||||
ECHO_SUBBYTES(_state2, 2, 3);\
|
||||
ECHO_SUBBYTES(_state2, 3, 3);\
|
||||
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
|
||||
|
||||
|
||||
|
||||
#define SAVESTATE(dst, src)\
|
||||
dst[0][0] = src[0][0];\
|
||||
dst[0][1] = src[0][1];\
|
||||
dst[0][2] = src[0][2];\
|
||||
dst[0][3] = src[0][3];\
|
||||
dst[1][0] = src[1][0];\
|
||||
dst[1][1] = src[1][1];\
|
||||
dst[1][2] = src[1][2];\
|
||||
dst[1][3] = src[1][3];\
|
||||
dst[2][0] = src[2][0];\
|
||||
dst[2][1] = src[2][1];\
|
||||
dst[2][2] = src[2][2];\
|
||||
dst[2][3] = src[2][3];\
|
||||
dst[3][0] = src[3][0];\
|
||||
dst[3][1] = src[3][1];\
|
||||
dst[3][2] = src[3][2];\
|
||||
dst[3][3] = src[3][3]
|
||||
|
||||
|
||||
void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
|
||||
{
|
||||
unsigned int r, b, i, j;
|
||||
__m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
|
||||
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
|
||||
|
||||
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < ctx->uHashSize / 256; j++)
|
||||
_state[i][j] = ctx->state[i][j];
|
||||
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// transform cv
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < ctx->uHashSize / 256; j++)
|
||||
{
|
||||
TRANSFORM(_state[i][j], _k_ipt, t1, t2);
|
||||
}
|
||||
#endif
|
||||
|
||||
for(b = 0; b < uBlockCount; b++)
|
||||
{
|
||||
ctx->k = _mm_add_epi64(ctx->k, ctx->const1536);
|
||||
|
||||
// load message
|
||||
for(j = ctx->uHashSize / 256; j < 4; j++)
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][j] = _mm_loadu_si128((__m128i*)pmsg + 4 * (j - (ctx->uHashSize / 256)) + i);
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// transform message
|
||||
TRANSFORM(_state[i][j], _k_ipt, t1, t2);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// save state
|
||||
SAVESTATE(_statebackup, _state);
|
||||
|
||||
|
||||
k1 = ctx->k;
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
for(r = 0; r < ctx->uRounds / 2; r++)
|
||||
{
|
||||
ECHO_ROUND_UNROLL2;
|
||||
}
|
||||
|
||||
#else
|
||||
for(r = 0; r < ctx->uRounds / 2; r++)
|
||||
{
|
||||
_state2[0][0] = M128(zero); _state2[1][0] = M128(zero); _state2[2][0] = M128(zero); _state2[3][0] = M128(zero);
|
||||
_state2[0][1] = M128(zero); _state2[1][1] = M128(zero); _state2[2][1] = M128(zero); _state2[3][1] = M128(zero);
|
||||
_state2[0][2] = M128(zero); _state2[1][2] = M128(zero); _state2[2][2] = M128(zero); _state2[3][2] = M128(zero);
|
||||
_state2[0][3] = M128(zero); _state2[1][3] = M128(zero); _state2[2][3] = M128(zero); _state2[3][3] = M128(zero);
|
||||
|
||||
ECHO_SUB_AND_MIX(_state, 0, 0, _state2, 0, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 0, _state2, 3, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 0, _state2, 2, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 0, _state2, 1, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state, 0, 1, _state2, 1, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 1, _state2, 0, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 1, _state2, 3, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 1, _state2, 2, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state, 0, 2, _state2, 2, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 2, _state2, 1, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 2, _state2, 0, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 2, _state2, 3, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state, 0, 3, _state2, 3, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 3, _state2, 2, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 3, _state2, 1, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 3, _state2, 0, 3, 0, 1, 2);
|
||||
|
||||
_state[0][0] = M128(zero); _state[1][0] = M128(zero); _state[2][0] = M128(zero); _state[3][0] = M128(zero);
|
||||
_state[0][1] = M128(zero); _state[1][1] = M128(zero); _state[2][1] = M128(zero); _state[3][1] = M128(zero);
|
||||
_state[0][2] = M128(zero); _state[1][2] = M128(zero); _state[2][2] = M128(zero); _state[3][2] = M128(zero);
|
||||
_state[0][3] = M128(zero); _state[1][3] = M128(zero); _state[2][3] = M128(zero); _state[3][3] = M128(zero);
|
||||
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 0, _state, 0, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 0, _state, 3, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 0, _state, 2, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 0, _state, 1, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 1, _state, 1, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 1, _state, 0, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 1, _state, 3, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 1, _state, 2, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 2, _state, 2, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 2, _state, 1, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 2, _state, 0, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 2, _state, 3, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 3, _state, 3, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 3, _state, 2, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 3, _state, 1, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 3, _state, 0, 3, 0, 1, 2);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
if(ctx->uHashSize == 256)
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][1]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][3]);
|
||||
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][1]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][3]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _state[i][3]);
|
||||
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
|
||||
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][1]);
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][3]);
|
||||
}
|
||||
}
|
||||
|
||||
pmsg += ctx->uBlockLength;
|
||||
}
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// transform state
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
TRANSFORM(_state[i][j], _k_opt, t1, t2);
|
||||
}
|
||||
#endif
|
||||
|
||||
SAVESTATE(ctx->state, _state);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
HashReturn init_echo(hashState_echo *ctx, int nHashSize)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
ctx->k = _mm_xor_si128(ctx->k, ctx->k);
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
|
||||
switch(nHashSize)
|
||||
{
|
||||
case 256:
|
||||
ctx->uHashSize = 256;
|
||||
ctx->uBlockLength = 192;
|
||||
ctx->uRounds = 8;
|
||||
ctx->hashsize = _mm_set_epi32(0, 0, 0, 0x00000100);
|
||||
ctx->const1536 = _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000600);
|
||||
break;
|
||||
|
||||
case 512:
|
||||
ctx->uHashSize = 512;
|
||||
ctx->uBlockLength = 128;
|
||||
ctx->uRounds = 10;
|
||||
ctx->hashsize = _mm_set_epi32(0, 0, 0, 0x00000200);
|
||||
ctx->const1536 = _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000400);
|
||||
break;
|
||||
|
||||
default:
|
||||
return BAD_HASHBITLEN;
|
||||
}
|
||||
|
||||
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < nHashSize / 256; j++)
|
||||
ctx->state[i][j] = ctx->hashsize;
|
||||
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = nHashSize / 256; j < 4; j++)
|
||||
ctx->state[i][j] = _mm_set_epi32(0, 0, 0, 0);
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen)
|
||||
{
|
||||
unsigned int uByteLength, uBlockCount, uRemainingBytes;
|
||||
|
||||
uByteLength = (unsigned int)(databitlen / 8);
|
||||
|
||||
if((state->uBufferBytes + uByteLength) >= state->uBlockLength)
|
||||
{
|
||||
if(state->uBufferBytes != 0)
|
||||
{
|
||||
// Fill the buffer
|
||||
memcpy(state->buffer + state->uBufferBytes, (void*)data, state->uBlockLength - state->uBufferBytes);
|
||||
|
||||
// Process buffer
|
||||
Compress(state, state->buffer, 1);
|
||||
state->processed_bits += state->uBlockLength * 8;
|
||||
|
||||
data += state->uBlockLength - state->uBufferBytes;
|
||||
uByteLength -= state->uBlockLength - state->uBufferBytes;
|
||||
}
|
||||
|
||||
// buffer now does not contain any unprocessed bytes
|
||||
|
||||
uBlockCount = uByteLength / state->uBlockLength;
|
||||
uRemainingBytes = uByteLength % state->uBlockLength;
|
||||
|
||||
if(uBlockCount > 0)
|
||||
{
|
||||
Compress(state, data, uBlockCount);
|
||||
|
||||
state->processed_bits += uBlockCount * state->uBlockLength * 8;
|
||||
data += uBlockCount * state->uBlockLength;
|
||||
}
|
||||
|
||||
if(uRemainingBytes > 0)
|
||||
{
|
||||
memcpy(state->buffer, (void*)data, uRemainingBytes);
|
||||
}
|
||||
|
||||
state->uBufferBytes = uRemainingBytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(state->buffer + state->uBufferBytes, (void*)data, uByteLength);
|
||||
state->uBufferBytes += uByteLength;
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
HashReturn final_echo(hashState_echo *state, BitSequence *hashval)
|
||||
{
|
||||
__m128i remainingbits;
|
||||
|
||||
// Add remaining bytes in the buffer
|
||||
state->processed_bits += state->uBufferBytes * 8;
|
||||
|
||||
remainingbits = _mm_set_epi32(0, 0, 0, state->uBufferBytes * 8);
|
||||
|
||||
// Pad with 0x80
|
||||
state->buffer[state->uBufferBytes++] = 0x80;
|
||||
|
||||
// Enough buffer space for padding in this block?
|
||||
if((state->uBlockLength - state->uBufferBytes) >= 18)
|
||||
{
|
||||
// Pad with zeros
|
||||
memset(state->buffer + state->uBufferBytes, 0, state->uBlockLength - (state->uBufferBytes + 18));
|
||||
|
||||
// Hash size
|
||||
*((unsigned short*)(state->buffer + state->uBlockLength - 18)) = state->uHashSize;
|
||||
|
||||
// Processed bits
|
||||
*((DataLength*)(state->buffer + state->uBlockLength - 16)) = state->processed_bits;
|
||||
*((DataLength*)(state->buffer + state->uBlockLength - 8)) = 0;
|
||||
|
||||
// Last block contains message bits?
|
||||
if(state->uBufferBytes == 1)
|
||||
{
|
||||
state->k = _mm_xor_si128(state->k, state->k);
|
||||
state->k = _mm_sub_epi64(state->k, state->const1536);
|
||||
}
|
||||
else
|
||||
{
|
||||
state->k = _mm_add_epi64(state->k, remainingbits);
|
||||
state->k = _mm_sub_epi64(state->k, state->const1536);
|
||||
}
|
||||
|
||||
// Compress
|
||||
Compress(state, state->buffer, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fill with zero and compress
|
||||
memset(state->buffer + state->uBufferBytes, 0, state->uBlockLength - state->uBufferBytes);
|
||||
state->k = _mm_add_epi64(state->k, remainingbits);
|
||||
state->k = _mm_sub_epi64(state->k, state->const1536);
|
||||
Compress(state, state->buffer, 1);
|
||||
|
||||
// Last block
|
||||
memset(state->buffer, 0, state->uBlockLength - 18);
|
||||
|
||||
// Hash size
|
||||
*((unsigned short*)(state->buffer + state->uBlockLength - 18)) = state->uHashSize;
|
||||
|
||||
// Processed bits
|
||||
*((DataLength*)(state->buffer + state->uBlockLength - 16)) = state->processed_bits;
|
||||
*((DataLength*)(state->buffer + state->uBlockLength - 8)) = 0;
|
||||
|
||||
// Compress the last block
|
||||
state->k = _mm_xor_si128(state->k, state->k);
|
||||
state->k = _mm_sub_epi64(state->k, state->const1536);
|
||||
Compress(state, state->buffer, 1);
|
||||
}
|
||||
|
||||
// Store the hash value
|
||||
_mm_storeu_si128((__m128i*)hashval + 0, state->state[0][0]);
|
||||
_mm_storeu_si128((__m128i*)hashval + 1, state->state[1][0]);
|
||||
|
||||
if(state->uHashSize == 512)
|
||||
{
|
||||
_mm_storeu_si128((__m128i*)hashval + 2, state->state[2][0]);
|
||||
_mm_storeu_si128((__m128i*)hashval + 3, state->state[3][0]);
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
|
||||
{
|
||||
HashReturn hRet;
|
||||
hashState_echo hs;
|
||||
|
||||
/////
|
||||
/*
|
||||
__m128i a, b, c, d, t[4], u[4], v[4];
|
||||
|
||||
a = _mm_set_epi32(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
|
||||
b = _mm_set_epi32(0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110);
|
||||
c = _mm_set_epi32(0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120);
|
||||
d = _mm_set_epi32(0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130);
|
||||
|
||||
t[0] = _mm_unpacklo_epi8(a, b);
|
||||
t[1] = _mm_unpackhi_epi8(a, b);
|
||||
t[2] = _mm_unpacklo_epi8(c, d);
|
||||
t[3] = _mm_unpackhi_epi8(c, d);
|
||||
|
||||
u[0] = _mm_unpacklo_epi16(t[0], t[2]);
|
||||
u[1] = _mm_unpackhi_epi16(t[0], t[2]);
|
||||
u[2] = _mm_unpacklo_epi16(t[1], t[3]);
|
||||
u[3] = _mm_unpackhi_epi16(t[1], t[3]);
|
||||
|
||||
|
||||
t[0] = _mm_unpacklo_epi16(u[0], u[1]);
|
||||
t[1] = _mm_unpackhi_epi16(u[0], u[1]);
|
||||
t[2] = _mm_unpacklo_epi16(u[2], u[3]);
|
||||
t[3] = _mm_unpackhi_epi16(u[2], u[3]);
|
||||
|
||||
u[0] = _mm_unpacklo_epi8(t[0], t[1]);
|
||||
u[1] = _mm_unpackhi_epi8(t[0], t[1]);
|
||||
u[2] = _mm_unpacklo_epi8(t[2], t[3]);
|
||||
u[3] = _mm_unpackhi_epi8(t[2], t[3]);
|
||||
|
||||
a = _mm_unpacklo_epi8(u[0], u[1]);
|
||||
b = _mm_unpackhi_epi8(u[0], u[1]);
|
||||
c = _mm_unpacklo_epi8(u[2], u[3]);
|
||||
d = _mm_unpackhi_epi8(u[2], u[3]);
|
||||
*/
|
||||
/////
|
||||
|
||||
hRet = init_echo(&hs, hashbitlen);
|
||||
if(hRet != SUCCESS)
|
||||
return hRet;
|
||||
|
||||
hRet = update_echo(&hs, data, databitlen);
|
||||
if(hRet != SUCCESS)
|
||||
return hRet;
|
||||
|
||||
hRet = final_echo(&hs, hashval);
|
||||
if(hRet != SUCCESS)
|
||||
return hRet;
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
58 algo/echo/aes_ni/hash_api.h Normal file
@@ -0,0 +1,58 @@
/*
|
||||
* file : hash_api.h
|
||||
* version : 1.0.208
|
||||
* date : 14.12.2010
|
||||
*
|
||||
* ECHO vperm implementation Hash API
|
||||
*
|
||||
* Cagdas Calik
|
||||
* ccalik@metu.edu.tr
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef HASH_API_H
|
||||
#define HASH_API_H
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#define HASH_IMPL_STR "ECHO-aesni"
|
||||
#else
|
||||
#define HASH_IMPL_STR "ECHO-vperm"
|
||||
#endif
|
||||
|
||||
|
||||
#include "algo/sha3/sha3_common.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
__m128i state[4][4];
|
||||
__m128i k;
|
||||
__m128i hashsize;
|
||||
__m128i const1536;
|
||||
|
||||
unsigned int uRounds;
|
||||
unsigned int uHashSize;
|
||||
unsigned int uBlockLength;
|
||||
unsigned int uBufferBytes;
|
||||
DataLength processed_bits;
|
||||
BitSequence buffer[192];
|
||||
|
||||
} hashState_echo;
|
||||
|
||||
HashReturn init_echo(hashState_echo *state, int hashbitlen);
|
||||
|
||||
HashReturn reinit_echo(hashState_echo *state);
|
||||
|
||||
HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen);
|
||||
|
||||
HashReturn final_echo(hashState_echo *state, BitSequence *hashval);
|
||||
|
||||
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
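/* Illustrative usage sketch (not part of the original header): hashing a
   64-byte message with ECHO-512 through the incremental API; note that the
   length passed to update_echo() is given in bits. hash_echo() above wraps
   the same init/update/final sequence.

     hashState_echo ctx;
     BitSequence out[64];
     init_echo( &ctx, 512 );
     update_echo( &ctx, msg, 64 * 8 );
     final_echo( &ctx, out );
*/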
|
||||
|
||||
|
||||
#endif // HASH_API_H
|
||||
|
||||
1 algo/echo/aes_ni/implementors Normal file
@@ -0,0 +1 @@
Çağdaş Çalık
119 algo/echo/aes_ni/vperm.h Normal file
@@ -0,0 +1,119 @@
/*
|
||||
* file : vperm.h
|
||||
* version : 1.0.208
|
||||
* date : 14.12.2010
|
||||
*
|
||||
* vperm implementation of AES s-box
|
||||
*
|
||||
* Credits: Adapted from Mike Hamburg's AES implementation, http://crypto.stanford.edu/vpaes/
|
||||
*
|
||||
* Cagdas Calik
|
||||
* ccalik@metu.edu.tr
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef VPERM_H
|
||||
#define VPERM_H
|
||||
|
||||
#include "algo/sha3/sha3_common.h"
|
||||
#include <tmmintrin.h>
|
||||
|
||||
/*
|
||||
extern const unsigned int _k_s0F[];
|
||||
extern const unsigned int _k_ipt[];
|
||||
extern const unsigned int _k_opt[];
|
||||
extern const unsigned int _k_inv[];
|
||||
extern const unsigned int _k_sb1[];
|
||||
extern const unsigned int _k_sb2[];
|
||||
extern const unsigned int _k_sb3[];
|
||||
extern const unsigned int _k_sb4[];
|
||||
extern const unsigned int _k_sb5[];
|
||||
extern const unsigned int _k_sb7[];
|
||||
extern const unsigned int _k_sbo[];
|
||||
extern const unsigned int _k_h63[];
|
||||
extern const unsigned int _k_hc6[];
|
||||
extern const unsigned int _k_h5b[];
|
||||
extern const unsigned int _k_h4e[];
|
||||
extern const unsigned int _k_h0e[];
|
||||
extern const unsigned int _k_h15[];
|
||||
extern const unsigned int _k_aesmix1[];
|
||||
extern const unsigned int _k_aesmix2[];
|
||||
extern const unsigned int _k_aesmix3[];
|
||||
extern const unsigned int _k_aesmix4[];
|
||||
*/
|
||||
|
||||
// input: x, table
|
||||
// output: x
|
||||
#define TRANSFORM(x, table, t1, t2)\
|
||||
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
|
||||
t1 = _mm_srli_epi32(t1, 4);\
|
||||
x = _mm_and_si128(x, M128(_k_s0F));\
|
||||
t1 = _mm_shuffle_epi8(*((__m128i*)table + 1), t1);\
|
||||
x = _mm_shuffle_epi8(*((__m128i*)table + 0), x);\
|
||||
x = _mm_xor_si128(x, t1)
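/* Added note: TRANSFORM applies a byte-wise linear map using the vperm trick:
   each byte of x is split into its high nibble (t1) and low nibble (x), each
   nibble indexes one 16-entry half of `table` via _mm_shuffle_epi8, and the
   two lookups are XORed together to give the transformed byte. */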
|
||||
|
||||
// compiled erroneously with 32-bit msc compiler
|
||||
//t2 = _mm_shuffle_epi8(table[0], x);\
|
||||
//x = _mm_shuffle_epi8(table[1], t1);\
|
||||
//x = _mm_xor_si128(x, t2)
|
||||
|
||||
|
||||
// input: x
|
||||
// output: t2, t3
|
||||
#define SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4)\
|
||||
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
|
||||
t1 = _mm_srli_epi32(t1, 4);\
|
||||
x = _mm_and_si128(x, M128(_k_s0F));\
|
||||
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 1), x);\
|
||||
x = _mm_xor_si128(x, t1);\
|
||||
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t1);\
|
||||
t3 = _mm_xor_si128(t3, t2);\
|
||||
t4 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), x);\
|
||||
t4 = _mm_xor_si128(t4, t2);\
|
||||
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t3);\
|
||||
t2 = _mm_xor_si128(t2, x);\
|
||||
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t4);\
|
||||
t3 = _mm_xor_si128(t3, t1);\
|
||||
|
||||
|
||||
// input: x1, x2, table
|
||||
// output: y
|
||||
#define VPERM_LOOKUP(x1, x2, table, y, t)\
|
||||
t = _mm_shuffle_epi8(*((__m128i*)table + 0), x1);\
|
||||
y = _mm_shuffle_epi8(*((__m128i*)table + 1), x2);\
|
||||
y = _mm_xor_si128(y, t)
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define SUBSTITUTE_VPERM(x, t1, t2, t3, t4) \
|
||||
TRANSFORM(x, _k_ipt, t1, t2);\
|
||||
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sbo, x, t1);\
|
||||
x = _mm_xor_si128(x, M128(_k_h63))
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3) \
|
||||
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sb1, s1, t1);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sb2, s2, t1);\
|
||||
s3 = _mm_xor_si128(s1, s2);\
|
||||
x = _mm_shuffle_epi8(s2, M128(_k_aesmix1));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s3, M128(_k_aesmix2)));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix3)));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix4)));\
|
||||
x = _mm_xor_si128(x, M128(_k_h5b))
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define AES_ROUND_VPERM(x, t1, t2, t3, t4, s1, s2, s3) \
|
||||
TRANSFORM(x, _k_ipt, t1, t2);\
|
||||
AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3);\
|
||||
TRANSFORM(x, _k_opt, t1, t2)
|
||||
|
||||
#endif // VPERM_H
|
||||
|
||||
1031 algo/echo/sph_echo.c Normal file
File diff suppressed because it is too large
320 algo/echo/sph_echo.h Normal file
@@ -0,0 +1,320 @@
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* ECHO interface. ECHO is a family of functions which differ by
|
||||
* their output size; this implementation defines ECHO for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_echo.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_ECHO_H__
|
||||
#define SPH_ECHO_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha3/sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-224.
|
||||
*/
|
||||
#define SPH_SIZE_echo224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-256.
|
||||
*/
|
||||
#define SPH_SIZE_echo256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-384.
|
||||
*/
|
||||
#define SPH_SIZE_echo384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-512.
|
||||
*/
|
||||
#define SPH_SIZE_echo512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-224
|
||||
* and ECHO-256.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[192]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[4][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[4][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-384
|
||||
* and ECHO-512.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[8][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[8][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_big_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-224 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo224_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-256 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo256_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo384_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo512_context;
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-224 context (pointer to a
|
||||
* <code>sph_echo224_context</code>)
|
||||
*/
|
||||
void sph_echo224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-256 context (pointer to a
|
||||
* <code>sph_echo256_context</code>)
|
||||
*/
|
||||
void sph_echo256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-384 context (pointer to a
|
||||
* <code>sph_echo384_context</code>)
|
||||
*/
|
||||
void sph_echo384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-512 context (pointer to a
|
||||
* <code>sph_echo512_context</code>)
|
||||
*/
|
||||
void sph_echo512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
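For reference, here is a minimal usage sketch of the streaming interface declared above. It is an illustrative example, not part of the original file; the function name echo512_digest and the msg/msg_len arguments are hypothetical.

/* Sketch: one-shot ECHO-512 over a caller-supplied buffer.
   Assumes "algo/echo/sph_echo.h" is on the include path. */
#include <stddef.h>
#include "algo/echo/sph_echo.h"

static void echo512_digest(const void *msg, size_t msg_len,
                           unsigned char out[64])
{
    sph_echo512_context ctx;

    sph_echo512_init(&ctx);            /* no memory allocation         */
    sph_echo512(&ctx, msg, msg_len);   /* may be called repeatedly     */
    sph_echo512_close(&ctx, out);      /* 64-byte digest, ctx is reset */
}

The same init/update/close pattern applies to the 224-, 256- and 384-bit variants (28-, 32- and 48-byte outputs), and a running computation can be forked at any point by copying the context, e.g. sph_echo512_context ctx2 = ctx;, as noted in the structure documentation above.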
|
||||
1031
algo/echo/sse2/echo.c
Normal file
File diff suppressed because it is too large
320
algo/echo/sse2/sph_echo.h
Normal file
@@ -0,0 +1,320 @@
|
||||
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* ECHO interface. ECHO is a family of functions which differ by
|
||||
* their output size; this implementation defines ECHO for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_echo.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_ECHO_H__
|
||||
#define SPH_ECHO_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha3/sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-224.
|
||||
*/
|
||||
#define SPH_SIZE_echo224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-256.
|
||||
*/
|
||||
#define SPH_SIZE_echo256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-384.
|
||||
*/
|
||||
#define SPH_SIZE_echo384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-512.
|
||||
*/
|
||||
#define SPH_SIZE_echo512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-224
|
||||
* and ECHO-256.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[192]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[4][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[4][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-384
|
||||
* and ECHO-512.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[8][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[8][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_big_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-224 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo224_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-256 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo256_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo384_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo512_context;
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-224 context (pointer to a
|
||||
* <code>sph_echo224_context</code>)
|
||||
*/
|
||||
void sph_echo224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-256 context (pointer to a
|
||||
* <code>sph_echo256_context</code>)
|
||||
*/
|
||||
void sph_echo256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-384 context (pointer to a
|
||||
* <code>sph_echo384_context</code>)
|
||||
*/
|
||||
void sph_echo384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-512 context (pointer to a
|
||||
* <code>sph_echo512_context</code>)
|
||||
*/
|
||||
void sph_echo512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
139
algo/fresh.c
Normal file
@@ -0,0 +1,139 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
|
||||
//#define DEBUG_ALGO
|
||||
|
||||
extern void freshhash(void* output, const void* input, uint32_t len)
|
||||
{
|
||||
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
||||
#define hashA hash
|
||||
#define hashB hash+64
|
||||
|
||||
sph_shavite512_context ctx_shavite;
|
||||
sph_simd512_context ctx_simd;
|
||||
sph_echo512_context ctx_echo;
|
||||
|
||||
sph_shavite512_init(&ctx_shavite);
|
||||
sph_shavite512(&ctx_shavite, input, len);
|
||||
sph_shavite512_close(&ctx_shavite, hashA);
|
||||
|
||||
sph_simd512_init(&ctx_simd);
|
||||
sph_simd512(&ctx_simd, hashA, 64);
|
||||
sph_simd512_close(&ctx_simd, hashB);
|
||||
|
||||
sph_shavite512_init(&ctx_shavite);
|
||||
sph_shavite512(&ctx_shavite, hashB, 64);
|
||||
sph_shavite512_close(&ctx_shavite, hashA);
|
||||
|
||||
sph_simd512_init(&ctx_simd);
|
||||
sph_simd512(&ctx_simd, hashA, 64);
|
||||
sph_simd512_close(&ctx_simd, hashB);
|
||||
|
||||
sph_echo512_init(&ctx_echo);
|
||||
sph_echo512(&ctx_echo, hashB, 64);
|
||||
sph_echo512_close(&ctx_echo, hashA);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_fresh(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t len = 80;
|
||||
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
#ifdef _MSC_VER
|
||||
uint32_t __declspec(align(32)) hash64[8];
|
||||
#else
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
#endif
|
||||
uint32_t endiandata[32];
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
for (int k = 0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
if (Htarg != 0)
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
freshhash(hash64, endiandata, len);
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see the if/else in blake.c for an explanation of the htmax => mask loop
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
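The htmax/masks tables above act as a cheap pre-filter: the larger Htarg (the easier the target), the fewer high-order bits of hash64[7] have to be zero before the full fulltest() comparison is worth running. Below is a minimal sketch of that selection logic, assuming it mirrors the tables in scanhash_fresh; select_mask is a hypothetical helper and not part of the miner.

/* Sketch only: picks the pre-filter mask for a given Htarg,
   mirroring the htmax/masks tables in scanhash_fresh above. */
#include <stdint.h>

static uint32_t select_mask(uint32_t Htarg)
{
    static const uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
    static const uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
                                      0xFFFFF000, 0xFFFF0000, 0 };
    for (int m = 0; m < 6; m++)
        if (Htarg <= htmax[m])
            return masks[m];
    return 0;   /* very easy target: no pre-filtering, fulltest everything */
}

A nonce is only handed to fulltest() when (hash64[7] & mask) == 0, exactly as in the loop above.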
|
||||
|
||||
void fresh_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
|
||||
bool register_fresh_algo( algo_gate_t* gate )
|
||||
{
|
||||
algo_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_fresh;
|
||||
gate->hash = (void*)&freshhash;
|
||||
gate->hash_alt = (void*)&freshhash;
|
||||
gate->set_target = (void*)&fresh_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
0
algo/fugue/.dirstamp
Normal file
1208
algo/fugue/sph_fugue.c
Normal file
File diff suppressed because it is too large
81
algo/fugue/sph_fugue.h
Normal file
@@ -0,0 +1,81 @@
|
||||
#ifndef SPH_FUGUE_H__
|
||||
#define SPH_FUGUE_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha3/sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#define SPH_SIZE_fugue224 224
|
||||
|
||||
#define SPH_SIZE_fugue256 256
|
||||
|
||||
#define SPH_SIZE_fugue384 384
|
||||
|
||||
#define SPH_SIZE_fugue512 512
|
||||
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
sph_u32 partial;
|
||||
unsigned partial_len;
|
||||
unsigned round_shift;
|
||||
sph_u32 S[36];
|
||||
#if SPH_64
|
||||
sph_u64 bit_count;
|
||||
#else
|
||||
sph_u32 bit_count_high, bit_count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_fugue_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue224_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue256_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue384_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue512_context;
|
||||
|
||||
void sph_fugue224_init(void *cc);
|
||||
|
||||
void sph_fugue224(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue224_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue256_init(void *cc);
|
||||
|
||||
void sph_fugue256(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue256_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue384_init(void *cc);
|
||||
|
||||
void sph_fugue384(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue384_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue512_init(void *cc);
|
||||
|
||||
void sph_fugue512(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue512_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
0
algo/gost/.dirstamp
Normal file
1045
algo/gost/sph_gost.c
Normal file
File diff suppressed because it is too large
185
algo/gost/sph_gost.h
Normal file
@@ -0,0 +1,185 @@
|
||||
/* $Id: sph_gost.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* GOST interface. This is the interface for GOST R 12 with the
|
||||
* recommended parameters for SHA-3, with output lengths 256
|
||||
* and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_gost.h
|
||||
* @author Mish <mish@btchouse.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_GOST_H__
|
||||
#define SPH_GOST_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha3/sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for GOST-256.
|
||||
*/
|
||||
#define SPH_SIZE_gost256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for GOST-512.
|
||||
*/
|
||||
#define SPH_SIZE_gost512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for GOST computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once a
|
||||
* GOST computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running GOST computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
|
||||
/**
|
||||
* This structure is a context for Gost-256 computations.
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[32]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 V[3][8];
|
||||
#endif
|
||||
} sph_gost256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Gost-512 computations.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 V[5][8];
|
||||
#endif
|
||||
} sph_gost512_context;
|
||||
|
||||
|
||||
/**
|
||||
* Initialize a GOST-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the GOST-256 context (pointer to a
|
||||
* <code>sph_gost256_context</code>)
|
||||
*/
|
||||
void sph_gost256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Gost-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_gost256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current GOST-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the GOST-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_gost256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the GOST-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_gost256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Gost-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the GOST-512 context (pointer to a
|
||||
* <code>sph_gost512_context</code>)
|
||||
*/
|
||||
void sph_gost512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the GOST-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_gost512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current GOST-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the GOST-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_gost512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the GOST-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_gost512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
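As a concrete reading of the addbits_and_close convention documented above: the n extra bits occupy the most-significant end of ub, so appending the three bits 1, 0, 1 means ub = 0xA0 (binary 1010 0000) and n = 3. The following is an illustrative sketch, not part of the original file; gost512_with_extra_bits and the msg/msg_len arguments are hypothetical.

/* Sketch: finish a GOST-512 hash with three trailing bits '101'. */
#include <stddef.h>
#include "algo/gost/sph_gost.h"

static void gost512_with_extra_bits(const void *msg, size_t msg_len,
                                    unsigned char out[64])
{
    sph_gost512_context ctx;

    sph_gost512_init(&ctx);
    sph_gost512(&ctx, msg, msg_len);                     /* whole bytes      */
    sph_gost512_addbits_and_close(&ctx, 0xA0, 3, out);   /* extra bits '101' */
}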
|
||||
0
algo/groestl/.dirstamp
Normal file
0
algo/groestl/aes_ni/.dirstamp
Normal file
14
algo/groestl/aes_ni/README
Normal file
@@ -0,0 +1,14 @@
|
||||
This package contains an implementation of the Groestl-512 hash
|
||||
function optimized for the Intel AES instructions.
|
||||
|
||||
Authors are Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
|
||||
There are no known present or future claims by a copyright holder that
|
||||
the distribution of this software infringes the copyright. In
|
||||
particular, the author of the software is not making such claims and
|
||||
does not intend to make such claims.
|
||||
|
||||
Moreover, there are no known present or future claims by a patent
|
||||
holder that the use of this software infringes the patent. In
|
||||
particular, the author of the software is not making such claims and
|
||||
does not intend to make such claims.
|
||||
2
algo/groestl/aes_ni/api.h
Normal file
@@ -0,0 +1,2 @@
|
||||
#define CRYPTO_BYTES 64
|
||||
#define CRYPTO_VERSION "2.2"
|
||||
Some files were not shown because too many files have changed in this diff.