/*
 * Copyright 2009 Colin Percival, 2014 savale
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file was originally written by Colin Percival as part of the Tarsnap
 * online backup system.
 */
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "simd-utils.h"
#include "compat/sph_types.h"
#include "sph_blake2b.h"
// Little-endian byte access.
#define B2B_GET64(p) \
   (((uint64_t) ((uint8_t *) (p))[0]) ^ \
   (((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
   (((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
   (((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
   (((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
   (((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
   (((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
   (((uint64_t) ((uint8_t *) (p))[7]) << 56))
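// Illustrative alternative (a sketch, not used by this file): on a
// little-endian host the same load can be written with memcpy, which
// compilers typically fold into a single 8-byte move:
//
//    static inline uint64_t b2b_get64_le( const void *p )
//    {
//       uint64_t w;
//       memcpy( &w, p, 8 );   // defined behaviour even for unaligned p
//       return w;             // bytes are already in LE order on LE hosts
//    }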
#if defined(__AVX2__)

// One application of the BLAKE2b G mixing function to all four columns
// (or diagonals) at once, with the whole 4x4 state held in V[0..3].
#define BLAKE2B_G( Sa, Sb, Sc, Sd, Se, Sf, Sg, Sh ) \
{ \
   V[0] = _mm256_add_epi64( V[0], _mm256_add_epi64( V[1], \
              _mm256_set_epi64x( m[ sigmaR[ Sg ] ], m[ sigmaR[ Se ] ], \
                                 m[ sigmaR[ Sc ] ], m[ sigmaR[ Sa ] ] ) ) ); \
   V[3] = mm256_ror_64( _mm256_xor_si256( V[3], V[0] ), 32 ); \
   V[2] = _mm256_add_epi64( V[2], V[3] ); \
   V[1] = mm256_ror_64( _mm256_xor_si256( V[1], V[2] ), 24 ); \
\
   V[0] = _mm256_add_epi64( V[0], _mm256_add_epi64( V[1], \
              _mm256_set_epi64x( m[ sigmaR[ Sh ] ], m[ sigmaR[ Sf ] ], \
                                 m[ sigmaR[ Sd ] ], m[ sigmaR[ Sb ] ] ) ) ); \
   V[3] = mm256_ror_64( _mm256_xor_si256( V[3], V[0] ), 16 ); \
   V[2] = _mm256_add_epi64( V[2], V[3] ); \
   V[1] = mm256_ror_64( _mm256_xor_si256( V[1], V[2] ), 63 ); \
}
// The state rows are shuffled between the two G calls so that the second
// call mixes the diagonals. Pivoting about V[1] instead of V[0] reduces
// latency.
#define BLAKE2B_ROUND( R ) \
{ \
   __m256i *V = (__m256i*)v; \
   const uint8_t *sigmaR = sigma[R]; \
   BLAKE2B_G( 0, 1, 2, 3, 4, 5, 6, 7 ); \
   V[0] = mm256_shufll_64( V[0] ); \
   V[3] = mm256_swap_128( V[3] ); \
   V[2] = mm256_shuflr_64( V[2] ); \
   BLAKE2B_G( 14, 15, 8, 9, 10, 11, 12, 13 ); \
   V[0] = mm256_shuflr_64( V[0] ); \
   V[3] = mm256_swap_128( V[3] ); \
   V[2] = mm256_shufll_64( V[2] ); \
}
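// Layout note (a reading aid, not code): v[0..15] is the 4x4 working state,
// and the cast above makes each __m256i hold one row of four lanes:
//
//    V[0] = { v[ 0], v[ 1], v[ 2], v[ 3] }   row a
//    V[1] = { v[ 4], v[ 5], v[ 6], v[ 7] }   row b
//    V[2] = { v[ 8], v[ 9], v[10], v[11] }   row c
//    V[3] = { v[12], v[13], v[14], v[15] }   row d
//
// The first BLAKE2B_G call therefore mixes the four columns in parallel;
// the shuffles realign the rows so the second call mixes the diagonals,
// then restore the original layout.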
/*
// Earlier formulation pivoting about V[0], kept for reference.
#define BLAKE2B_ROUND( R ) \
{ \
   __m256i *V = (__m256i*)v; \
   const uint8_t *sigmaR = sigma[R]; \
   BLAKE2B_G( 0, 1, 2, 3, 4, 5, 6, 7 ); \
   V[3] = mm256_shufll_64( V[3] ); \
   V[2] = mm256_swap_128( V[2] ); \
   V[1] = mm256_shuflr_64( V[1] ); \
   BLAKE2B_G( 8, 9, 10, 11, 12, 13, 14, 15 ); \
   V[3] = mm256_shuflr_64( V[3] ); \
   V[2] = mm256_swap_128( V[2] ); \
   V[1] = mm256_shufll_64( V[1] ); \
}
*/
#elif defined(__SSE2__) || defined(__ARM_NEON)

// 128-bit version: each G call mixes two columns (or diagonals), so a
// full round needs two G calls per half-step.
#define BLAKE2B_G( Va, Vb, Vc, Vd, Sa, Sb, Sc, Sd ) \
{ \
   Va = v128_add64( Va, v128_add64( Vb, \
                    v128_set64( m[ sigmaR[ Sc ] ], m[ sigmaR[ Sa ] ] ) ) ); \
   Vd = v128_ror64xor( Vd, Va, 32 ); \
   Vc = v128_add64( Vc, Vd ); \
   Vb = v128_ror64xor( Vb, Vc, 24 ); \
\
   Va = v128_add64( Va, v128_add64( Vb, \
                    v128_set64( m[ sigmaR[ Sd ] ], m[ sigmaR[ Sb ] ] ) ) ); \
   Vd = v128_ror64xor( Vd, Va, 16 ); \
   Vc = v128_add64( Vc, Vd ); \
   Vb = v128_ror64xor( Vb, Vc, 63 ); \
}
#define BLAKE2B_ROUND( R ) \
{ \
   v128_t *V = (v128_t*)v; \
   v128_t V2, V3, V6, V7; \
   const uint8_t *sigmaR = sigma[R]; \
   BLAKE2B_G( V[0], V[2], V[4], V[6], 0, 1, 2, 3 ); \
   BLAKE2B_G( V[1], V[3], V[5], V[7], 4, 5, 6, 7 ); \
   V2 = v128_alignr64( V[3], V[2], 1 ); \
   V3 = v128_alignr64( V[2], V[3], 1 ); \
   V6 = v128_alignr64( V[6], V[7], 1 ); \
   V7 = v128_alignr64( V[7], V[6], 1 ); \
   BLAKE2B_G( V[0], V2, V[5], V6, 8, 9, 10, 11 ); \
   BLAKE2B_G( V[1], V3, V[4], V7, 12, 13, 14, 15 ); \
   V[2] = v128_alignr64( V2, V3, 1 ); \
   V[3] = v128_alignr64( V3, V2, 1 ); \
   V[6] = v128_alignr64( V7, V6, 1 ); \
   V[7] = v128_alignr64( V6, V7, 1 ); \
}
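// Layout note (a reading aid, not code): with 128-bit vectors each state
// row spans two registers, e.g. row a = { V[0], V[1] } = { v[0..1], v[2..3] }.
// The v128_alignr64 calls splice adjacent registers to rotate row b left
// and row d right by one lane into V2/V3 and V6/V7, the swapped V[4]/V[5]
// arguments rotate row c by two lanes, and the trailing splices undo the
// rotation after the diagonal half of the round.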
#else

#ifndef ROTR64
#define ROTR64(x, y)  (((x) >> (y)) ^ ((x) << (64 - (y))))
#endif
// Scalar version: one G call mixes a single column or diagonal.
#define BLAKE2B_G( R, Va, Vb, Vc, Vd, Sa, Sb ) \
{ \
   Va = Va + Vb + m[ sigma[R][Sa] ]; \
   Vd = ROTR64( Vd ^ Va, 32 ); \
   Vc = Vc + Vd; \
   Vb = ROTR64( Vb ^ Vc, 24 ); \
\
   Va = Va + Vb + m[ sigma[R][Sb] ]; \
   Vd = ROTR64( Vd ^ Va, 16 ); \
   Vc = Vc + Vd; \
   Vb = ROTR64( Vb ^ Vc, 63 ); \
}
#define BLAKE2B_ROUND( R ) \
{ \
   /* columns */ \
   BLAKE2B_G( R, v[ 0], v[ 4], v[ 8], v[12],  0,  1 ); \
   BLAKE2B_G( R, v[ 1], v[ 5], v[ 9], v[13],  2,  3 ); \
   BLAKE2B_G( R, v[ 2], v[ 6], v[10], v[14],  4,  5 ); \
   BLAKE2B_G( R, v[ 3], v[ 7], v[11], v[15],  6,  7 ); \
   /* diagonals */ \
   BLAKE2B_G( R, v[ 0], v[ 5], v[10], v[15],  8,  9 ); \
   BLAKE2B_G( R, v[ 1], v[ 6], v[11], v[12], 10, 11 ); \
   BLAKE2B_G( R, v[ 2], v[ 7], v[ 8], v[13], 12, 13 ); \
   BLAKE2B_G( R, v[ 3], v[ 4], v[ 9], v[14], 14, 15 ); \
}

#endif
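// All three BLAKE2B_ROUND variants above compute the same round function;
// they differ only in how the sixteen 64-bit state words are packed into
// registers.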
// Initialization vector: the same constants as the SHA-512 IV (fractional
// parts of the square roots of the first eight primes).

static const uint64_t blake2b_iv[8] __attribute__ ((aligned (32))) =
{
   0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
   0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
   0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
   0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};
static const uint8_t sigma[12][16] __attribute__ ((aligned (32))) =
{
   {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
   { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
   { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
   {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
   {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
   {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
   { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
   { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
   {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
   { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
   {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
   { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
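// Rows 10 and 11 repeat rows 0 and 1: BLAKE2b runs 12 rounds but the
// specification defines only 10 distinct message permutations.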
// Compression function. "last" flag indicates the final block.

static void blake2b_compress( sph_blake2b_ctx *ctx, int last )
{
   uint64_t v[16] __attribute__ ((aligned (32)));
   uint64_t m[16] __attribute__ ((aligned (32)));
   int i;

   for (i = 0; i < 8; i++) {           // init work variables
      v[i] = ctx->h[i];
      v[i + 8] = blake2b_iv[i];
   }

   v[12] ^= ctx->t[0];                 // low 64 bits of offset
   v[13] ^= ctx->t[1];                 // high 64 bits
   if (last)                           // last block flag set ?
      v[14] = ~v[14];

   for (i = 0; i < 16; i++)            // get little-endian words
      m[i] = B2B_GET64(&ctx->b[8 * i]);

   for (i = 0; i < 12; i++)            // twelve rounds
      BLAKE2B_ROUND( i );

   for (i = 0; i < 8; ++i)             // feed-forward into the chaining value
      ctx->h[i] ^= v[i] ^ v[i + 8];
}
// Initialize the hashing context "ctx" with optional key "key".
// 1 <= outlen <= 64 gives the digest size in bytes.
// Secret key (also <= 64 bytes) is optional (keylen = 0).

int sph_blake2b_init( sph_blake2b_ctx *ctx, size_t outlen, const void *key,
                      size_t keylen )  // (keylen = 0: no key)
{
   size_t i;

   if (outlen == 0 || outlen > 64 || keylen > 64)
      return -1;                       // illegal parameters

   for (i = 0; i < 8; i++)             // state, "param block"
      ctx->h[i] = blake2b_iv[i];
   ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;

   ctx->t[0] = 0;                      // input count low word
   ctx->t[1] = 0;                      // input count high word
   ctx->c = 0;                         // pointer within buffer
   ctx->outlen = outlen;

   for (i = keylen; i < 128; i++)      // zero input block
      ctx->b[i] = 0;
   if (keylen > 0) {
      sph_blake2b_update(ctx, key, keylen);
      ctx->c = 128;                    // a keyed hash consumes a full first block
   }

   return 0;
}
// Add "inlen" bytes from "in" into the hash.

void sph_blake2b_update( sph_blake2b_ctx *ctx, const void *in, size_t inlen )
{
   size_t i;

   for (i = 0; i < inlen; i++) {
      if (ctx->c == 128) {             // buffer full ?
         ctx->t[0] += ctx->c;          // add counters
         if (ctx->t[0] < ctx->c)       // carry overflow ?
            ctx->t[1]++;               // high word
         blake2b_compress(ctx, 0);     // compress (not last)
         ctx->c = 0;                   // counter to zero
      }
      ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
   }
}
// Generate the message digest (size given in init).
// Result placed in "out".

void sph_blake2b_final( sph_blake2b_ctx *ctx, void *out )
{
   size_t i;

   ctx->t[0] += ctx->c;                // mark last block offset
   if (ctx->t[0] < ctx->c)             // carry overflow
      ctx->t[1]++;                     // high word

   while (ctx->c < 128)                // fill up with zeros
      ctx->b[ctx->c++] = 0;

   blake2b_compress(ctx, 1);           // final block flag = 1

   // little endian convert and store
   for (i = 0; i < ctx->outlen; i++) {
      ((uint8_t *) out)[i] =
         (ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
   }
}
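// Example usage, a minimal sketch: hashing the three bytes "abc" to a
// 64-byte digest should reproduce the BLAKE2b-512 test vector from
// RFC 7693 (beginning 0xBA 0x80 0xA5 ...).
//
//    #include <stdio.h>
//    #include "sph_blake2b.h"
//
//    int main(void)
//    {
//       sph_blake2b_ctx ctx;
//       uint8_t digest[64];
//
//       sph_blake2b_init( &ctx, 64, NULL, 0 );   // unkeyed, 64-byte output
//       sph_blake2b_update( &ctx, "abc", 3 );
//       sph_blake2b_final( &ctx, digest );
//
//       for ( int i = 0; i < 64; i++ )
//          printf( "%02x", digest[i] );
//       printf( "\n" );
//       return 0;
//    }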