mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.9.2
This commit is contained in:
@@ -68,7 +68,8 @@ cpuminer_SOURCES = \
|
||||
algo/blake/pentablake-4way.c \
|
||||
algo/blake/pentablake.c \
|
||||
algo/bmw/sph_bmw.c \
|
||||
algo/bmw/bmw-hash-4way.c \
|
||||
algo/bmw/bmw256-hash-4way.c \
|
||||
algo/bmw/bmw512-hash-4way.c \
|
||||
algo/bmw/bmw256.c \
|
||||
algo/cryptonight/cryptolight.c \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
@@ -166,6 +167,8 @@ cpuminer_SOURCES = \
|
||||
algo/sha/sha256t-gate.c \
|
||||
algo/sha/sha256t-4way.c \
|
||||
algo/sha/sha256t.c \
|
||||
algo/sha/sha256q-4way.c \
|
||||
algo/sha/sha256q.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/shabal/shabal-hash-4way.c \
|
||||
algo/shavite/sph_shavite.c \
|
||||
@@ -262,7 +265,7 @@ cpuminer_SOURCES = \
|
||||
algo/yescrypt/sha256_Y.c \
|
||||
algo/yescrypt/yescrypt-best.c \
|
||||
algo/yespower/yespower.c \
|
||||
algo/yespower/sha256.c \
|
||||
algo/yespower/sha256_p.c \
|
||||
algo/yespower/yespower-opt.c
|
||||
|
||||
disable_flags =
|
||||
|
@@ -12,7 +12,7 @@ the software, don't use it.
|
||||
Choose the exe that best matches your CPU's features or use trial and
|
||||
error to find the fastest one that doesn't crash. Pay attention to
|
||||
the features listed at cpuminer startup to ensure you are mining at
|
||||
optimum speed using all the available features.
|
||||
optimum speed using the best available features.
|
||||
|
||||
Architecture names and compile options used are only provided for Intel
|
||||
Core series. Even the newest Pentium and Celeron CPUs are often missing
|
||||
@@ -22,8 +22,6 @@ AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
Changes in v3.8.4 may have improved compatibility with some of these CPUs.
|
||||
|
||||
|
||||
Exe name Compile flags Arch name
|
||||
|
||||
|
@@ -33,11 +33,20 @@ Requirements
|
||||
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
||||
supported.
|
||||
|
||||
64 bit Linux or Windows operating system. Apple is not supported.
|
||||
64 bit Linux or Windows operating system. Apple and Android are not supported.
|
||||
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.9.2
|
||||
|
||||
Added sha256q algo.
|
||||
Yespower now uses openssl SHA256, but no observable hash rate increase
|
||||
on Ryzen.
|
||||
Ongoing rearchitecting.
|
||||
Lyra2z now hashes 8-way on CPUs with AVX2.
|
||||
Lyra2 (all including phi2) now runs optimized code with SSE2.
|
||||
|
||||
v3.9.1.1
|
||||
|
||||
Fixed lyra2v3 AVX and below.
|
||||
@@ -45,7 +54,7 @@ Fixed lyra2v3 AVX and below.
|
||||
Compiling on Windows using Cygwin now works. Simply use "./build.sh"
|
||||
just like on Linux. It isn't portable therefore the binaries package will
|
||||
continue to use the existing procedure.
|
||||
The Cygwin procedfure will be documented in more detail later and will
|
||||
The Cygwin procedure will be documented in more detail later and will
|
||||
include a list of packages that need to be installed.
|
||||
|
||||
v3.9.1
|
||||
|
@@ -210,6 +210,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHA256Q: register_sha256q_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
|
@@ -41,7 +41,6 @@ extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
@@ -50,6 +49,10 @@ extern "C"{
|
||||
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
// BMW-256 4 way 32
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[64];
|
||||
__m128i H[16];
|
||||
@@ -59,6 +62,60 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_small_context bmw256_4way_context;
|
||||
|
||||
void bmw256_4way_init(void *cc);
|
||||
|
||||
void bmw256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void bmw256_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif // __SSE2__
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-256 8 way 32
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[64];
|
||||
__m256i H[16];
|
||||
size_t ptr;
|
||||
uint32_t bit_count; // assume bit_count fits in 32 bits
|
||||
} bmw_8way_small_context __attribute__ ((aligned (64)));
|
||||
|
||||
typedef bmw_8way_small_context bmw256_8way_context;
|
||||
|
||||
void bmw256_8way_init( bmw256_8way_context *ctx );
|
||||
void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len );
|
||||
void bmw256_8way_close( bmw256_8way_context *ctx, void *dst );
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
// BMW-512 2 way 64
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[16];
|
||||
__m128i H[16];
|
||||
size_t ptr;
|
||||
uint64_t bit_count;
|
||||
} bmw_2way_big_context __attribute__ ((aligned (64)));
|
||||
|
||||
typedef bmw_2way_big_context bmw512_2way_context;
|
||||
|
||||
void bmw512_2way_init( bmw512_2way_context *ctx );
|
||||
void bmw512_2way( bmw512_2way_context *ctx, const void *data, size_t len );
|
||||
void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
|
||||
|
||||
#endif // __SSE2__
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-512 4 way 64
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16];
|
||||
__m256i H[16];
|
||||
@@ -68,14 +125,6 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_big_context bmw512_4way_context;
|
||||
|
||||
void bmw256_4way_init(void *cc);
|
||||
|
||||
void bmw256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void bmw256_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void bmw512_4way_init(void *cc);
|
||||
|
||||
@@ -86,10 +135,10 @@ void bmw512_4way_close(void *cc, void *dst);
|
||||
void bmw512_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
#endif // __AVX2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // BMW_HASH_H__
|
||||
|
File diff suppressed because it is too large
Load Diff
1109
algo/bmw/bmw512-hash-4way.c
Normal file
1109
algo/bmw/bmw512-hash-4way.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -90,7 +90,7 @@ void allium_4way_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -100,40 +100,47 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
blake256_4way_init( &allium_4way_ctx.blake );
|
||||
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
allium_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
work_set_target_ratio( work, hash+(lane<<3) );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, lane );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -70,7 +70,7 @@ void allium_hash(void *state, const void *input)
|
||||
}
|
||||
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
@@ -80,6 +80,7 @@ int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x3ffff;
|
||||
|
@@ -43,14 +43,14 @@ bool register_lyra2rev2_algo( algo_gate_t* gate );
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_4way_ctx();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2rev2_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_ctx();
|
||||
|
||||
#endif
|
||||
@@ -61,7 +61,7 @@ bool init_lyra2rev2_ctx();
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
// #define LYRA2Z_8WAY
|
||||
#define LYRA2Z_8WAY
|
||||
#endif
|
||||
|
||||
|
||||
@@ -71,21 +71,21 @@ bool init_lyra2rev2_ctx();
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_8way_thread_init();
|
||||
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
|
||||
void lyra2z_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_4way_thread_init();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2z_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_thread_init();
|
||||
|
||||
#endif
|
||||
@@ -102,14 +102,14 @@ bool lyra2z_thread_init();
|
||||
|
||||
void lyra2h_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2h_4way_thread_init();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2h_hash( void *state, const void *input );
|
||||
int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2h_thread_init();
|
||||
|
||||
#endif
|
||||
@@ -126,14 +126,14 @@ bool register_allium_algo( algo_gate_t* gate );
|
||||
|
||||
void allium_4way_hash( void *state, const void *input );
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_allium_4way_ctx();
|
||||
|
||||
#else
|
||||
|
||||
void allium_hash( void *state, const void *input );
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_allium_ctx();
|
||||
|
||||
#endif
|
||||
@@ -146,7 +146,7 @@ bool register_phi2_algo( algo_gate_t* gate );
|
||||
|
||||
void phi2_hash( void *state, const void *input );
|
||||
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_phi2_ctx();
|
||||
|
||||
#endif // LYRA2_GATE_H__
|
||||
|
@@ -50,7 +50,7 @@ void lyra2h_4way_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -63,6 +63,7 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep= vdata + 76; // 19*4
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
@@ -36,7 +36,7 @@ void lyra2h_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
@@ -45,6 +45,7 @@ int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
@@ -81,8 +81,8 @@ void lyra2re_hash(void *state, const void *input)
|
||||
memcpy(state, hashA, 32);
|
||||
}
|
||||
|
||||
int scanhash_lyra2re(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -91,6 +91,7 @@ int scanhash_lyra2re(int thr_id, struct work *work,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
|
@@ -82,7 +82,7 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -95,6 +95,7 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
@@ -73,7 +73,7 @@ void lyra2rev2_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -82,6 +82,7 @@ int scanhash_lyra2rev2(int thr_id, struct work *work,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
@@ -74,7 +74,6 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int num_found = 0;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
|
@@ -50,7 +50,7 @@ void lyra2z_4way_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -60,25 +60,23 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 20; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
lyra2z_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2z_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
@@ -87,15 +85,19 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, i );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -150,14 +152,14 @@ void lyra2z_8way_hash( void *state, const void *input )
|
||||
memcpy( state+ 32, hash1, 32 );
|
||||
memcpy( state+ 64, hash2, 32 );
|
||||
memcpy( state+ 96, hash3, 32 );
|
||||
memcpy( state+128, hash1, 32 );
|
||||
memcpy( state+160, hash2, 32 );
|
||||
memcpy( state+192, hash3, 32 );
|
||||
memcpy( state+224, hash1, 32 );
|
||||
memcpy( state+128, hash4, 32 );
|
||||
memcpy( state+160, hash5, 32 );
|
||||
memcpy( state+192, hash6, 32 );
|
||||
memcpy( state+224, hash7, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
@@ -167,15 +169,15 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
@@ -183,15 +185,8 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
be32enc( noncep+4, n+4 );
|
||||
be32enc( noncep+5, n+5 );
|
||||
be32enc( noncep+6, n+6 );
|
||||
be32enc( noncep+7, n+7 );
|
||||
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
lyra2z_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
@@ -199,15 +194,19 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, i );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -44,7 +44,7 @@ void lyra2z_hash( void *state, const void *input )
|
||||
}
|
||||
|
||||
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
@@ -53,6 +53,7 @@ int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
@@ -16,7 +16,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
}
|
||||
|
||||
int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
@@ -25,6 +25,7 @@ int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
|
@@ -92,42 +92,50 @@ void phi2_hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_phi2(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[36];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[36];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
}
|
||||
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
}
|
||||
phi2_has_roots = false;
|
||||
for ( int i=0; i < 36; i++ )
|
||||
{
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
phi2_has_roots = false;
|
||||
for (int i=0; i < 36; i++) {
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||
}
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
phi2_hash( hash, endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
phi2_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] < Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
if ( hash[7] < Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 1;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -91,7 +91,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
|
||||
#elif defined(__SSE4_2__)
|
||||
#elif defined(__SSE2__)
|
||||
|
||||
// process 2 columns in parallel
|
||||
// returns void, all args updated
|
||||
@@ -132,7 +132,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
|
||||
|
||||
#endif // AVX2 else SSE4_2
|
||||
#endif // AVX2 else SSE2
|
||||
|
||||
// Scalar
|
||||
//Blake2b's G function
|
||||
|
219
algo/sha/sha256q-4way.c
Normal file
219
algo/sha/sha256q-4way.c
Normal file
@@ -0,0 +1,219 @@
|
||||
#include "sha256t-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "sha2-hash-4way.h"
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_8way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
sha256_8way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx8, sizeof ctx );
|
||||
|
||||
sha256_8way( &ctx, input + (64<<3), 16 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, vhash );
|
||||
|
||||
sha256_8way_init( &ctx );
|
||||
sha256_8way( &ctx, vhash, 32 );
|
||||
sha256_8way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
// Need big endian data
|
||||
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
sha256q_8way_hash( hash, vdata );
|
||||
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
uint32_t lane_hash[8];
|
||||
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
work_set_target_ratio( work, lane_hash );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, lane );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_4WAY)
|
||||
|
||||
// Per-thread 4-way SHA-256 context. scanhash_sha256q_4way() initialises it
// and feeds it the leading part of the interleaved header (length argument
// 64); sha256q_4way_hash() copies it as the starting state of every hash.
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_4way_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
sha256_4way_context ctx;
|
||||
memcpy( &ctx, &sha256_ctx4, sizeof ctx );
|
||||
|
||||
sha256_4way( &ctx, input + (64<<2), 16 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, vhash );
|
||||
|
||||
sha256_4way_init( &ctx );
|
||||
sha256_4way( &ctx, vhash, 32 );
|
||||
sha256_4way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000 };
|
||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
sha256_4way_init( &sha256_ctx4 );
|
||||
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||
pdata[19] = n;
|
||||
|
||||
sha256q_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
work_set_target_ratio( work, lane_hash );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, lane );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
}
|
||||
|
||||
n += 4;
|
||||
|
||||
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
113
algo/sha/sha256q.c
Normal file
113
algo/sha/sha256q.c
Normal file
@@ -0,0 +1,113 @@
|
||||
#include "sha256t-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
// Per-thread SHA-256 context. sha256q_midstate() initialises it and feeds it
// 64 bytes of input; sha256q_hash() copies it as the starting state.
static __thread SHA256_CTX sha256q_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256q_midstate( const void* input )
|
||||
{
|
||||
SHA256_Init( &sha256q_ctx );
|
||||
SHA256_Update( &sha256q_ctx, input, 64 );
|
||||
}
|
||||
|
||||
void sha256q_hash( void* output, const void* input )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
SHA256_CTX ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &sha256q_ctx, sizeof sha256q_ctx );
|
||||
|
||||
SHA256_Update( &ctx, input + midlen, tail );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, hash, 32 );
|
||||
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||
|
||||
memcpy( output, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
#ifdef _MSC_VER
|
||||
uint32_t __declspec(align(32)) hash64[8];
|
||||
#else
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
#endif
|
||||
uint32_t endiandata[32];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
sha256q_midstate( endiandata );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
sha256q_hash( hash64, endiandata );
|
||||
if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash64 );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
}
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
@@ -83,7 +83,7 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
@@ -138,9 +138,9 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
@@ -3,15 +3,15 @@
|
||||
bool register_sha256t_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(SHA256T_8WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||
gate->hash = (void*)&sha256t_8way_hash;
|
||||
#elif defined(SHA256T_4WAY)
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||
gate->hash = (void*)&sha256t_4way_hash;
|
||||
#else
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->optimizations = SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
#endif
|
||||
@@ -19,3 +19,23 @@ bool register_sha256t_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Fill in the algo gate for sha256q.
 *
 * Chooses the 8-way, 4-way or scalar scanhash/hash pair according to the
 * SHA256T_8WAY / SHA256T_4WAY build macros, sets the matching optimisation
 * flags and installs get_max64_0x3ffff. Always returns true.
 */
bool register_sha256q_algo( algo_gate_t* gate )
{
#if defined(SHA256T_8WAY) || defined(SHA256T_4WAY)
   // Both vectorised builds advertise the same flag set.
   gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
#else
   gate->optimizations = SHA_OPT;
#endif

#if defined(SHA256T_8WAY)
   gate->scanhash = (void*)scanhash_sha256q_8way;
   gate->hash     = (void*)sha256q_8way_hash;
#elif defined(SHA256T_4WAY)
   gate->scanhash = (void*)scanhash_sha256q_4way;
   gate->hash     = (void*)sha256q_4way_hash;
#else
   gate->scanhash = (void*)scanhash_sha256q;
   gate->hash     = (void*)sha256q_hash;
#endif

   gate->get_max64 = (void*)get_max64_0x3ffff;
   return true;
}
|
||||
|
||||
|
@@ -15,24 +15,34 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
bool register_blake2s_algo( algo_gate_t* gate );
|
||||
bool register_sha256t_algo( algo_gate_t* gate );
|
||||
bool register_sha256q_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(SHA256T_8WAY)
|
||||
|
||||
void sha256t_8way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void sha256q_8way_hash( void *output, const void *input );
|
||||
int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(SHA256T_4WAY)
|
||||
|
||||
void sha256t_4way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void sha256q_4way_hash( void *output, const void *input );
|
||||
int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#else
|
||||
|
||||
void sha256t_hash( void *output, const void *input );
|
||||
int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void sha256q_hash( void *output, const void *input );
|
||||
int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -70,8 +70,11 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
sha256t_midstate( endiandata );
|
||||
|
||||
@@ -87,7 +90,13 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
break;
|
||||
|
@@ -346,7 +346,7 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
||||
memcpy( buf + ptr, data, clen );
|
||||
data = (const unsigned char *)data + clen;
|
||||
ptr += clen;
|
||||
len -= clen >> 1;
|
||||
len -= (clen >> 1);
|
||||
if ( ptr == sizeof ctx->buf )
|
||||
{
|
||||
if ( ( ctx->count0 = ctx->count0 + 1024 ) == 0 )
|
||||
@@ -365,16 +365,8 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
||||
}
|
||||
|
||||
uint32_t vp = ptr>>5;
|
||||
|
||||
// Terminating byte then zero pad
|
||||
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||
|
||||
// Zero pad full vectors up to count
|
||||
for ( ; vp < 6; vp++ )
|
||||
casti_m256i( buf, vp ) = m256_zero;
|
||||
|
||||
// Count = { 0, 16, 64, 80 }. Outsize = 16 u32 = 512 bits = 0x0200
|
||||
// Count is misaligned to 16 bits and straddles a vector.
|
||||
// Count is misaligned to 16 bits and straddles 2 vectors.
|
||||
// Use u32 overlay to stage then u16 to load buf.
|
||||
union
|
||||
{
|
||||
@@ -387,6 +379,18 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
||||
count.u32[2] = ctx->count2;
|
||||
count.u32[3] = ctx->count3;
|
||||
|
||||
if ( vp == 0 ) // empty buf, xevan.
|
||||
{
|
||||
casti_m256i( buf, 0 ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||
memset_zero_256( (__m256i*)buf + 1, 5 );
|
||||
ctx->count0 = ctx->count1 = ctx->count2 = ctx->count3 = 0;
|
||||
}
|
||||
else // half full buf, everyone else.
|
||||
{
|
||||
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||
memset_zero_256( (__m256i*)buf + vp, 6 - vp );
|
||||
}
|
||||
|
||||
casti_m256i( buf, 6 ) = _mm256_set_epi16( count.u16[0], 0,0,0,0,0,0,0,
|
||||
count.u16[0], 0,0,0,0,0,0,0 );
|
||||
casti_m256i( buf, 7 ) = _mm256_set_epi16(
|
||||
|
@@ -25,7 +25,8 @@
|
||||
#include "algo/haval/haval-hash-4way.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
|
||||
typedef struct {
|
||||
union _sonoa_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
@@ -43,8 +44,10 @@ typedef struct {
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
} sonoa_4way_ctx_holder;
|
||||
};
|
||||
|
||||
typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
|
||||
/*
|
||||
sonoa_4way_ctx_holder sonoa_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_sonoa_4way_ctx()
|
||||
@@ -67,6 +70,7 @@ void init_sonoa_4way_ctx()
|
||||
sha512_4way_init( &sonoa_4way_ctx.sha512 );
|
||||
haval256_5_4way_init( &sonoa_4way_ctx.haval );
|
||||
};
|
||||
*/
|
||||
|
||||
void sonoa_4way_hash( void *state, const void *input )
|
||||
{
|
||||
@@ -77,19 +81,23 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
sonoa_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &sonoa_4way_ctx, sizeof(sonoa_4way_ctx) );
|
||||
sonoa_4way_context_overlay ctx;
|
||||
// sonoa_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
// memcpy( &ctx, &sonoa_4way_ctx, sizeof(sonoa_4way_ctx) );
|
||||
|
||||
// 1
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
@@ -100,29 +108,36 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
|
||||
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
|
||||
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||
@@ -130,6 +145,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
@@ -215,10 +231,12 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
// 3
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
@@ -294,6 +312,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
@@ -399,10 +418,11 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
mm256_reinterleave_4x64( vhashB, vhash, 512 );
|
||||
mm256_reinterleave_4x32_4x64( vhashB, vhash, 512 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhashB, 64 );
|
||||
@@ -438,7 +458,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
mm256_reinterleave_4x32( vhashB, vhash, 512 );
|
||||
mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhashB, 64 );
|
||||
@@ -536,6 +556,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
@@ -663,6 +684,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
@@ -800,11 +822,11 @@ void sonoa_4way_hash( void *state, const void *input )
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
mm256_reinterleave_4x32( vhashB, vhash, 512 );
|
||||
mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashB, 64 );
|
||||
haval256_5_4way_close( &ctx.haval, state );
|
||||
|
||||
}
|
||||
|
||||
int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
@@ -3,7 +3,7 @@
|
||||
bool register_sonoa_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (SONOA_4WAY)
|
||||
init_sonoa_4way_ctx();
|
||||
// init_sonoa_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_sonoa_4way;
|
||||
gate->hash = (void*)&sonoa_4way_hash;
|
||||
#else
|
||||
|
@@ -17,7 +17,7 @@ void sonoa_4way_hash( void *state, const void *input );
|
||||
int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_sonoa_4way_ctx();
|
||||
//void init_sonoa_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -14,7 +14,6 @@
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -222,7 +221,7 @@ void x17_4way_hash( void *state, const void *input )
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
// 17 Haval parallel 32 bit
|
||||
mm256_reinterleave_4x32( vhashB, vhash, 512 );
|
||||
mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashB, 64 );
|
||||
@@ -258,18 +257,18 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
uint32_t mask = masks[ m ];
|
||||
do
|
||||
{
|
||||
*noncev = mm256_interleave_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ),
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ),
|
||||
*noncev );
|
||||
x17_4way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( ( hash7[ lane ] & mask ) == 0 ) )
|
||||
if ( ( hash7[ lane ] & mask ) == 0 )
|
||||
{
|
||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
|
@@ -12,8 +12,9 @@
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/luffa/luffa-hash-2way.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
#include "algo/shavite/shavite-hash-2way.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/simd/simd-hash-2way.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -24,16 +25,17 @@
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
#include "algo/haval/haval-hash-4way.h"
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
union _xevan_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
hashState_groestl groestl;
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite;
|
||||
cube_2way_context cube;
|
||||
shavite512_2way_context shavite;
|
||||
simd_2way_context simd;
|
||||
hashState_echo echo;
|
||||
hamsi512_4way_context hamsi;
|
||||
@@ -42,39 +44,8 @@ typedef struct {
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
haval256_5_4way_context haval;
|
||||
} xevan_4way_ctx_holder;
|
||||
|
||||
xevan_4way_ctx_holder xevan_4way_ctx __attribute__ ((aligned (64)));
|
||||
static __thread blake512_4way_context xevan_blake_4way_mid
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void init_xevan_4way_ctx()
|
||||
{
|
||||
blake512_4way_init(&xevan_4way_ctx.blake);
|
||||
bmw512_4way_init( &xevan_4way_ctx.bmw );
|
||||
init_groestl( &xevan_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init(&xevan_4way_ctx.skein);
|
||||
jh512_4way_init(&xevan_4way_ctx.jh);
|
||||
keccak512_4way_init(&xevan_4way_ctx.keccak);
|
||||
luffa_2way_init( &xevan_4way_ctx.luffa, 512 );
|
||||
cubehashInit( &xevan_4way_ctx.cube, 512, 16, 32 );
|
||||
sph_shavite512_init( &xevan_4way_ctx.shavite );
|
||||
simd_2way_init( &xevan_4way_ctx.simd, 512 );
|
||||
init_echo( &xevan_4way_ctx.echo, 512 );
|
||||
hamsi512_4way_init( &xevan_4way_ctx.hamsi );
|
||||
sph_fugue512_init( &xevan_4way_ctx.fugue );
|
||||
shabal512_4way_init( &xevan_4way_ctx.shabal );
|
||||
sph_whirlpool_init( &xevan_4way_ctx.whirlpool );
|
||||
sha512_4way_init( &xevan_4way_ctx.sha512 );
|
||||
haval256_5_4way_init( &xevan_4way_ctx.haval );
|
||||
};
|
||||
|
||||
void xevan_4way_blake512_midstate( const void* input )
|
||||
{
|
||||
memcpy( &xevan_blake_4way_mid, &xevan_4way_ctx.blake,
|
||||
sizeof(xevan_blake_4way_mid) );
|
||||
blake512_4way( &xevan_blake_4way_mid, input, 64 );
|
||||
}
|
||||
typedef union _xevan_4way_context_overlay xevan_4way_context_overlay;
|
||||
|
||||
void xevan_4way_hash( void *output, const void *input )
|
||||
{
|
||||
@@ -83,293 +54,283 @@ void xevan_4way_hash( void *output, const void *input )
|
||||
uint64_t hash2[16] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[16] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[16<<2] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash32[16<<2] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashA[16<<2] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[16<<2] __attribute__ ((aligned (64)));
|
||||
const int dataLen = 128;
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
xevan_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &xevan_4way_ctx, sizeof(xevan_4way_ctx) );
|
||||
xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||
|
||||
// parallel way
|
||||
memcpy( &ctx.blake, &xevan_blake_4way_mid,
|
||||
sizeof(xevan_blake_4way_mid) );
|
||||
blake512_4way( &ctx.blake, input + (midlen<<2), tail );
|
||||
// parallel 4 way
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, input, 80 );
|
||||
blake512_4way_close(&ctx.blake, vhash);
|
||||
memset( &vhash[8<<2], 0, 64<<2 );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
// Serial
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
|
||||
dataLen<<3 );
|
||||
memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
|
||||
dataLen<<3 );
|
||||
memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
|
||||
dataLen<<3 );
|
||||
memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
|
||||
dataLen<<3 );
|
||||
|
||||
// Parallel 4way
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
|
||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0,
|
||||
dataLen );
|
||||
memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1,
|
||||
dataLen );
|
||||
memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2,
|
||||
dataLen );
|
||||
memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3,
|
||||
dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
|
||||
|
||||
sph_shavite512( &ctx.shavite, hash0, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash1, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash2, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash3, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
mm256_deinterleave_1x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_deinterleave_1x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, dataLen<<3 );
|
||||
memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, dataLen<<3 );
|
||||
memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, dataLen<<3 );
|
||||
memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, dataLen<<3 );
|
||||
// Parallel
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
// Parallel 4way 32 bit
|
||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
// Serial
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
|
||||
sizeof(sph_whirlpool_context) );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
|
||||
sizeof(sph_whirlpool_context) );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
|
||||
sizeof(sph_whirlpool_context) );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
mm256_reinterleave_4x32( vhash32, vhash, dataLen<<3 );
|
||||
haval256_5_4way( &ctx.haval, vhash32, dataLen );
|
||||
haval256_5_4way_close( &ctx.haval, vhash );
|
||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
mm256_reinterleave_4x64_4x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_close( &ctx.haval, vhashA );
|
||||
|
||||
mm256_reinterleave_4x32_4x64( vhash, vhashA, dataLen<<3 );
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
memset( &vhash[ 4<<2 ], 0, (dataLen-32) << 2 );
|
||||
memcpy( &ctx, &xevan_4way_ctx, sizeof(xevan_4way_ctx) );
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, vhash, dataLen );
|
||||
blake512_4way_close(&ctx.blake, vhash);
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way( &ctx.bmw, vhash, dataLen );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
|
||||
dataLen<<3 );
|
||||
memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
|
||||
dataLen<<3 );
|
||||
memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
|
||||
dataLen<<3 );
|
||||
memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
|
||||
dataLen<<3 );
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, dataLen );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, dataLen );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way( &ctx.keccak, vhash, dataLen );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
|
||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
|
||||
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
|
||||
luffa_2way_init( &ctx.luffa, 512 );
|
||||
luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0,
|
||||
dataLen );
|
||||
memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1,
|
||||
dataLen );
|
||||
memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2,
|
||||
dataLen );
|
||||
memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3,
|
||||
dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
|
||||
cube_2way_init( &ctx.cube, 512, 16, 32 );
|
||||
cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
|
||||
|
||||
sph_shavite512( &ctx.shavite, hash0, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||
memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash1, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash2, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||
memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
|
||||
sizeof(sph_shavite512_context) );
|
||||
sph_shavite512( &ctx.shavite, hash3, dataLen );
|
||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
|
||||
shavite512_2way_init( &ctx.shavite );
|
||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
|
||||
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
|
||||
mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
|
||||
mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
|
||||
simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
|
||||
simd_2way_init( &ctx.simd, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
|
||||
|
||||
mm256_deinterleave_1x128( hash0, hash1, vhashA, dataLen<<3 );
|
||||
mm256_deinterleave_1x128( hash2, hash3, vhashB, dataLen<<3 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, dataLen<<3 );
|
||||
memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||
(const BitSequence *) hash1, dataLen<<3 );
|
||||
memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||
(const BitSequence *) hash2, dataLen<<3 );
|
||||
memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *) hash3, dataLen<<3 );
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, dataLen );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||
memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash1, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||
memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash2, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||
memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash3, dataLen );
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
|
||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, dataLen );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||
memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
|
||||
sizeof(sph_whirlpool_context) );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||
memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
|
||||
sizeof(sph_whirlpool_context) );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||
memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
|
||||
sizeof(sph_whirlpool_context) );
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, dataLen );
|
||||
sha512_4way_close( &ctx.sha512, vhash );
|
||||
|
||||
mm256_reinterleave_4x32( vhash32, vhash, dataLen<<3 );
|
||||
haval256_5_4way( &ctx.haval, vhash32, dataLen );
|
||||
mm256_reinterleave_4x64_4x32( vhashA, vhash, dataLen<<3 );
|
||||
|
||||
haval256_5_4way_init( &ctx.haval );
|
||||
haval256_5_4way( &ctx.haval, vhashA, dataLen );
|
||||
haval256_5_4way_close( &ctx.haval, output );
|
||||
}
|
||||
|
||||
int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
@@ -378,30 +339,26 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0cff;
|
||||
|
||||
for ( int k=0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
|
||||
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
xevan_4way_blake512_midstate( vdata );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
*noncev = mm256_interleave_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
xevan_4way_hash( hash, vdata );
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
@@ -411,15 +368,20 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
nonces[ num_found++ ] = n + lane;
|
||||
work_set_target_ratio( work, lane_hash );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE,
|
||||
"Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, lane );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -8,7 +8,7 @@ void xevan_set_target( struct work* work, double job_diff )
|
||||
bool register_xevan_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (XEVAN_4WAY)
|
||||
init_xevan_4way_ctx();
|
||||
// init_xevan_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_xevan_4way;
|
||||
gate->hash = (void*)&xevan_4way_hash;
|
||||
#else
|
||||
|
@@ -15,16 +15,16 @@ bool register_xevan_algo( algo_gate_t* gate );
|
||||
void xevan_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_xevan_4way_ctx();
|
||||
//void init_xevan_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void xevan_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_xevan( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_xevan_ctx();
|
||||
|
||||
|
@@ -230,12 +230,14 @@ void xevan_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_xevan(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_xevan( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
@@ -290,7 +290,7 @@ SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx)
|
||||
|
||||
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||
void
|
||||
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
||||
HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen)
|
||||
{
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
@@ -326,7 +326,7 @@ HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
|
||||
HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
@@ -335,7 +335,7 @@ HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx)
|
||||
HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx)
|
||||
{
|
||||
unsigned char ihash[32];
|
||||
|
||||
@@ -361,7 +361,7 @@ void
|
||||
PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX PShctx, hctx;
|
||||
HMAC_SHA256_CTX_Y PShctx, hctx;
|
||||
uint8_t _ALIGN(128) T[32];
|
||||
uint8_t _ALIGN(128) U[32];
|
||||
uint8_t ivec[4];
|
||||
@@ -370,8 +370,8 @@ PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
int k;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update(&PShctx, salt, saltlen);
|
||||
HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update_Y(&PShctx, salt, saltlen);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
@@ -379,18 +379,18 @@ PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
HMAC_SHA256_Update(&hctx, ivec, 4);
|
||||
HMAC_SHA256_Final(U, &hctx);
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y));
|
||||
HMAC_SHA256_Update_Y(&hctx, ivec, 4);
|
||||
HMAC_SHA256_Final_Y(U, &hctx);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(T, U, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
HMAC_SHA256_Init(&hctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update(&hctx, U, 32);
|
||||
HMAC_SHA256_Final(U, &hctx);
|
||||
HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update_Y(&hctx, U, 32);
|
||||
HMAC_SHA256_Final_Y(U, &hctx);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++)
|
||||
|
@@ -49,14 +49,14 @@ typedef struct HMAC_SHA256Context {
|
||||
typedef struct HMAC_SHA256Context {
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
} HMAC_SHA256_CTX;
|
||||
} HMAC_SHA256_CTX_Y;
|
||||
|
||||
void SHA256_Init_Y(SHA256_CTX_Y *);
|
||||
void SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t);
|
||||
void SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *);
|
||||
void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t);
|
||||
void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t);
|
||||
void HMAC_SHA256_Final(unsigned char [32], HMAC_SHA256_CTX *);
|
||||
void HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
|
||||
void HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
|
||||
void HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *);
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
|
@@ -1354,14 +1354,14 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
||||
if ((t || flags) && buflen == sizeof(sha256)) {
|
||||
/* Compute ClientKey */
|
||||
{
|
||||
HMAC_SHA256_CTX ctx;
|
||||
HMAC_SHA256_Init(&ctx, buf, buflen);
|
||||
HMAC_SHA256_CTX_Y ctx;
|
||||
HMAC_SHA256_Init_Y(&ctx, buf, buflen);
|
||||
if ( yescrypt_client_key )
|
||||
HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
|
||||
HMAC_SHA256_Update_Y( &ctx, (uint8_t*)yescrypt_client_key,
|
||||
yescrypt_client_key_len );
|
||||
else
|
||||
HMAC_SHA256_Update( &ctx, salt, saltlen );
|
||||
HMAC_SHA256_Final(sha256, &ctx);
|
||||
HMAC_SHA256_Update_Y( &ctx, salt, saltlen );
|
||||
HMAC_SHA256_Final_Y(sha256, &ctx);
|
||||
}
|
||||
/* Compute StoredKey */
|
||||
{
|
||||
|
@@ -383,7 +383,7 @@ void yescrypthash(void *output, const void *input)
|
||||
}
|
||||
|
||||
int scanhash_yescrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
@@ -393,6 +393,7 @@ int scanhash_yescrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
for (int k = 0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
@@ -1,646 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2005-2016 Colin Percival
|
||||
* Copyright 2016-2018 Alexander Peslyak
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "insecure_memzero.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "sha256.h"
|
||||
|
||||
#ifdef __ICC
|
||||
/* Miscompile with icc 14.0.0 (at least), so don't use restrict there */
|
||||
#define restrict
|
||||
#elif __STDC_VERSION__ >= 199901L
|
||||
/* Have restrict */
|
||||
#elif defined(__GNUC__)
|
||||
#define restrict __restrict
|
||||
#else
|
||||
#define restrict
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Encode a length len*2 vector of (uint32_t) into a length len*8 vector of
|
||||
* (uint8_t) in big-endian form.
|
||||
*/
|
||||
static void
|
||||
be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len)
|
||||
{
|
||||
|
||||
/* Encode vector, two words at a time. */
|
||||
do {
|
||||
be32enc(&dst[0], src[0]);
|
||||
be32enc(&dst[4], src[1]);
|
||||
src += 2;
|
||||
dst += 8;
|
||||
} while (--len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode a big-endian length len*8 vector of (uint8_t) into a length
|
||||
* len*2 vector of (uint32_t).
|
||||
*/
|
||||
static void
|
||||
be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len)
|
||||
{
|
||||
|
||||
/* Decode vector, two words at a time. */
|
||||
do {
|
||||
dst[0] = be32dec(&src[0]);
|
||||
dst[1] = be32dec(&src[4]);
|
||||
src += 8;
|
||||
dst += 2;
|
||||
} while (--len);
|
||||
}
|
||||
|
||||
/* SHA256 round constants. */
|
||||
static const uint32_t Krnd[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
/* Elementary functions used by SHA256 */
|
||||
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
||||
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
||||
#define SHR(x, n) (x >> n)
|
||||
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
||||
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
||||
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
||||
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
|
||||
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
|
||||
|
||||
/* SHA256 round function */
|
||||
#define RND(a, b, c, d, e, f, g, h, k) \
|
||||
h += S1(e) + Ch(e, f, g) + k; \
|
||||
d += h; \
|
||||
h += S0(a) + Maj(a, b, c);
|
||||
|
||||
/* Adjusted round function for rotating state */
|
||||
#define RNDr(S, W, i, ii) \
|
||||
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
||||
S[(66 - i) % 8], S[(67 - i) % 8], \
|
||||
S[(68 - i) % 8], S[(69 - i) % 8], \
|
||||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
||||
W[i + ii] + Krnd[i + ii])
|
||||
|
||||
/* Message schedule computation */
|
||||
#define MSCH(W, ii, i) \
|
||||
W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
|
||||
|
||||
/*
|
||||
* SHA256 block compression function. The 256-bit state is transformed via
|
||||
* the 512-bit input block to produce a new state.
|
||||
*/
|
||||
static void
|
||||
SHA256_Transform(uint32_t state[static restrict 8],
|
||||
const uint8_t block[static restrict 64],
|
||||
uint32_t W[static restrict 64], uint32_t S[static restrict 8])
|
||||
{
|
||||
int i;
|
||||
|
||||
/* 1. Prepare the first part of the message schedule W. */
|
||||
be32dec_vect(W, block, 8);
|
||||
|
||||
/* 2. Initialize working variables. */
|
||||
memcpy(S, state, 32);
|
||||
|
||||
/* 3. Mix. */
|
||||
for (i = 0; i < 64; i += 16) {
|
||||
RNDr(S, W, 0, i);
|
||||
RNDr(S, W, 1, i);
|
||||
RNDr(S, W, 2, i);
|
||||
RNDr(S, W, 3, i);
|
||||
RNDr(S, W, 4, i);
|
||||
RNDr(S, W, 5, i);
|
||||
RNDr(S, W, 6, i);
|
||||
RNDr(S, W, 7, i);
|
||||
RNDr(S, W, 8, i);
|
||||
RNDr(S, W, 9, i);
|
||||
RNDr(S, W, 10, i);
|
||||
RNDr(S, W, 11, i);
|
||||
RNDr(S, W, 12, i);
|
||||
RNDr(S, W, 13, i);
|
||||
RNDr(S, W, 14, i);
|
||||
RNDr(S, W, 15, i);
|
||||
|
||||
if (i == 48)
|
||||
break;
|
||||
MSCH(W, 0, i);
|
||||
MSCH(W, 1, i);
|
||||
MSCH(W, 2, i);
|
||||
MSCH(W, 3, i);
|
||||
MSCH(W, 4, i);
|
||||
MSCH(W, 5, i);
|
||||
MSCH(W, 6, i);
|
||||
MSCH(W, 7, i);
|
||||
MSCH(W, 8, i);
|
||||
MSCH(W, 9, i);
|
||||
MSCH(W, 10, i);
|
||||
MSCH(W, 11, i);
|
||||
MSCH(W, 12, i);
|
||||
MSCH(W, 13, i);
|
||||
MSCH(W, 14, i);
|
||||
MSCH(W, 15, i);
|
||||
}
|
||||
|
||||
/* 4. Mix local working variables into global state. */
|
||||
state[0] += S[0];
|
||||
state[1] += S[1];
|
||||
state[2] += S[2];
|
||||
state[3] += S[3];
|
||||
state[4] += S[4];
|
||||
state[5] += S[5];
|
||||
state[6] += S[6];
|
||||
state[7] += S[7];
|
||||
}
|
||||
|
||||
static const uint8_t PAD[64] = {
|
||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
/* Add padding and terminating bit-count. */
|
||||
static void
|
||||
SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* Figure out how many bytes we have buffered. */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Pad to 56 mod 64, transforming if we finish a block en route. */
|
||||
if (r < 56) {
|
||||
/* Pad to 56 mod 64. */
|
||||
memcpy(&ctx->buf[r], PAD, 56 - r);
|
||||
} else {
|
||||
/* Finish the current block and mix. */
|
||||
memcpy(&ctx->buf[r], PAD, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
|
||||
/* The start of the final block is all zeroes. */
|
||||
memset(&ctx->buf[0], 0, 56);
|
||||
}
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
be64enc(&ctx->buf[56], ctx->count);
|
||||
|
||||
/* Mix in the final block. */
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
}
|
||||
|
||||
/* Magic initialization constants. */
|
||||
static const uint32_t initial_state[8] = {
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
|
||||
/**
|
||||
* SHA256_Init(ctx):
|
||||
* Initialize the SHA256 context ${ctx}.
|
||||
*/
|
||||
void
|
||||
SHA256_Init(SHA256_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far. */
|
||||
ctx->count = 0;
|
||||
|
||||
/* Initialize state. */
|
||||
memcpy(ctx->state, initial_state, sizeof(initial_state));
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Update(ctx, in, len):
|
||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
||||
*/
|
||||
static void
|
||||
_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
uint32_t r;
|
||||
const uint8_t * src = in;
|
||||
|
||||
/* Return immediately if we have nothing to do. */
|
||||
if (len == 0)
|
||||
return;
|
||||
|
||||
/* Number of bytes left in the buffer from previous updates. */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Update number of bits. */
|
||||
ctx->count += (uint64_t)(len) << 3;
|
||||
|
||||
/* Handle the case where we don't need to perform any transforms. */
|
||||
if (len < 64 - r) {
|
||||
memcpy(&ctx->buf[r], src, len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Finish the current block. */
|
||||
memcpy(&ctx->buf[r], src, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
src += 64 - r;
|
||||
len -= 64 - r;
|
||||
|
||||
/* Perform complete blocks. */
|
||||
while (len >= 64) {
|
||||
SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]);
|
||||
src += 64;
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/* Copy left over data into buffer. */
|
||||
memcpy(ctx->buf, src, len);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_SHA256_Update(ctx, in, len, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Final(digest, ctx):
|
||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
||||
* buffer ${digest}.
|
||||
*/
|
||||
static void
|
||||
_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
|
||||
/* Add padding. */
|
||||
SHA256_Pad(ctx, tmp32);
|
||||
|
||||
/* Write the hash. */
|
||||
be32enc_vect(digest, ctx->state, 4);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_SHA256_Final(digest, ctx, tmp32);
|
||||
|
||||
/* Clear the context state. */
|
||||
insecure_memzero(ctx, sizeof(SHA256_CTX));
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Buf(in, len, digest):
|
||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
||||
*/
|
||||
void
|
||||
SHA256_Buf(const void * in, size_t len, uint8_t digest[32])
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
uint32_t tmp32[72];
|
||||
|
||||
SHA256_Init(&ctx);
|
||||
_SHA256_Update(&ctx, in, len, tmp32);
|
||||
_SHA256_Final(digest, &ctx, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
||||
* ${K}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen,
|
||||
uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64],
|
||||
uint8_t khash[static restrict 32])
|
||||
{
|
||||
const uint8_t * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if (Klen > 64) {
|
||||
SHA256_Init(&ctx->ictx);
|
||||
_SHA256_Update(&ctx->ictx, K, Klen, tmp32);
|
||||
_SHA256_Final(khash, &ctx->ictx, tmp32);
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
SHA256_Init(&ctx->ictx);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
_SHA256_Update(&ctx->ictx, pad, 64, tmp32);
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
SHA256_Init(&ctx->octx);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
_SHA256_Update(&ctx->octx, pad, 64, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
uint8_t pad[64];
|
||||
uint8_t khash[32];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(khash, 32);
|
||||
insecure_memzero(pad, 64);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Update(ctx, in, len):
|
||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
_SHA256_Update(&ctx->ictx, in, len, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Update(ctx, in, len, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Final(digest, ctx):
|
||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
||||
* buffer ${digest}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx,
|
||||
uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32])
|
||||
{
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
_SHA256_Final(ihash, &ctx->ictx, tmp32);
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
_SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
_SHA256_Final(digest, &ctx->octx, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
uint8_t ihash[32];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Final(digest, ctx, tmp32, ihash);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(ihash, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
||||
* length ${Klen}, and write the result to ${digest}.
|
||||
*/
|
||||
void
|
||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
||||
uint8_t digest[32])
|
||||
{
|
||||
HMAC_SHA256_CTX ctx;
|
||||
uint32_t tmp32[72];
|
||||
uint8_t tmp8[96];
|
||||
|
||||
_HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]);
|
||||
_HMAC_SHA256_Update(&ctx, in, len, tmp32);
|
||||
_HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(tmp8, 96);
|
||||
}
|
||||
|
||||
/* Add padding and terminating bit-count, but don't invoke Transform yet. */
|
||||
static int
|
||||
SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8],
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
uint32_t r;
|
||||
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
if (r >= 56)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Convert length to a vector of bytes -- we do this now rather
|
||||
* than later because the length will change after we pad.
|
||||
*/
|
||||
be64enc(len, ctx->count);
|
||||
|
||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
||||
_SHA256_Update(ctx, PAD, 56 - r, tmp32);
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
ctx->buf[63] = len[7];
|
||||
_SHA256_Update(ctx, len, 7, tmp32);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void
|
||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX Phctx, PShctx, hctx;
|
||||
uint32_t tmp32[72];
|
||||
union {
|
||||
uint8_t tmp8[96];
|
||||
uint32_t state[8];
|
||||
} u;
|
||||
size_t i;
|
||||
uint8_t ivec[4];
|
||||
uint8_t U[32];
|
||||
uint8_t T[32];
|
||||
uint64_t j;
|
||||
int k;
|
||||
size_t clen;
|
||||
|
||||
/* Sanity-check. */
|
||||
assert(dkLen <= 32 * (size_t)(UINT32_MAX));
|
||||
|
||||
if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) {
|
||||
uint32_t oldcount;
|
||||
uint8_t * ivecp;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
_HMAC_SHA256_Init(&hctx, passwd, passwdlen,
|
||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
||||
_HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32);
|
||||
|
||||
/* Prepare ictx padding. */
|
||||
oldcount = hctx.ictx.count & (0x3f << 3);
|
||||
_HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32);
|
||||
if ((hctx.ictx.count & (0x3f << 3)) < oldcount ||
|
||||
SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32))
|
||||
goto generic; /* Can't happen due to saltlen check */
|
||||
ivecp = hctx.ictx.buf + (oldcount >> 3);
|
||||
|
||||
/* Prepare octx padding. */
|
||||
hctx.octx.count += 32 << 3;
|
||||
SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivecp, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(u.state, hctx.ictx.state, sizeof(u.state));
|
||||
SHA256_Transform(u.state, hctx.ictx.buf,
|
||||
&tmp32[0], &tmp32[64]);
|
||||
be32enc_vect(hctx.octx.buf, u.state, 4);
|
||||
memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
||||
SHA256_Transform(u.state, hctx.octx.buf,
|
||||
&tmp32[0], &tmp32[64]);
|
||||
be32enc_vect(&buf[i * 32], u.state, 4);
|
||||
}
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
generic:
|
||||
/* Compute HMAC state after processing P. */
|
||||
_HMAC_SHA256_Init(&Phctx, passwd, passwdlen,
|
||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&hctx, ivec, 4, tmp32);
|
||||
_HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8);
|
||||
|
||||
if (c > 1) {
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(U, T, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&hctx, U, 32, tmp32);
|
||||
_HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++)
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(U, 32);
|
||||
insecure_memzero(T, 32);
|
||||
|
||||
cleanup:
|
||||
insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(&u, sizeof(u));
|
||||
}
|
@@ -1,680 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2005-2016 Colin Percival
|
||||
* Copyright 2016-2018 Alexander Peslyak
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "insecure_memzero.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "sha256.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
/*
 * Make sure `restrict' can be written in the declarations of this file,
 * whatever the compiler.
 */
#if defined(__ICC)
/* icc 14.0.0 (at least) miscompiles with restrict, so define it away. */
#define restrict
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 or later: restrict is a keyword, nothing to do. */
#elif defined(__GNUC__)
/* Pre-C99 GNU dialect: use the compiler's own spelling. */
#define restrict __restrict
#else
/* Unknown pre-C99 compiler: drop the qualifier. */
#define restrict
#endif
|
||||
|
||||
/*
 * Encode a length len*2 vector of (uint32_t) into a length len*8 vector of
 * (uint8_t) in big-endian form.  ${len} counts pairs of words; a ${len} of
 * zero encodes nothing.
 */
static void
be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len)
{

	/*
	 * Encode vector, two words at a time.  The count is tested before
	 * each pair: a do/while form would process one pair and then wrap
	 * the counter around when called with len == 0.
	 */
	while (len-- > 0) {
		be32enc(&dst[0], src[0]);
		be32enc(&dst[4], src[1]);
		src += 2;
		dst += 8;
	}
}
|
||||
|
||||
/*
 * Decode a big-endian length len*8 vector of (uint8_t) into a length
 * len*2 vector of (uint32_t).  ${len} counts pairs of words; a ${len} of
 * zero decodes nothing.
 */
static void
be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len)
{

	/*
	 * Decode vector, two words at a time.  The count is tested before
	 * each pair: a do/while form would process one pair and then wrap
	 * the counter around when called with len == 0.
	 */
	while (len-- > 0) {
		dst[0] = be32dec(&src[0]);
		dst[1] = be32dec(&src[4]);
		src += 8;
		dst += 2;
	}
}
|
||||
|
||||
/* SHA256 round constants, one per round; each row holds four rounds. */
static const uint32_t Krnd[64] = {
	/*  0.. 3 */ 0x428a2f98u, 0x71374491u, 0xb5c0fbcfu, 0xe9b5dba5u,
	/*  4.. 7 */ 0x3956c25bu, 0x59f111f1u, 0x923f82a4u, 0xab1c5ed5u,
	/*  8..11 */ 0xd807aa98u, 0x12835b01u, 0x243185beu, 0x550c7dc3u,
	/* 12..15 */ 0x72be5d74u, 0x80deb1feu, 0x9bdc06a7u, 0xc19bf174u,
	/* 16..19 */ 0xe49b69c1u, 0xefbe4786u, 0x0fc19dc6u, 0x240ca1ccu,
	/* 20..23 */ 0x2de92c6fu, 0x4a7484aau, 0x5cb0a9dcu, 0x76f988dau,
	/* 24..27 */ 0x983e5152u, 0xa831c66du, 0xb00327c8u, 0xbf597fc7u,
	/* 28..31 */ 0xc6e00bf3u, 0xd5a79147u, 0x06ca6351u, 0x14292967u,
	/* 32..35 */ 0x27b70a85u, 0x2e1b2138u, 0x4d2c6dfcu, 0x53380d13u,
	/* 36..39 */ 0x650a7354u, 0x766a0abbu, 0x81c2c92eu, 0x92722c85u,
	/* 40..43 */ 0xa2bfe8a1u, 0xa81a664bu, 0xc24b8b70u, 0xc76c51a3u,
	/* 44..47 */ 0xd192e819u, 0xd6990624u, 0xf40e3585u, 0x106aa070u,
	/* 48..51 */ 0x19a4c116u, 0x1e376c08u, 0x2748774cu, 0x34b0bcb5u,
	/* 52..55 */ 0x391c0cb3u, 0x4ed8aa4au, 0x5b9cca4fu, 0x682e6ff3u,
	/* 56..59 */ 0x748f82eeu, 0x78a5636fu, 0x84c87814u, 0x8cc70208u,
	/* 60..63 */ 0x90befffau, 0xa4506cebu, 0xbef9a3f7u, 0xc67178f2u
};
|
||||
|
||||
/*
 * Elementary functions used by SHA256.  Every macro argument is
 * parenthesized so that compound expressions can be passed safely.
 */
#define Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)	((x) >> (n))
#define ROTR(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/* The branch below is disabled and never compiled; the #else branch is used. */
#if 0 //defined(__SHA__)

// ABEF = _mm_sha256rnds2_epu32( CDGH, ABEF, k )
//_mm_sha256rnds2_epu32 (__m128i a, __m128i b, __m128i k)
// b = { ABEF } a = { CDGH }
//
//a = _mm_set_epi32( S[(66 - i) % 8], S[(67 - i) % 8],
// S[(70 - i) % 8], S[(71 - i) % 8] );
//b = _mm_set_epi32( S[(64 - i) % 8], S[(65 - i) % 8],
// S[(68 - i) % 8], S[(69 - i) % 8] );
//k = _mm_set1_epi32( W[i + ii] + Krnd[i + ii] )
// _mm_sha256rnds2_epu32(a,b,k)

#define RNDr( S, W, i, ii ) do \
{ \
	uint32_t abef[4]; \
	__m128i ABEF = _mm_set_epi32( S[(66 - i) % 8], S[(67 - i) % 8], \
	                              S[(70 - i) % 8], S[(71 - i) % 8] ); \
	__m128i CDGH = _mm_set_epi32( S[(64 - i) % 8], S[(65 - i) % 8], \
	                              S[(68 - i) % 8], S[(69 - i) % 8] ); \
	__m128i K = _mm_set1_epi32( W[i + ii] + Krnd[i + ii] ); \
	casti_m128i( abef, 0 ) = _mm_sha256rnds2_epu32( CDGH, ABEF, K ); \
	S[(66 - i) % 8] = abef[3]; \
	S[(67 - i) % 8] = abef[2]; \
	S[(64 - i) % 8] = abef[1]; \
	S[(65 - i) % 8] = abef[0]; \
} while(0)

#else

/*
 * SHA256 round function.  Updates d and h in place from the eight working
 * variables and the round input k (message word plus round constant).
 * Wrapped in do { } while (0) so that it expands to exactly one statement.
 */
#define RND(a, b, c, d, e, f, g, h, k) do {		\
	(h) += S1(e) + Ch(e, f, g) + (k);		\
	(d) += (h);					\
	(h) += S0(a) + Maj(a, b, c);			\
} while (0)

/*
 * Adjusted round function for rotating state: instead of shuffling the
 * eight working variables after each round, round i addresses them through
 * indices rotated by i.  ii is the offset of the current group of rounds.
 */
#define RNDr(S, W, i, ii)				\
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8],	\
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8],	\
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8],	\
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8],	\
	    (W)[(i) + (ii)] + Krnd[(i) + (ii)])

#endif

/*
 * Message schedule computation: derives word (i + ii + 16) of W from four
 * of the sixteen words that precede it.
 */
#define MSCH(W, ii, i)							\
	((W)[(i) + (ii) + 16] = s1((W)[(i) + (ii) + 14]) +		\
	    (W)[(i) + (ii) + 9] + s0((W)[(i) + (ii) + 1]) + (W)[(i) + (ii)])
|
||||
|
||||
/*
 * SHA256 block compression function.  Folds the 512-bit input ${block}
 * into the 256-bit ${state}.  ${W} (message schedule) and ${S} (working
 * variables) are caller-provided scratch arrays, overwritten here.
 */
static void
SHA256_Transform(uint32_t state[static restrict 8],
    const uint8_t block[static restrict 64],
    uint32_t W[static restrict 64], uint32_t S[static restrict 8])
{
	int base;
	int k;

	/* 1. The first 16 schedule words are the block itself. */
	be32dec_vect(W, block, 8);

	/* 2. Working variables start out as a copy of the state. */
	memcpy(S, state, 32);

	/* 3. Mix: four groups of sixteen rounds. */
	for (base = 0; ; base += 16) {
		RNDr(S, W, 0, base);
		RNDr(S, W, 1, base);
		RNDr(S, W, 2, base);
		RNDr(S, W, 3, base);
		RNDr(S, W, 4, base);
		RNDr(S, W, 5, base);
		RNDr(S, W, 6, base);
		RNDr(S, W, 7, base);
		RNDr(S, W, 8, base);
		RNDr(S, W, 9, base);
		RNDr(S, W, 10, base);
		RNDr(S, W, 11, base);
		RNDr(S, W, 12, base);
		RNDr(S, W, 13, base);
		RNDr(S, W, 14, base);
		RNDr(S, W, 15, base);

		/* No schedule words are needed after the last group. */
		if (base == 48)
			break;

		/* Extend the schedule by the next sixteen words. */
		MSCH(W, 0, base);
		MSCH(W, 1, base);
		MSCH(W, 2, base);
		MSCH(W, 3, base);
		MSCH(W, 4, base);
		MSCH(W, 5, base);
		MSCH(W, 6, base);
		MSCH(W, 7, base);
		MSCH(W, 8, base);
		MSCH(W, 9, base);
		MSCH(W, 10, base);
		MSCH(W, 11, base);
		MSCH(W, 12, base);
		MSCH(W, 13, base);
		MSCH(W, 14, base);
		MSCH(W, 15, base);
	}

	/* 4. Add the working variables back into the state. */
	for (k = 0; k < 8; k++)
		state[k] += S[k];
}
|
||||
|
||||
/*
 * Padding source: a single 0x80 byte followed by zeroes.  Elements without
 * an explicit initializer are zero-initialized.
 */
static const uint8_t PAD[64] = { 0x80 };
|
||||
|
||||
/* Add padding and terminating bit-count. */
|
||||
static void
|
||||
SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* Figure out how many bytes we have buffered. */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Pad to 56 mod 64, transforming if we finish a block en route. */
|
||||
if (r < 56) {
|
||||
/* Pad to 56 mod 64. */
|
||||
memcpy(&ctx->buf[r], PAD, 56 - r);
|
||||
} else {
|
||||
/* Finish the current block and mix. */
|
||||
memcpy(&ctx->buf[r], PAD, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
|
||||
/* The start of the final block is all zeroes. */
|
||||
memset(&ctx->buf[0], 0, 56);
|
||||
}
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
be64enc(&ctx->buf[56], ctx->count);
|
||||
|
||||
/* Mix in the final block. */
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
}
|
||||
|
||||
/* Initial values of the eight SHA256 state words. */
static const uint32_t initial_state[8] = {
	0x6a09e667u, 0xbb67ae85u,
	0x3c6ef372u, 0xa54ff53au,
	0x510e527fu, 0x9b05688cu,
	0x1f83d9abu, 0x5be0cd19u
};
|
||||
|
||||
/**
|
||||
* SHA256_Init(ctx):
|
||||
* Initialize the SHA256 context ${ctx}.
|
||||
*/
|
||||
void
|
||||
SHA256_Init(SHA256_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far. */
|
||||
ctx->count = 0;
|
||||
|
||||
/* Initialize state. */
|
||||
memcpy(ctx->state, initial_state, sizeof(initial_state));
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Update(ctx, in, len):
|
||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
||||
*/
|
||||
static void
|
||||
_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
uint32_t r;
|
||||
const uint8_t * src = in;
|
||||
|
||||
/* Return immediately if we have nothing to do. */
|
||||
if (len == 0)
|
||||
return;
|
||||
|
||||
/* Number of bytes left in the buffer from previous updates. */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Update number of bits. */
|
||||
ctx->count += (uint64_t)(len) << 3;
|
||||
|
||||
/* Handle the case where we don't need to perform any transforms. */
|
||||
if (len < 64 - r) {
|
||||
memcpy(&ctx->buf[r], src, len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Finish the current block. */
|
||||
memcpy(&ctx->buf[r], src, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
src += 64 - r;
|
||||
len -= 64 - r;
|
||||
|
||||
/* Perform complete blocks. */
|
||||
while (len >= 64) {
|
||||
SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]);
|
||||
src += 64;
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/* Copy left over data into buffer. */
|
||||
memcpy(ctx->buf, src, len);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_SHA256_Update(ctx, in, len, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Final(digest, ctx):
|
||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
||||
* buffer ${digest}.
|
||||
*/
|
||||
static void
|
||||
_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
|
||||
/* Add padding. */
|
||||
SHA256_Pad(ctx, tmp32);
|
||||
|
||||
/* Write the hash. */
|
||||
be32enc_vect(digest, ctx->state, 4);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_SHA256_Final(digest, ctx, tmp32);
|
||||
|
||||
/* Clear the context state. */
|
||||
insecure_memzero(ctx, sizeof(SHA256_CTX));
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Buf(in, len, digest):
|
||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
||||
*/
|
||||
void
|
||||
SHA256_Buf(const void * in, size_t len, uint8_t digest[32])
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
uint32_t tmp32[72];
|
||||
|
||||
SHA256_Init(&ctx);
|
||||
_SHA256_Update(&ctx, in, len, tmp32);
|
||||
_SHA256_Final(digest, &ctx, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
||||
* ${K}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen,
|
||||
uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64],
|
||||
uint8_t khash[static restrict 32])
|
||||
{
|
||||
const uint8_t * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if (Klen > 64) {
|
||||
SHA256_Init(&ctx->ictx);
|
||||
_SHA256_Update(&ctx->ictx, K, Klen, tmp32);
|
||||
_SHA256_Final(khash, &ctx->ictx, tmp32);
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
SHA256_Init(&ctx->ictx);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
_SHA256_Update(&ctx->ictx, pad, 64, tmp32);
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
SHA256_Init(&ctx->octx);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
_SHA256_Update(&ctx->octx, pad, 64, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
uint8_t pad[64];
|
||||
uint8_t khash[32];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(khash, 32);
|
||||
insecure_memzero(pad, 64);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Update(ctx, in, len):
|
||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
_SHA256_Update(&ctx->ictx, in, len, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Update(ctx, in, len, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Final(digest, ctx):
|
||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
||||
* buffer ${digest}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx,
|
||||
uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32])
|
||||
{
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
_SHA256_Final(ihash, &ctx->ictx, tmp32);
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
_SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
_SHA256_Final(digest, &ctx->octx, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
uint8_t ihash[32];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Final(digest, ctx, tmp32, ihash);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(ihash, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
||||
* length ${Klen}, and write the result to ${digest}.
|
||||
*/
|
||||
void
|
||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
||||
uint8_t digest[32])
|
||||
{
|
||||
HMAC_SHA256_CTX ctx;
|
||||
uint32_t tmp32[72];
|
||||
uint8_t tmp8[96];
|
||||
|
||||
_HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]);
|
||||
_HMAC_SHA256_Update(&ctx, in, len, tmp32);
|
||||
_HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(tmp8, 96);
|
||||
}
|
||||
|
||||
/* Add padding and terminating bit-count, but don't invoke Transform yet. */
|
||||
static int
|
||||
SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8],
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
uint32_t r;
|
||||
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
if (r >= 56)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Convert length to a vector of bytes -- we do this now rather
|
||||
* than later because the length will change after we pad.
|
||||
*/
|
||||
be64enc(len, ctx->count);
|
||||
|
||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
||||
_SHA256_Update(ctx, PAD, 56 - r, tmp32);
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
ctx->buf[63] = len[7];
|
||||
_SHA256_Update(ctx, len, 7, tmp32);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void
|
||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX Phctx, PShctx, hctx;
|
||||
uint32_t tmp32[72];
|
||||
union {
|
||||
uint8_t tmp8[96];
|
||||
uint32_t state[8];
|
||||
} u;
|
||||
size_t i;
|
||||
uint8_t ivec[4];
|
||||
uint8_t U[32];
|
||||
uint8_t T[32];
|
||||
uint64_t j;
|
||||
int k;
|
||||
size_t clen;
|
||||
|
||||
/* Sanity-check. */
|
||||
assert(dkLen <= 32 * (size_t)(UINT32_MAX));
|
||||
|
||||
if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) {
|
||||
uint32_t oldcount;
|
||||
uint8_t * ivecp;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
_HMAC_SHA256_Init(&hctx, passwd, passwdlen,
|
||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
||||
_HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32);
|
||||
|
||||
/* Prepare ictx padding. */
|
||||
oldcount = hctx.ictx.count & (0x3f << 3);
|
||||
_HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32);
|
||||
if ((hctx.ictx.count & (0x3f << 3)) < oldcount ||
|
||||
SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32))
|
||||
goto generic; /* Can't happen due to saltlen check */
|
||||
ivecp = hctx.ictx.buf + (oldcount >> 3);
|
||||
|
||||
/* Prepare octx padding. */
|
||||
hctx.octx.count += 32 << 3;
|
||||
SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivecp, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(u.state, hctx.ictx.state, sizeof(u.state));
|
||||
SHA256_Transform(u.state, hctx.ictx.buf,
|
||||
&tmp32[0], &tmp32[64]);
|
||||
be32enc_vect(hctx.octx.buf, u.state, 4);
|
||||
memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
||||
SHA256_Transform(u.state, hctx.octx.buf,
|
||||
&tmp32[0], &tmp32[64]);
|
||||
be32enc_vect(&buf[i * 32], u.state, 4);
|
||||
}
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
generic:
|
||||
/* Compute HMAC state after processing P. */
|
||||
_HMAC_SHA256_Init(&Phctx, passwd, passwdlen,
|
||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&hctx, ivec, 4, tmp32);
|
||||
_HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8);
|
||||
|
||||
if (c > 1) {
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(U, T, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&hctx, U, 32, tmp32);
|
||||
_HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++)
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(U, 32);
|
||||
insecure_memzero(T, 32);
|
||||
|
||||
cleanup:
|
||||
insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(&u, sizeof(u));
|
||||
}
|
@@ -1,672 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2005-2016 Colin Percival
|
||||
* Copyright 2016-2018 Alexander Peslyak
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "insecure_memzero.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "sha256.h"
|
||||
|
||||
/*
 * Make the `restrict` qualifier usable with every supported compiler:
 *  - icc: blanked out (icc 14.0.0 at least miscompiles code using it);
 *  - C99 or later: the native keyword is used untouched;
 *  - GNU-compatible pre-C99 compilers: mapped to __restrict;
 *  - anything else: blanked out.
 */
#if defined(__ICC)
#define restrict
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* Native restrict is available; nothing to define. */
#elif defined(__GNUC__)
#define restrict __restrict
#else
#define restrict
#endif
|
||||
|
||||
/*
 * Encode a length len*2 vector of (uint32_t) into a length len*8 vector of
 * (uint8_t) in big-endian form.  ${len} counts PAIRS of words; a count of
 * zero is accepted and writes nothing.
 */
static void
be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len)
{

	/*
	 * Encode vector, two words at a time.  The count is tested before
	 * each pair so that len == 0 cannot wrap around and overrun the
	 * buffers.
	 */
	for (; len != 0; len--) {
		be32enc(&dst[0], src[0]);
		be32enc(&dst[4], src[1]);
		src += 2;
		dst += 8;
	}
}
|
||||
|
||||
/*
 * Decode a big-endian length len*8 vector of (uint8_t) into a length
 * len*2 vector of (uint32_t).  ${len} counts PAIRS of words; a count of
 * zero is accepted and reads nothing.
 */
static void
be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len)
{

	/*
	 * Decode vector, two words at a time.  The count is tested before
	 * each pair so that len == 0 cannot wrap around and overrun the
	 * buffers.
	 */
	for (; len != 0; len--) {
		dst[0] = be32dec(&src[0]);
		dst[1] = be32dec(&src[4]);
		src += 8;
		dst += 2;
	}
}
|
||||
|
||||
#if 0
|
||||
/* SHA256 round constants, one per round, indexed by round number. */
static const uint32_t Krnd[64] = {
	/* rounds 0-15 */
	0x428a2f98u, 0x71374491u, 0xb5c0fbcfu, 0xe9b5dba5u,
	0x3956c25bu, 0x59f111f1u, 0x923f82a4u, 0xab1c5ed5u,
	0xd807aa98u, 0x12835b01u, 0x243185beu, 0x550c7dc3u,
	0x72be5d74u, 0x80deb1feu, 0x9bdc06a7u, 0xc19bf174u,
	/* rounds 16-31 */
	0xe49b69c1u, 0xefbe4786u, 0x0fc19dc6u, 0x240ca1ccu,
	0x2de92c6fu, 0x4a7484aau, 0x5cb0a9dcu, 0x76f988dau,
	0x983e5152u, 0xa831c66du, 0xb00327c8u, 0xbf597fc7u,
	0xc6e00bf3u, 0xd5a79147u, 0x06ca6351u, 0x14292967u,
	/* rounds 32-47 */
	0x27b70a85u, 0x2e1b2138u, 0x4d2c6dfcu, 0x53380d13u,
	0x650a7354u, 0x766a0abbu, 0x81c2c92eu, 0x92722c85u,
	0xa2bfe8a1u, 0xa81a664bu, 0xc24b8b70u, 0xc76c51a3u,
	0xd192e819u, 0xd6990624u, 0xf40e3585u, 0x106aa070u,
	/* rounds 48-63 */
	0x19a4c116u, 0x1e376c08u, 0x2748774cu, 0x34b0bcb5u,
	0x391c0cb3u, 0x4ed8aa4au, 0x5b9cca4fu, 0x682e6ff3u,
	0x748f82eeu, 0x78a5636fu, 0x84c87814u, 0x8cc70208u,
	0x90befffau, 0xa4506cebu, 0xbef9a3f7u, 0xc67178f2u
};
|
||||
|
||||
/*
 * Elementary functions used by SHA256.
 *
 * Every macro argument is parenthesised so that compound expressions
 * (e.g. `a | b`) expand correctly.  Arguments may be evaluated more than
 * once, so they must be free of side effects.  ROTR assumes a 32-bit
 * unsigned operand and a rotation count strictly between 0 and 32.
 */
#define Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)	((x) >> (n))
#define ROTR(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.  Updates d and h in place; wrapped in
 * do { } while (0) so that it behaves as a single statement.
 */
#define RND(a, b, c, d, e, f, g, h, k)			\
	do {						\
		(h) += S1(e) + Ch(e, f, g) + (k);	\
		(d) += (h);				\
		(h) += S0(a) + Maj(a, b, c);		\
	} while (0)

/*
 * Adjusted round function for rotating state: instead of shuffling the
 * eight working variables after each round, round i addresses them through
 * indices rotated by i (mod 8).
 */
#define RNDr(S, W, i, ii)					\
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8],		\
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8],		\
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8],		\
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8],		\
	    (W)[(i) + (ii)] + Krnd[(i) + (ii)])

/* Message schedule computation: derives word (i + ii + 16) of W. */
#define MSCH(W, ii, i)							\
	((W)[(i) + (ii) + 16] = s1((W)[(i) + (ii) + 14]) +		\
	    (W)[(i) + (ii) + 9] + s0((W)[(i) + (ii) + 1]) +		\
	    (W)[(i) + (ii)])
|
||||
|
||||
/*
 * SHA256 block compression function.  Folds the 64-byte input ${block}
 * into the 8-word ${state}.  ${W} (64 words) and ${S} (8 words) are
 * caller-provided scratch areas for the message schedule and the working
 * variables respectively.
 */
static void
SHA256_Transform(uint32_t state[static restrict 8],
    const uint8_t block[static restrict 64],
    uint32_t W[static restrict 64], uint32_t S[static restrict 8])
{
	int base, n;

	/* The first 16 schedule words are the block, read big-endian. */
	be32dec_vect(W, block, 8);

	/* Work on a copy of the chaining value. */
	memcpy(S, state, 32);

	/* 64 rounds, handled as four groups of 16. */
	for (base = 0; ; base += 16) {
		RNDr(S, W, 0, base);
		RNDr(S, W, 1, base);
		RNDr(S, W, 2, base);
		RNDr(S, W, 3, base);
		RNDr(S, W, 4, base);
		RNDr(S, W, 5, base);
		RNDr(S, W, 6, base);
		RNDr(S, W, 7, base);
		RNDr(S, W, 8, base);
		RNDr(S, W, 9, base);
		RNDr(S, W, 10, base);
		RNDr(S, W, 11, base);
		RNDr(S, W, 12, base);
		RNDr(S, W, 13, base);
		RNDr(S, W, 14, base);
		RNDr(S, W, 15, base);

		/* After the last group no more schedule words are needed. */
		if (base >= 48)
			break;

		/*
		 * Extend the schedule by the next 16 words, in ascending
		 * order because each word depends on earlier ones.
		 */
		for (n = 0; n < 16; n++)
			MSCH(W, n, base);
	}

	/* Add the working variables back into the chaining value. */
	for (n = 0; n < 8; n++)
		state[n] += S[n];
}
|
||||
#endif
|
||||
/*
 * SHA256 padding source: a single 0x80 marker byte followed by 63 zero
 * bytes (the unlisted elements are zero-initialised).
 */
static const uint8_t PAD[64] = { 0x80 };
|
||||
|
||||
/* Add padding and terminating bit-count. */
|
||||
static void
|
||||
SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* Figure out how many bytes we have buffered. */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Pad to 56 mod 64, transforming if we finish a block en route. */
|
||||
if (r < 56) {
|
||||
/* Pad to 56 mod 64. */
|
||||
memcpy(&ctx->buf[r], PAD, 56 - r);
|
||||
} else {
|
||||
/* Finish the current block and mix. */
|
||||
memcpy(&ctx->buf[r], PAD, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
|
||||
/* The start of the final block is all zeroes. */
|
||||
memset(&ctx->buf[0], 0, 56);
|
||||
}
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
be64enc(&ctx->buf[56], ctx->count);
|
||||
|
||||
/* Mix in the final block. */
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
}
|
||||
#if 0
|
||||
/* Initial SHA256 chaining value, copied into a context by SHA256_Init. */
static const uint32_t initial_state[8] = {
	0x6A09E667u, 0xBB67AE85u,
	0x3C6EF372u, 0xA54FF53Au,
	0x510E527Fu, 0x9B05688Cu,
	0x1F83D9ABu, 0x5BE0CD19u
};
|
||||
|
||||
/**
|
||||
* SHA256_Init(ctx):
|
||||
* Initialize the SHA256 context ${ctx}.
|
||||
*/
|
||||
void
|
||||
SHA256_Init(SHA256_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far. */
|
||||
ctx->count = 0;
|
||||
|
||||
/* Initialize state. */
|
||||
memcpy(ctx->state, initial_state, sizeof(initial_state));
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Update(ctx, in, len):
|
||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
||||
*/
|
||||
static void
|
||||
_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
uint32_t r;
|
||||
const uint8_t * src = in;
|
||||
|
||||
/* Return immediately if we have nothing to do. */
|
||||
if (len == 0)
|
||||
return;
|
||||
|
||||
/* Number of bytes left in the buffer from previous updates. */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Update number of bits. */
|
||||
ctx->count += (uint64_t)(len) << 3;
|
||||
|
||||
/* Handle the case where we don't need to perform any transforms. */
|
||||
if (len < 64 - r) {
|
||||
memcpy(&ctx->buf[r], src, len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Finish the current block. */
|
||||
memcpy(&ctx->buf[r], src, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
||||
src += 64 - r;
|
||||
len -= 64 - r;
|
||||
|
||||
/* Perform complete blocks. */
|
||||
while (len >= 64) {
|
||||
SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]);
|
||||
src += 64;
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/* Copy left over data into buffer. */
|
||||
memcpy(ctx->buf, src, len);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_SHA256_Update(ctx, in, len, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Final(digest, ctx):
|
||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
||||
* buffer ${digest}.
|
||||
*/
|
||||
static void
|
||||
_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
|
||||
/* Add padding. */
|
||||
SHA256_Pad(ctx, tmp32);
|
||||
|
||||
/* Write the hash. */
|
||||
be32enc_vect(digest, ctx->state, 4);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_SHA256_Final(digest, ctx, tmp32);
|
||||
|
||||
/* Clear the context state. */
|
||||
insecure_memzero(ctx, sizeof(SHA256_CTX));
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* SHA256_Buf(in, len, digest):
|
||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
||||
*/
|
||||
void
|
||||
SHA256_Buf(const void * in, size_t len, uint8_t digest[32])
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
uint32_t tmp32[72];
|
||||
|
||||
SHA256_Init(&ctx);
|
||||
SHA256_Update(&ctx, in, len);
|
||||
SHA256_Final(digest, &ctx);
|
||||
// _SHA256_Update(&ctx, in, len, tmp32);
|
||||
// _SHA256_Final(digest, &ctx, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
||||
* ${K}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen,
|
||||
uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64],
|
||||
uint8_t khash[static restrict 32])
|
||||
{
|
||||
const uint8_t * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if (Klen > 64) {
|
||||
SHA256_Init(&ctx->ictx);
|
||||
SHA256_Update(&ctx->ictx, K, Klen);
|
||||
SHA256_Final(khash, &ctx->ictx);
|
||||
// _SHA256_Update(&ctx->ictx, K, Klen, tmp32);
|
||||
// _SHA256_Final(khash, &ctx->ictx, tmp32);
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
SHA256_Init(&ctx->ictx);
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
SHA256_Update(&ctx->ictx, pad, 64);
|
||||
// _SHA256_Update(&ctx->ictx, pad, 64, tmp32);
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
SHA256_Init(&ctx->octx);
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
SHA256_Update(&ctx->octx, pad, 64);
|
||||
// _SHA256_Update(&ctx->octx, pad, 64, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
uint8_t pad[64];
|
||||
uint8_t khash[32];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(khash, 32);
|
||||
insecure_memzero(pad, 64);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Update(ctx, in, len):
|
||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len,
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
SHA256_Update(&ctx->ictx, in, len);
|
||||
// _SHA256_Update(&ctx->ictx, in, len, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Update(ctx, in, len, tmp32);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Final(digest, ctx):
|
||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
||||
* buffer ${digest}.
|
||||
*/
|
||||
static void
|
||||
_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx,
|
||||
uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32])
|
||||
{
|
||||
/* Finish the inner SHA256 operation. */
|
||||
_SHA256_Final(ihash, &ctx->ictx, tmp32);
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
_SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
_SHA256_Final(digest, &ctx->octx, tmp32);
|
||||
|
||||
|
||||
// _SHA256_Final(ihash, &ctx->ictx, tmp32);
|
||||
// _SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
||||
// _SHA256_Final(digest, &ctx->octx, tmp32);
|
||||
}
|
||||
|
||||
/* Wrapper function for intermediate-values sanitization. */
|
||||
void
|
||||
HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx)
|
||||
{
|
||||
uint32_t tmp32[72];
|
||||
uint8_t ihash[32];
|
||||
|
||||
/* Call the real function. */
|
||||
_HMAC_SHA256_Final(digest, ctx, tmp32, ihash);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(ihash, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
||||
* length ${Klen}, and write the result to ${digest}.
|
||||
*/
|
||||
void
|
||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
||||
uint8_t digest[32])
|
||||
{
|
||||
HMAC_SHA256_CTX ctx;
|
||||
uint32_t tmp32[72];
|
||||
uint8_t tmp8[96];
|
||||
|
||||
_HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]);
|
||||
_HMAC_SHA256_Update(&ctx, in, len, tmp32);
|
||||
_HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]);
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(tmp8, 96);
|
||||
}
|
||||
|
||||
/* Add padding and terminating bit-count, but don't invoke Transform yet. */
|
||||
static int
|
||||
SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8],
|
||||
uint32_t tmp32[static restrict 72])
|
||||
{
|
||||
uint32_t r;
|
||||
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
if (r >= 56)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Convert length to a vector of bytes -- we do this now rather
|
||||
* than later because the length will change after we pad.
|
||||
*/
|
||||
be64enc(len, ctx->count);
|
||||
|
||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
||||
SHA256_Update(ctx, PAD, 56 - r, tmp);
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
ctx->buf[63] = len[7];
|
||||
SHA256_Update(ctx, len, 7, tmp);
|
||||
|
||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
||||
// _SHA256_Update(ctx, PAD, 56 - r, tmp32);
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
// ctx->buf[63] = len[7];
|
||||
// _SHA256_Update(ctx, len, 7, tmp32);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void
|
||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX Phctx, PShctx, hctx;
|
||||
uint32_t tmp32[72];
|
||||
union {
|
||||
uint8_t tmp8[96];
|
||||
uint32_t state[8];
|
||||
} u;
|
||||
size_t i;
|
||||
uint8_t ivec[4];
|
||||
uint8_t U[32];
|
||||
uint8_t T[32];
|
||||
uint64_t j;
|
||||
int k;
|
||||
size_t clen;
|
||||
|
||||
/* Sanity-check. */
|
||||
assert(dkLen <= 32 * (size_t)(UINT32_MAX));
|
||||
|
||||
if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) {
|
||||
uint32_t oldcount;
|
||||
uint8_t * ivecp;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
_HMAC_SHA256_Init(&hctx, passwd, passwdlen,
|
||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
||||
_HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32);
|
||||
|
||||
/* Prepare ictx padding. */
|
||||
oldcount = hctx.ictx.count & (0x3f << 3);
|
||||
_HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32);
|
||||
if ((hctx.ictx.count & (0x3f << 3)) < oldcount ||
|
||||
SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32))
|
||||
goto generic; /* Can't happen due to saltlen check */
|
||||
ivecp = hctx.ictx.buf + (oldcount >> 3);
|
||||
|
||||
/* Prepare octx padding. */
|
||||
hctx.octx.count += 32 << 3;
|
||||
SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivecp, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(u.state, hctx.ictx.state, sizeof(u.state));
|
||||
|
||||
SHA256_Transform(u.state, hctx.ictx.buf );
|
||||
be32enc_vect(hctx.octx.buf, u.state, 4);
|
||||
memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
||||
SHA256_Transform(u.state, hctx.octx.buf );
|
||||
|
||||
// SHA256_Transform(u.state, hctx.ictx.buf,
|
||||
// &tmp32[0], &tmp32[64]);
|
||||
// be32enc_vect(hctx.octx.buf, u.state, 4);
|
||||
// memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
||||
// SHA256_Transform(u.state, hctx.octx.buf,
|
||||
// &tmp32[0], &tmp32[64]);
|
||||
|
||||
be32enc_vect(&buf[i * 32], u.state, 4);
|
||||
}
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
generic:
|
||||
/* Compute HMAC state after processing P. */
|
||||
_HMAC_SHA256_Init(&Phctx, passwd, passwdlen,
|
||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&hctx, ivec, 4, tmp32);
|
||||
_HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8);
|
||||
|
||||
if (c > 1) {
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(U, T, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
_HMAC_SHA256_Update(&hctx, U, 32, tmp32);
|
||||
_HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++)
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean the stack. */
|
||||
insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(U, 32);
|
||||
insecure_memzero(T, 32);
|
||||
|
||||
cleanup:
|
||||
insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX));
|
||||
insecure_memzero(tmp32, 288);
|
||||
insecure_memzero(&u, sizeof(u));
|
||||
}
|
@@ -1,129 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2005-2016 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA256_H_
#define _SHA256_H_

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Every public name is remapped to a libcperciva_-prefixed one in order to
 * avoid namespace collisions with anyone else's SHA256 code (e.g., the
 * code in OpenSSL).
 */
#define SHA256_Init		libcperciva_SHA256_Init
#define SHA256_Update		libcperciva_SHA256_Update
#define SHA256_Final		libcperciva_SHA256_Final
#define SHA256_Buf		libcperciva_SHA256_Buf
#define SHA256_CTX		libcperciva_SHA256_CTX
#define HMAC_SHA256_Init	libcperciva_HMAC_SHA256_Init
#define HMAC_SHA256_Update	libcperciva_HMAC_SHA256_Update
#define HMAC_SHA256_Final	libcperciva_HMAC_SHA256_Final
#define HMAC_SHA256_Buf		libcperciva_HMAC_SHA256_Buf
#define HMAC_SHA256_CTX		libcperciva_HMAC_SHA256_CTX

/* Context structure for SHA256 operations. */
typedef struct {
	uint32_t state[8];
	uint64_t count;
	uint8_t buf[64];
} SHA256_CTX;

/**
 * SHA256_Init(ctx):
 * Initialize the SHA256 context ${ctx}.
 */
void SHA256_Init(SHA256_CTX * ctx);

/**
 * SHA256_Update(ctx, in, len):
 * Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
 */
void SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len);

/**
 * SHA256_Final(digest, ctx):
 * Output the SHA256 hash of the data input to the context ${ctx} into the
 * buffer ${digest}.
 */
void SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx);

/**
 * SHA256_Buf(in, len, digest):
 * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
 */
void SHA256_Buf(const void * in, size_t len, uint8_t digest[32]);

/* Context structure for HMAC-SHA256 operations: inner and outer hashes. */
typedef struct {
	SHA256_CTX ictx;
	SHA256_CTX octx;
} HMAC_SHA256_CTX;

/**
 * HMAC_SHA256_Init(ctx, K, Klen):
 * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
 * ${K}.
 */
void HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * K, size_t Klen);

/**
 * HMAC_SHA256_Update(ctx, in, len):
 * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
 */
void HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len);

/**
 * HMAC_SHA256_Final(digest, ctx):
 * Output the HMAC-SHA256 of the data input to the context ${ctx} into the
 * buffer ${digest}.
 */
void HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx);

/**
 * HMAC_SHA256_Buf(K, Klen, in, len, digest):
 * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
 * length ${Klen}, and write the result to ${digest}.
 */
void HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in,
    size_t len, uint8_t digest[32]);

/**
 * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
 * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
 * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
 */
void PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen,
    const uint8_t * salt, size_t saltlen, uint64_t c, uint8_t * buf,
    size_t dkLen);

#ifdef __cplusplus
}
#endif

#endif /* !_SHA256_H_ */
|
@@ -1,134 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2005-2016 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA256_H_
#define _SHA256_H_

#include <stddef.h>
#include <stdint.h>
/*
 * SHA256_CTX and SHA256_Init/Update/Final come from OpenSSL in this
 * variant of the header.  The header lives at <openssl/sha.h>; the former
 * spelling <openssl.sha> names no existing file.
 */
#include <openssl/sha.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Use #defines in order to avoid namespace collisions with anyone else's
 * SHA256 code.  SHA256_Init, SHA256_Update, SHA256_Final and SHA256_CTX
 * are deliberately NOT remapped here, so that those names resolve to the
 * declarations pulled in by the include above.
 */
#define SHA256_Buf		libcperciva_SHA256_Buf
#define HMAC_SHA256_Init	libcperciva_HMAC_SHA256_Init
#define HMAC_SHA256_Update	libcperciva_HMAC_SHA256_Update
#define HMAC_SHA256_Final	libcperciva_HMAC_SHA256_Final
#define HMAC_SHA256_Buf		libcperciva_HMAC_SHA256_Buf
#define HMAC_SHA256_CTX		libcperciva_HMAC_SHA256_CTX

#if 0
/*
 * Local SHA256 context and primitives, disabled while the external
 * implementation is used.
 */

/* Context structure for SHA256 operations. */
typedef struct {
	uint32_t state[8];
	uint64_t count;
	uint8_t buf[64];
} SHA256_CTX;

/**
 * SHA256_Init(ctx):
 * Initialize the SHA256 context ${ctx}.
 */
void SHA256_Init(SHA256_CTX * ctx);

/**
 * SHA256_Update(ctx, in, len):
 * Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
 */
void SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len);

/**
 * SHA256_Final(digest, ctx):
 * Output the SHA256 hash of the data input to the context ${ctx} into the
 * buffer ${digest}.
 */
void SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx);
#endif

/**
 * SHA256_Buf(in, len, digest):
 * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
 */
void SHA256_Buf(const void * in, size_t len, uint8_t digest[32]);

/* Context structure for HMAC-SHA256 operations: inner and outer hashes. */
typedef struct {
	SHA256_CTX ictx;
	SHA256_CTX octx;
} HMAC_SHA256_CTX;

/**
 * HMAC_SHA256_Init(ctx, K, Klen):
 * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
 * ${K}.
 */
void HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * K, size_t Klen);

/**
 * HMAC_SHA256_Update(ctx, in, len):
 * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
 */
void HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len);

/**
 * HMAC_SHA256_Final(digest, ctx):
 * Output the HMAC-SHA256 of the data input to the context ${ctx} into the
 * buffer ${digest}.
 */
void HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx);

/**
 * HMAC_SHA256_Buf(K, Klen, in, len, digest):
 * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
 * length ${Klen}, and write the result to ${digest}.
 */
void HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in,
    size_t len, uint8_t digest[32]);

/**
 * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
 * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
 * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
 */
void PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen,
    const uint8_t * salt, size_t saltlen, uint64_t c, uint8_t * buf,
    size_t dkLen);

#ifdef __cplusplus
}
#endif

#endif /* !_SHA256_H_ */
|
218
algo/yespower/sha256_p.c
Normal file
218
algo/yespower/sha256_p.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*-
|
||||
* Copyright 2005,2007,2009 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "sha256_p.h"
|
||||
#include "compat.h"
|
||||
|
||||
|
||||
/*
 * Elementary functions used by SHA256.
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as `a | b` or `i + 1` expand correctly.  Arguments may be evaluated more
 * than once, so they must be free of side effects.  ROTR expects a 32-bit
 * unsigned x and 0 < n < 32.
 */
#define Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)	((x) >> (n))
#define ROTR(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.
 *
 * Updates d and h in place from the eight working variables and the value
 * k (round constant plus message word).  The caller must have uint32_t
 * temporaries named t0 and t1 in scope.  Wrapped in do { } while (0) so
 * that an invocation followed by ';' is exactly one statement and is safe
 * as the body of an unbraced if/for.
 */
#define RND(a, b, c, d, e, f, g, h, k)			\
	do {						\
		t0 = (h) + S1(e) + Ch(e, f, g) + (k);	\
		t1 = S0(a) + Maj(a, b, c);		\
		(d) += t0;				\
		(h) = t0 + t1;				\
	} while (0)

/*
 * Adjusted round function for rotating state: instead of shuffling the
 * eight working variables after each round, round i addresses them through
 * indices rotated by i, so S[] never has to be permuted.
 */
#define RNDr(S, W, i, k)				\
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8],	\
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8],	\
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8],	\
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8],	\
	    (W)[i] + (k))
|
||||
|
||||
/*
|
||||
static unsigned char PAD[64] = {
|
||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
*/
|
||||
/**
|
||||
* SHA256_Buf(in, len, digest):
|
||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
||||
*/
|
||||
void
|
||||
SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init( &ctx );
|
||||
SHA256_Update( &ctx, in, len );
|
||||
SHA256_Final( digest, &ctx );
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
||||
* length ${Klen}, and write the result to ${digest}.
|
||||
*/
|
||||
void
|
||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
||||
uint8_t digest[32])
|
||||
{
|
||||
HMAC_SHA256_CTX ctx;
|
||||
|
||||
HMAC_SHA256_Init( &ctx, K, Klen );
|
||||
HMAC_SHA256_Update( &ctx, in, len );
|
||||
HMAC_SHA256_Final( digest, &ctx );
|
||||
}
|
||||
|
||||
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||
void
|
||||
HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen )
|
||||
{
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
const unsigned char * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if (Klen > 64) {
|
||||
SHA256_Init( &ctx->ictx );
|
||||
SHA256_Update( &ctx->ictx, K, Klen );
|
||||
SHA256_Final( khash, &ctx->ictx );
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
SHA256_Init( &ctx->ictx );
|
||||
memset( pad, 0x36, 64 );
|
||||
for ( i = 0; i < Klen; i++ )
|
||||
pad[i] ^= K[i];
|
||||
SHA256_Update( &ctx->ictx, pad, 64 );
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
SHA256_Init( &ctx->octx );
|
||||
memset(pad, 0x5c, 64);
|
||||
for ( i = 0; i < Klen; i++ )
|
||||
pad[i] ^= K[i];
|
||||
SHA256_Update( &ctx->octx, pad, 64 );
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(khash, 0, 32);
|
||||
}
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
SHA256_Update( &ctx->ictx, in, len );
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx )
|
||||
{
|
||||
unsigned char ihash[32];
|
||||
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final( ihash, &ctx->ictx );
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update( &ctx->octx, ihash, 32 );
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final( digest, &ctx->octx );
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void
|
||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX PShctx, hctx;
|
||||
uint8_t _ALIGN(128) T[32];
|
||||
uint8_t _ALIGN(128) U[32];
|
||||
uint8_t ivec[4];
|
||||
size_t i, clen;
|
||||
uint64_t j;
|
||||
int k;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update(&PShctx, salt, saltlen);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
||||
HMAC_SHA256_Update(&hctx, ivec, 4);
|
||||
HMAC_SHA256_Final(U, &hctx);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(T, U, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
HMAC_SHA256_Init(&hctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update(&hctx, U, 32);
|
||||
HMAC_SHA256_Final(U, &hctx);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++)
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean PShctx, since we never called _Final on it. */
|
||||
//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
|
||||
}
|
@@ -1,496 +0,0 @@
|
||||
/*-
|
||||
* Copyright 2005,2007,2009 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "sha256_p.h"
|
||||
#include "compat.h"
|
||||
|
||||
/*
 * be32enc_vect(dst, src, len):
 * Serialize len/4 uint32_t words from ${src} into ${len} bytes at ${dst},
 * each word in big-endian byte order.  ${len} is assumed to be a multiple
 * of 4; any remainder bytes are ignored.
 */
static void
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
	const uint32_t *const end = src + len / 4;

	while (src != end) {
		be32enc(dst, *src);
		src++;
		dst += 4;
	}
}
|
||||
|
||||
/*
 * be32dec_vect(dst, src, len):
 * Parse ${len} bytes at ${src} as len/4 big-endian 32-bit words and store
 * them in ${dst}.  ${len} is assumed to be a multiple of 4; any remainder
 * bytes are ignored.
 */
static void
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
{
	uint32_t *const end = dst + len / 4;

	while (dst != end) {
		*dst = be32dec(src);
		dst++;
		src += 4;
	}
}
|
||||
|
||||
/*
 * Elementary functions used by SHA256.
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as `a | b` or `i + 1` expand correctly.  Arguments may be evaluated more
 * than once, so they must be free of side effects.  ROTR expects a 32-bit
 * unsigned x and 0 < n < 32.
 */
#define Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)	((x) >> (n))
#define ROTR(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.
 *
 * Updates d and h in place from the eight working variables and the value
 * k (round constant plus message word).  The caller must have uint32_t
 * temporaries named t0 and t1 in scope.  Wrapped in do { } while (0) so
 * that an invocation followed by ';' is exactly one statement and is safe
 * as the body of an unbraced if/for.
 */
#define RND(a, b, c, d, e, f, g, h, k)			\
	do {						\
		t0 = (h) + S1(e) + Ch(e, f, g) + (k);	\
		t1 = S0(a) + Maj(a, b, c);		\
		(d) += t0;				\
		(h) = t0 + t1;				\
	} while (0)

/*
 * Adjusted round function for rotating state: instead of shuffling the
 * eight working variables after each round, round i addresses them through
 * indices rotated by i, so S[] never has to be permuted.
 */
#define RNDr(S, W, i, k)				\
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8],	\
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8],	\
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8],	\
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8],	\
	    (W)[i] + (k))
|
||||
|
||||
/*
|
||||
* SHA256 block compression function. The 256-bit state is transformed via
|
||||
* the 512-bit input block to produce a new state.
|
||||
*/
|
||||
static void
|
||||
SHA256_Transform_p(uint32_t * state, const unsigned char block[64])
|
||||
{
|
||||
uint32_t _ALIGN(128) W[64], S[8];
|
||||
uint32_t t0, t1;
|
||||
int i;
|
||||
|
||||
/* 1. Prepare message schedule W. */
|
||||
be32dec_vect(W, block, 64);
|
||||
for (i = 16; i < 64; i++)
|
||||
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
|
||||
|
||||
/* 2. Initialize working variables. */
|
||||
memcpy(S, state, 32);
|
||||
|
||||
/* 3. Mix. */
|
||||
RNDr(S, W, 0, 0x428a2f98);
|
||||
RNDr(S, W, 1, 0x71374491);
|
||||
RNDr(S, W, 2, 0xb5c0fbcf);
|
||||
RNDr(S, W, 3, 0xe9b5dba5);
|
||||
RNDr(S, W, 4, 0x3956c25b);
|
||||
RNDr(S, W, 5, 0x59f111f1);
|
||||
RNDr(S, W, 6, 0x923f82a4);
|
||||
RNDr(S, W, 7, 0xab1c5ed5);
|
||||
RNDr(S, W, 8, 0xd807aa98);
|
||||
RNDr(S, W, 9, 0x12835b01);
|
||||
RNDr(S, W, 10, 0x243185be);
|
||||
RNDr(S, W, 11, 0x550c7dc3);
|
||||
RNDr(S, W, 12, 0x72be5d74);
|
||||
RNDr(S, W, 13, 0x80deb1fe);
|
||||
RNDr(S, W, 14, 0x9bdc06a7);
|
||||
RNDr(S, W, 15, 0xc19bf174);
|
||||
RNDr(S, W, 16, 0xe49b69c1);
|
||||
RNDr(S, W, 17, 0xefbe4786);
|
||||
RNDr(S, W, 18, 0x0fc19dc6);
|
||||
RNDr(S, W, 19, 0x240ca1cc);
|
||||
RNDr(S, W, 20, 0x2de92c6f);
|
||||
RNDr(S, W, 21, 0x4a7484aa);
|
||||
RNDr(S, W, 22, 0x5cb0a9dc);
|
||||
RNDr(S, W, 23, 0x76f988da);
|
||||
RNDr(S, W, 24, 0x983e5152);
|
||||
RNDr(S, W, 25, 0xa831c66d);
|
||||
RNDr(S, W, 26, 0xb00327c8);
|
||||
RNDr(S, W, 27, 0xbf597fc7);
|
||||
RNDr(S, W, 28, 0xc6e00bf3);
|
||||
RNDr(S, W, 29, 0xd5a79147);
|
||||
RNDr(S, W, 30, 0x06ca6351);
|
||||
RNDr(S, W, 31, 0x14292967);
|
||||
RNDr(S, W, 32, 0x27b70a85);
|
||||
RNDr(S, W, 33, 0x2e1b2138);
|
||||
RNDr(S, W, 34, 0x4d2c6dfc);
|
||||
RNDr(S, W, 35, 0x53380d13);
|
||||
RNDr(S, W, 36, 0x650a7354);
|
||||
RNDr(S, W, 37, 0x766a0abb);
|
||||
RNDr(S, W, 38, 0x81c2c92e);
|
||||
RNDr(S, W, 39, 0x92722c85);
|
||||
RNDr(S, W, 40, 0xa2bfe8a1);
|
||||
RNDr(S, W, 41, 0xa81a664b);
|
||||
RNDr(S, W, 42, 0xc24b8b70);
|
||||
RNDr(S, W, 43, 0xc76c51a3);
|
||||
RNDr(S, W, 44, 0xd192e819);
|
||||
RNDr(S, W, 45, 0xd6990624);
|
||||
RNDr(S, W, 46, 0xf40e3585);
|
||||
RNDr(S, W, 47, 0x106aa070);
|
||||
RNDr(S, W, 48, 0x19a4c116);
|
||||
RNDr(S, W, 49, 0x1e376c08);
|
||||
RNDr(S, W, 50, 0x2748774c);
|
||||
RNDr(S, W, 51, 0x34b0bcb5);
|
||||
RNDr(S, W, 52, 0x391c0cb3);
|
||||
RNDr(S, W, 53, 0x4ed8aa4a);
|
||||
RNDr(S, W, 54, 0x5b9cca4f);
|
||||
RNDr(S, W, 55, 0x682e6ff3);
|
||||
RNDr(S, W, 56, 0x748f82ee);
|
||||
RNDr(S, W, 57, 0x78a5636f);
|
||||
RNDr(S, W, 58, 0x84c87814);
|
||||
RNDr(S, W, 59, 0x8cc70208);
|
||||
RNDr(S, W, 60, 0x90befffa);
|
||||
RNDr(S, W, 61, 0xa4506ceb);
|
||||
RNDr(S, W, 62, 0xbef9a3f7);
|
||||
RNDr(S, W, 63, 0xc67178f2);
|
||||
|
||||
/* 4. Mix local working variables into global state */
|
||||
for (i = 0; i < 8; i++)
|
||||
state[i] += S[i];
|
||||
#if 0
|
||||
/* Clean the stack. */
|
||||
memset(W, 0, 256);
|
||||
memset(S, 0, 32);
|
||||
t0 = t1 = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * SHA256 padding source: a single 0x80 marker byte followed by 63 zero
 * bytes (the remaining elements are zero-initialized).  The table is only
 * ever read, so it is const.
 */
static const unsigned char PAD[64] = { 0x80 };
|
||||
|
||||
// only called by SHA256_Final_p
|
||||
/* Add padding and terminating bit-count. */
|
||||
static void
|
||||
SHA256_Pad_p(SHA256_CTX_p * ctx)
|
||||
{
|
||||
unsigned char len[8];
|
||||
uint32_t r, plen;
|
||||
|
||||
/*
|
||||
* Convert length to a vector of bytes -- we do this now rather
|
||||
* than later because the length will change after we pad.
|
||||
*/
|
||||
be32enc_vect(len, ctx->count, 8);
|
||||
|
||||
/* Add 1--64 bytes so that the resulting length is 56 mod 64 */
|
||||
r = (ctx->count[1] >> 3) & 0x3f;
|
||||
plen = (r < 56) ? (56 - r) : (120 - r);
|
||||
SHA256_Update_p(ctx, PAD, (size_t)plen);
|
||||
/* Add the terminating bit-count */
|
||||
SHA256_Update_p(ctx, len, 8);
|
||||
}
|
||||
|
||||
/* SHA-256 initialization. Begins a SHA-256 operation. */
|
||||
void
|
||||
SHA256_Init_p(SHA256_CTX_p * ctx)
|
||||
{
|
||||
/* Zero bits processed so far */
|
||||
ctx->count[0] = ctx->count[1] = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0x6A09E667;
|
||||
ctx->state[1] = 0xBB67AE85;
|
||||
ctx->state[2] = 0x3C6EF372;
|
||||
ctx->state[3] = 0xA54FF53A;
|
||||
ctx->state[4] = 0x510E527F;
|
||||
ctx->state[5] = 0x9B05688C;
|
||||
ctx->state[6] = 0x1F83D9AB;
|
||||
ctx->state[7] = 0x5BE0CD19;
|
||||
}
|
||||
|
||||
/* Add bytes into the hash */
|
||||
void
|
||||
SHA256_Update_p(SHA256_CTX_p * ctx, const void *in, size_t len)
|
||||
{
|
||||
uint32_t bitlen[2];
|
||||
uint32_t r;
|
||||
const unsigned char *src = in;
|
||||
|
||||
/* Number of bytes left in the buffer from previous updates */
|
||||
r = (ctx->count[1] >> 3) & 0x3f;
|
||||
|
||||
/* Convert the length into a number of bits */
|
||||
bitlen[1] = ((uint32_t)len) << 3;
|
||||
bitlen[0] = (uint32_t)(len >> 29);
|
||||
|
||||
/* Update number of bits */
|
||||
if ((ctx->count[1] += bitlen[1]) < bitlen[1])
|
||||
ctx->count[0]++;
|
||||
ctx->count[0] += bitlen[0];
|
||||
|
||||
/* Handle the case where we don't need to perform any transforms */
|
||||
if (len < 64 - r) {
|
||||
memcpy(&ctx->buf[r], src, len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Finish the current block */
|
||||
memcpy(&ctx->buf[r], src, 64 - r);
|
||||
SHA256_Transform_p(ctx->state, ctx->buf);
|
||||
src += 64 - r;
|
||||
len -= 64 - r;
|
||||
|
||||
/* Perform complete blocks */
|
||||
while (len >= 64) {
|
||||
SHA256_Transform_p(ctx->state, src);
|
||||
src += 64;
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/* Copy left over data into buffer */
|
||||
memcpy(ctx->buf, src, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHA-256 finalization. Pads the input data, exports the hash value,
|
||||
* and clears the context state.
|
||||
*/
|
||||
void
|
||||
SHA256_Final_p(unsigned char digest[32], SHA256_CTX_p * ctx)
|
||||
{
|
||||
/* Add padding */
|
||||
SHA256_Pad_p(ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be32enc_vect(digest, ctx->state, 32);
|
||||
|
||||
/* Clear the context state */
|
||||
memset((void *)ctx, 0, sizeof(*ctx));
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Buf(in, len, digest):
|
||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
||||
*/
|
||||
void
|
||||
SHA256_Buf_p(const void * in, size_t len, uint8_t digest[32])
|
||||
{
|
||||
// SHA256_CTX_p ctx;
|
||||
// uint32_t tmp32[72];
|
||||
|
||||
#if defined(__SHA__)
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init(&ctx);
|
||||
SHA256_Update(&ctx, in, len);
|
||||
SHA256_Final(digest, &ctx);
|
||||
#else
|
||||
SHA256_CTX_p ctx;
|
||||
SHA256_Init_p(&ctx);
|
||||
SHA256_Update_p(&ctx, in, len);
|
||||
SHA256_Final_p(digest, &ctx);
|
||||
#endif
|
||||
|
||||
/* Clean the stack. */
|
||||
// insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
||||
// insecure_memzero(tmp32, 288);
|
||||
}
|
||||
|
||||
/**
|
||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
||||
* length ${Klen}, and write the result to ${digest}.
|
||||
*/
|
||||
void
|
||||
HMAC_SHA256_Buf_p(const void * K, size_t Klen, const void * in, size_t len,
|
||||
uint8_t digest[32])
|
||||
{
|
||||
HMAC_SHA256_CTX_p ctx;
|
||||
// uint32_t tmp32[72];
|
||||
// uint8_t tmp8[96];
|
||||
|
||||
HMAC_SHA256_Init_p(&ctx, K, Klen);
|
||||
HMAC_SHA256_Update_p(&ctx, in, len);
|
||||
HMAC_SHA256_Final_p(digest, &ctx);
|
||||
|
||||
/* Clean the stack. */
|
||||
// insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
||||
// insecure_memzero(tmp32, 288);
|
||||
// insecure_memzero(tmp8, 96);
|
||||
}
|
||||
|
||||
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||
void
|
||||
HMAC_SHA256_Init_p(HMAC_SHA256_CTX_p * ctx, const void * _K, size_t Klen)
|
||||
{
|
||||
unsigned char pad[64];
|
||||
unsigned char khash[32];
|
||||
const unsigned char * K = _K;
|
||||
size_t i;
|
||||
|
||||
/* If Klen > 64, the key is really SHA256(K). */
|
||||
if (Klen > 64) {
|
||||
#if defined(__SHA__)
|
||||
SHA256_Init(&ctx->ictx);
|
||||
SHA256_Update(&ctx->ictx, K, Klen);
|
||||
SHA256_Final(khash, &ctx->ictx);
|
||||
#else
|
||||
SHA256_Init_p(&ctx->ictx);
|
||||
SHA256_Update_p(&ctx->ictx, K, Klen);
|
||||
SHA256_Final_p(khash, &ctx->ictx);
|
||||
#endif
|
||||
K = khash;
|
||||
Klen = 32;
|
||||
}
|
||||
|
||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||
#if defined(__SHA__)
|
||||
SHA256_Init(&ctx->ictx);
|
||||
#else
|
||||
SHA256_Init_p(&ctx->ictx);
|
||||
#endif
|
||||
memset(pad, 0x36, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
#if defined(__SHA__)
|
||||
SHA256_Update(&ctx->ictx, pad, 64);
|
||||
#else
|
||||
SHA256_Update_p(&ctx->ictx, pad, 64);
|
||||
#endif
|
||||
|
||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||
#if defined(__SHA__)
|
||||
SHA256_Init(&ctx->octx);
|
||||
#else
|
||||
SHA256_Init_p(&ctx->octx);
|
||||
#endif
|
||||
memset(pad, 0x5c, 64);
|
||||
for (i = 0; i < Klen; i++)
|
||||
pad[i] ^= K[i];
|
||||
#if defined(__SHA__)
|
||||
SHA256_Update(&ctx->octx, pad, 64);
|
||||
#else
|
||||
SHA256_Update_p(&ctx->octx, pad, 64);
|
||||
#endif
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(khash, 0, 32);
|
||||
}
|
||||
|
||||
/* Add bytes to the HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Update_p(HMAC_SHA256_CTX_p * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
/* Feed data to the inner SHA256 operation. */
|
||||
#if defined(__SHA__)
|
||||
SHA256_Update(&ctx->ictx, in, len);
|
||||
#else
|
||||
SHA256_Update_p(&ctx->ictx, in, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Finish an HMAC-SHA256 operation. */
|
||||
void
|
||||
HMAC_SHA256_Final_p(unsigned char digest[32], HMAC_SHA256_CTX_p * ctx)
|
||||
{
|
||||
unsigned char ihash[32];
|
||||
|
||||
#if defined(__SHA__)
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final(ihash, &ctx->ictx);
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update(&ctx->octx, ihash, 32);
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final(digest, &ctx->octx);
|
||||
#else
|
||||
/* Finish the inner SHA256 operation. */
|
||||
SHA256_Final_p(ihash, &ctx->ictx);
|
||||
|
||||
/* Feed the inner hash to the outer SHA256 operation. */
|
||||
SHA256_Update_p(&ctx->octx, ihash, 32);
|
||||
|
||||
/* Finish the outer SHA256 operation. */
|
||||
SHA256_Final_p(digest, &ctx->octx);
|
||||
#endif
|
||||
|
||||
/* Clean the stack. */
|
||||
//memset(ihash, 0, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void
|
||||
PBKDF2_SHA256_p(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||
{
|
||||
HMAC_SHA256_CTX_p PShctx, hctx;
|
||||
uint8_t _ALIGN(128) T[32];
|
||||
uint8_t _ALIGN(128) U[32];
|
||||
uint8_t ivec[4];
|
||||
size_t i, clen;
|
||||
uint64_t j;
|
||||
int k;
|
||||
|
||||
/* Compute HMAC state after processing P and S. */
|
||||
HMAC_SHA256_Init_p(&PShctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update_p(&PShctx, salt, saltlen);
|
||||
|
||||
/* Iterate through the blocks. */
|
||||
for (i = 0; i * 32 < dkLen; i++) {
|
||||
/* Generate INT(i + 1). */
|
||||
be32enc(ivec, (uint32_t)(i + 1));
|
||||
|
||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_p));
|
||||
HMAC_SHA256_Update_p(&hctx, ivec, 4);
|
||||
HMAC_SHA256_Final_p(U, &hctx);
|
||||
|
||||
/* T_i = U_1 ... */
|
||||
memcpy(T, U, 32);
|
||||
|
||||
for (j = 2; j <= c; j++) {
|
||||
/* Compute U_j. */
|
||||
HMAC_SHA256_Init_p(&hctx, passwd, passwdlen);
|
||||
HMAC_SHA256_Update_p(&hctx, U, 32);
|
||||
HMAC_SHA256_Final_p(U, &hctx);
|
||||
|
||||
/* ... xor U_j ... */
|
||||
for (k = 0; k < 32; k++)
|
||||
T[k] ^= U[k];
|
||||
}
|
||||
|
||||
/* Copy as many bytes as necessary into buf. */
|
||||
clen = dkLen - i * 32;
|
||||
if (clen > 32)
|
||||
clen = 32;
|
||||
memcpy(&buf[i * 32], T, clen);
|
||||
}
|
||||
|
||||
/* Clean PShctx, since we never called _Final on it. */
|
||||
//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
|
||||
}
|
@@ -33,45 +33,24 @@
|
||||
#include <stdint.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
/* Streaming state for the local (*_p) SHA-256 implementation. */
typedef struct SHA256Context {
|
||||
uint32_t state[8]; /* current hash state; loaded with the initialization constants by SHA256_Init_p */
|
||||
uint32_t count[2]; /* number of message bits processed so far: count[0] is the high word, count[1] the low word */
|
||||
unsigned char buf[64]; /* bytes of the current, not yet complete 64-byte block */
|
||||
} SHA256_CTX_p;
|
||||
|
||||
/*
|
||||
typedef struct HMAC_SHA256Context {
|
||||
SHA256_CTX_Y ictx;
|
||||
SHA256_CTX_Y octx;
|
||||
} HMAC_SHA256_CTX_Y;
|
||||
*/
|
||||
|
||||
typedef struct HMAC_SHA256Context {
|
||||
#if defined(__SHA__)
|
||||
SHA256_CTX ictx;
|
||||
SHA256_CTX octx;
|
||||
#else
|
||||
SHA256_CTX_p ictx;
|
||||
SHA256_CTX_p octx;
|
||||
#endif
|
||||
} HMAC_SHA256_CTX_p;
|
||||
} HMAC_SHA256_CTX;
|
||||
|
||||
void SHA256_Init_p(SHA256_CTX_p *);
|
||||
void SHA256_Update_p(SHA256_CTX_p *, const void *, size_t);
|
||||
void SHA256_Final_p(unsigned char [32], SHA256_CTX_p *);
|
||||
void SHA256_Buf_p(const void * in, size_t len, uint8_t digest[32]);
|
||||
void HMAC_SHA256_Init_p(HMAC_SHA256_CTX_p *, const void *, size_t);
|
||||
void HMAC_SHA256_Update_p(HMAC_SHA256_CTX_p *, const void *, size_t);
|
||||
void HMAC_SHA256_Final_p(unsigned char [32], HMAC_SHA256_CTX_p *);
|
||||
void HMAC_SHA256_Buf_p(const void * K, size_t Klen, const void * in,
|
||||
size_t len, uint8_t digest[32]);
|
||||
void SHA256_Buf( const void * in, size_t len, uint8_t digest[32] );
|
||||
void HMAC_SHA256_Init( HMAC_SHA256_CTX *, const void *, size_t );
|
||||
void HMAC_SHA256_Update( HMAC_SHA256_CTX *, const void *, size_t );
|
||||
void HMAC_SHA256_Final( unsigned char [32], HMAC_SHA256_CTX * );
|
||||
void HMAC_SHA256_Buf( const void * K, size_t Klen, const void * in,
|
||||
size_t len, uint8_t digest[32] );
|
||||
|
||||
/**
|
||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||
*/
|
||||
void PBKDF2_SHA256_p(const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
uint64_t, uint8_t *, size_t);
|
||||
void PBKDF2_SHA256( const uint8_t *, size_t, const uint8_t *, size_t,
|
||||
uint64_t, uint8_t *, size_t);
|
||||
|
||||
#endif /* !_SHA256_H_ */
|
@@ -62,6 +62,7 @@
|
||||
#warning "Note: building generic code for non-x86. That's OK."
|
||||
#endif
|
||||
*/
|
||||
|
||||
/*
|
||||
* The SSE4 code version has fewer instructions than the generic SSE2 version,
|
||||
* but all of the instructions are SIMD, thereby wasting the scalar execution
|
||||
@@ -96,7 +97,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "insecure_memzero.h"
|
||||
#include "sha256.h"
|
||||
#include "sha256_p.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "yespower.h"
|
||||
@@ -528,7 +529,7 @@ static volatile uint64_t Smask2var = Smask2;
|
||||
/* 64-bit without AVX. This relies on out-of-order execution and register
|
||||
* renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g.,
|
||||
* it runs great on Haswell. */
|
||||
//#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
||||
#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
||||
#undef MAYBE_MEMORY_BARRIER
|
||||
#define MAYBE_MEMORY_BARRIER \
|
||||
__asm__("" : : : "memory");
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -51,7 +51,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sha256.h"
|
||||
#include "sha256_p.h"
|
||||
#include "sysendian.h"
|
||||
|
||||
#include "yespower.h"
|
||||
@@ -534,11 +534,12 @@ int yespower(yespower_local_t *local,
|
||||
|
||||
if (pers) {
|
||||
HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen,
|
||||
return true;
|
||||
(uint8_t *)sha256);
|
||||
SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst);
|
||||
}
|
||||
} else {
|
||||
HMAC_SHA256_Buf((uint8_t *)B + B_size - 64, 64,
|
||||
HMAC_SHA256_Buf_P((uint8_t *)B + B_size - 64, 64,
|
||||
sha256, sizeof(sha256), (uint8_t *)dst);
|
||||
}
|
||||
|
||||
|
@@ -38,7 +38,7 @@ void yespower_hash( const char *input, char *output, uint32_t len )
|
||||
}
|
||||
|
||||
int scanhash_yespower( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
@@ -48,6 +48,7 @@ int scanhash_yespower( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
for (int k = 0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
32
avxdefs.h
32
avxdefs.h
@@ -662,57 +662,57 @@ do { \
|
||||
|
||||
#define mm128_ror1x64_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 8 ) | _mm_slli_si128( v2, 24 ); \
|
||||
v2 = _mm_srli_si128( v2, 8 ) | _mm_slli_si128( v1, 24 ); \
|
||||
__m128i t = _mm_srli_si128( v1, 8 ) | _mm_slli_si128( v2, 8 ); \
|
||||
v2 = _mm_srli_si128( v2, 8 ) | _mm_slli_si128( v1, 8 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x64_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 8 ) | _mm_srli_si128( v2, 24 ); \
|
||||
v2 = _mm_slli_si128( v2, 8 ) | _mm_srli_si128( v1, 24 ); \
|
||||
__m128i t = _mm_slli_si128( v1, 8 ) | _mm_srli_si128( v2, 8 ); \
|
||||
v2 = _mm_slli_si128( v2, 8 ) | _mm_srli_si128( v1, 8 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_ror1x32_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 4 ) | _mm_slli_si128( v2, 28 ); \
|
||||
v2 = _mm_srli_si128( v2, 4 ) | _mm_slli_si128( v1, 28 ); \
|
||||
__m128i t = _mm_srli_si128( v1, 4 ) | _mm_slli_si128( v2, 12 ); \
|
||||
v2 = _mm_srli_si128( v2, 4 ) | _mm_slli_si128( v1, 12 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x32_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 4 ) | _mm_srli_si128( v2, 28 ); \
|
||||
v2 = _mm_slli_si128( v2, 4 ) | _mm_srli_si128( v1, 28 ); \
|
||||
__m128i t = _mm_slli_si128( v1, 4 ) | _mm_srli_si128( v2, 12 ); \
|
||||
v2 = _mm_slli_si128( v2, 4 ) | _mm_srli_si128( v1, 12 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_ror1x16_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 2 ) | _mm_slli_si128( v2, 30 ); \
|
||||
v2 = _mm_srli_si128( v2, 2 ) | _mm_slli_si128( v1, 30 ); \
|
||||
__m128i t = _mm_srli_si128( v1, 2 ) | _mm_slli_si128( v2, 14 ); \
|
||||
v2 = _mm_srli_si128( v2, 2 ) | _mm_slli_si128( v1, 14 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x16_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 2 ) | _mm_srli_si128( v2, 30 ); \
|
||||
v2 = _mm_slli_si128( v2, 2 ) | _mm_srli_si128( v1, 30 ); \
|
||||
__m128i t = _mm_slli_si128( v1, 2 ) | _mm_srli_si128( v2, 14 ); \
|
||||
v2 = _mm_slli_si128( v2, 2 ) | _mm_srli_si128( v1, 14 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_ror1x8_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 1 ) | _mm_slli_si128( v2, 31 ); \
|
||||
v2 = _mm_srli_si128( v2, 1 ) | _mm_slli_si128( v1, 31 ); \
|
||||
__m128i t = _mm_srli_si128( v1, 1 ) | _mm_slli_si128( v2, 15 ); \
|
||||
v2 = _mm_srli_si128( v2, 1 ) | _mm_slli_si128( v1, 15 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x8_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 1 ) | _mm_srli_si128( v2, 31 ); \
|
||||
v2 = _mm_slli_si128( v2, 1 ) | _mm_srli_si128( v1, 31 ); \
|
||||
__m128i t = _mm_slli_si128( v1, 1 ) | _mm_srli_si128( v2, 15 ); \
|
||||
v2 = _mm_slli_si128( v2, 1 ) | _mm_srli_si128( v1, 15 ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.1.1.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.2.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.9.1.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.9.1.1'
|
||||
PACKAGE_VERSION='3.9.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.9.2'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.9.1.1 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.9.2 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.9.1.1:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.9.2:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.9.1.1
|
||||
cpuminer-opt configure 3.9.2
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.9.1.1, which was
|
||||
It was created by cpuminer-opt $as_me 3.9.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.9.1.1'
|
||||
VERSION='3.9.2'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.9.1.1, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.9.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.9.1.1
|
||||
cpuminer-opt config.status 3.9.2
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.9.1.1])
|
||||
AC_INIT([cpuminer-opt], [3.9.2])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
@@ -847,7 +847,8 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
float rate;
|
||||
char rate_s[8] = {0};
|
||||
double sharediff = work ? work->sharediff : stratum.sharediff;
|
||||
bool solved = result && (net_diff > 0.0 ) && ( sharediff >= net_diff );
|
||||
bool solved = result && accepted_share_count && (net_diff > 0.0 )
|
||||
&& ( sharediff >= net_diff );
|
||||
char sol[32] = {0};
|
||||
int i;
|
||||
|
||||
@@ -857,15 +858,17 @@ static int share_result( int result, struct work *work, const char *reason )
|
||||
hashcount += thr_hashcount[i];
|
||||
hashrate += thr_hashrates[i];
|
||||
}
|
||||
solved = result && ( (uint64_t)hashcount > 0 ) && (net_diff > 0.0 )
|
||||
&& ( sharediff >= net_diff );
|
||||
result ? accepted_share_count++ : rejected_share_count++;
|
||||
|
||||
if ( solved )
|
||||
{
|
||||
solved_block_count++;
|
||||
if ( use_colors )
|
||||
sprintf( sol, CL_GRN " Solved" CL_WHT " %d", solved_block_count );
|
||||
sprintf( sol, CL_GRN " Solved: %d" CL_WHT, solved_block_count );
|
||||
else
|
||||
sprintf( sol, " Solved %d", solved_block_count );
|
||||
sprintf( sol, ", Solved: %d", solved_block_count );
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&stats_lock);
|
||||
|
281
interleave.h
281
interleave.h
@@ -356,6 +356,36 @@ static inline void mm256_interleave_8x32x256( void *d, const void *s00,
|
||||
s04+28, s05+28, s06+28, s07+28 );
|
||||
}
|
||||
|
||||
static inline void mm256_be_interleave_8x32x256( void *d, const void *s00,
|
||||
const void *s01, const void *s02, const void *s03, const void *s04,
|
||||
const void *s05, const void *s06, const void *s07 )
|
||||
{
|
||||
casti_m256i( d, 0 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00, s01, s02, s03,
|
||||
s04, s05, s06, s07 ) );
|
||||
casti_m256i( d, 1 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+ 4, s01+ 4, s02+ 4, s03+ 4,
|
||||
s04+ 4, s05+ 4, s06+ 4, s07+ 4 ) );
|
||||
casti_m256i( d, 2 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+ 8, s01+ 8, s02+ 8, s03+ 8,
|
||||
s04+ 8, s05+ 8, s06+ 8, s07+ 8 ) );
|
||||
casti_m256i( d, 3 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+12, s01+12, s02+12, s03+12,
|
||||
s04+12, s05+12, s06+12, s07+12 ) );
|
||||
casti_m256i( d, 4 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+16, s01+16, s02+16, s03+16,
|
||||
s04+16, s05+16, s06+16, s07+16 ) );
|
||||
casti_m256i( d, 5 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+20, s01+20, s02+20, s03+20,
|
||||
s04+20, s05+20, s06+20, s07+20 ) );
|
||||
casti_m256i( d, 6 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+24, s01+24, s02+24, s03+24,
|
||||
s04+24, s05+24, s06+24, s07+24 ) );
|
||||
casti_m256i( d, 7 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+28, s01+28, s02+28, s03+28,
|
||||
s04+28, s05+28, s06+28, s07+28 ) );
|
||||
}
|
||||
|
||||
static inline void mm256_interleave_8x32x128( void *d, const void *s00,
|
||||
const void *s01, const void *s02, const void *s03, const void *s04,
|
||||
const void *s05, const void *s06, const void *s07 )
|
||||
@@ -370,6 +400,24 @@ static inline void mm256_interleave_8x32x128( void *d, const void *s00,
|
||||
s04+12, s05+12, s06+12, s07+12 );
|
||||
}
|
||||
|
||||
static inline void mm256_be_interleave_8x32x128( void *d, const void *s00,
|
||||
const void *s01, const void *s02, const void *s03, const void *s04,
|
||||
const void *s05, const void *s06, const void *s07 )
|
||||
{
|
||||
casti_m256i( d, 0 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00, s01, s02, s03,
|
||||
s04, s05, s06, s07 ) );
|
||||
casti_m256i( d, 1 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+ 4, s01+ 4, s02+ 4, s03+ 4,
|
||||
s04+ 4, s05+ 4, s06+ 4, s07+ 4 ) );
|
||||
casti_m256i( d, 2 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+ 8, s01+ 8, s02+ 8, s03+ 8,
|
||||
s04+ 8, s05+ 8, s06+ 8, s07+ 8 ) );
|
||||
casti_m256i( d, 3 ) = mm256_bswap_32(
|
||||
mm256_put_32( s00+12, s01+12, s02+12, s03+12,
|
||||
s04+12, s05+12, s06+12, s07+12 ) );
|
||||
}
|
||||
|
||||
// can be called directly for 32 byte hash using AVX2
|
||||
static inline void mm256_deinterleave_8x32x256( void *d00, void *d01,
|
||||
void *d02, void *d03, void *d04, void *d05, void *d06,
|
||||
@@ -394,6 +442,21 @@ static inline void mm256_interleave_4x64x256( void *d, const void *s0,
|
||||
casti_m256i( d,3 ) = mm256_put_64( s0+24, s1+24, s2+24, s3+24 );
|
||||
}
|
||||
|
||||
// bswap the data as it's interleaved.
|
||||
// A bit of a missnomer, but be is nice and short.
|
||||
static inline void mm256_be_interleave_4x64x256( void *d, const void *s0,
|
||||
const void *s1, const void *s2, const void *s3 )
|
||||
{
|
||||
casti_m256i( d,0 ) = mm256_bswap_32(
|
||||
mm256_put_64( s0, s1, s2, s3 ) );
|
||||
casti_m256i( d,1 ) = mm256_bswap_32(
|
||||
mm256_put_64( s0+ 8, s1+ 8, s2+ 8, s3+ 8 ) );
|
||||
casti_m256i( d,2 ) = mm256_bswap_32(
|
||||
mm256_put_64( s0+16, s1+16, s2+16, s3+16 ) );
|
||||
casti_m256i( d,3 ) = mm256_bswap_32(
|
||||
mm256_put_64( s0+24, s1+24, s2+24, s3+24 ) );
|
||||
}
|
||||
|
||||
static inline void mm256_interleave_4x64x128( void *d, const void *s0,
|
||||
const void *s1, const void *s2, const void *s3 )
|
||||
{
|
||||
@@ -401,6 +464,14 @@ static inline void mm256_interleave_4x64x128( void *d, const void *s0,
|
||||
casti_m256i( d,1 ) = mm256_put_64( s0+ 8, s1+ 8, s2+ 8, s3+ 8 );
|
||||
}
|
||||
|
||||
static inline void mm256_be_interleave_4x64x128( void *d, const void *s0,
|
||||
const void *s1, const void *s2, const void *s3 )
|
||||
{
|
||||
casti_m256i( d,0 ) = mm256_bswap_32(
|
||||
mm256_put_64( s0, s1, s2, s3 ) );
|
||||
casti_m256i( d,1 ) = mm256_bswap_32(
|
||||
mm256_put_64( s0+ 8, s1+ 8, s2+ 8, s3+ 8 ) );
|
||||
}
|
||||
|
||||
// 4 lanes of 256 bits using 64 bit interleaving (standard final hash size)
|
||||
static inline void mm256_deinterleave_4x64x256( void *d0, void *d1, void *d2,
|
||||
@@ -496,6 +567,28 @@ static inline void mm256_interleave_8x32( void *d, const void *s0,
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// Byte swapping 8x32 interleave of up to 1024 bits from each of 8 sources.
// bit_len selects how much is converted, always rounded up to a supported
// size:
//    bit_len <= 256    256 bits
//    bit_len <= 512    512 bits
//    bit_len <= 640    640 bits (512 + 128)
//    otherwise        1024 bits
// Every 256 bit step advances each source by 32 bytes and the destination
// by 256 bytes.
static inline void mm256_be_interleave_8x32( void *d, const void *s0,
        const void *s1, const void *s2, const void *s3, const void *s4,
        const void *s5, const void *s6, const void *s7, int bit_len )
{
   // The first 256 bits are converted whatever bit_len says.
   mm256_be_interleave_8x32x256( d, s0, s1, s2, s3, s4, s5, s6, s7 );

   if ( bit_len > 256 )
   {
      mm256_be_interleave_8x32x256( d+256, s0+32, s1+32, s2+32, s3+32,
                                           s4+32, s5+32, s6+32, s7+32 );

      if ( bit_len > 640 )
      {
         // bit_len == 1024
         mm256_be_interleave_8x32x256( d+512, s0+64, s1+64, s2+64, s3+64,
                                              s4+64, s5+64, s6+64, s7+64 );
         mm256_be_interleave_8x32x256( d+768, s0+96, s1+96, s2+96, s3+96,
                                              s4+96, s5+96, s6+96, s7+96 );
      }
      else if ( bit_len > 512 )
      {
         // bit_len == 640, only 128 more bits to do
         mm256_be_interleave_8x32x128( d+512, s0+64, s1+64, s2+64, s3+64,
                                              s4+64, s5+64, s6+64, s7+64 );
      }
   }
}
|
||||
|
||||
/*
|
||||
// Slower but it works with 32 bit data
|
||||
// bit_len must be multiple of 32
|
||||
@@ -595,6 +688,23 @@ static inline void mm256_interleave_4x64( void *d, const void *s0,
|
||||
mm256_interleave_4x64x256( d+384, s0+96, s1+96, s2+96, s3+96 );
|
||||
}
|
||||
|
||||
// Byte swapping 4x64 interleave of up to 1024 bits from each of 4 sources.
// bit_len selects how much is converted, always rounded up to a supported
// size:
//    bit_len <= 256    256 bits
//    bit_len <= 512    512 bits
//    bit_len <= 640    640 bits (512 + 128)
//    otherwise        1024 bits
// Every 256 bit step advances each source by 32 bytes and the destination
// by 128 bytes.
static inline void mm256_be_interleave_4x64( void *d, const void *s0,
          const void *s1, const void *s2, const void *s3, int bit_len )
{
   // The first 256 bits are converted whatever bit_len says.
   mm256_be_interleave_4x64x256( d, s0, s1, s2, s3 );

   if ( bit_len > 256 )
   {
      mm256_be_interleave_4x64x256( d+128, s0+32, s1+32, s2+32, s3+32 );

      if ( bit_len > 640 )
      {
         // bit_len == 1024
         mm256_be_interleave_4x64x256( d+256, s0+64, s1+64, s2+64, s3+64 );
         mm256_be_interleave_4x64x256( d+384, s0+96, s1+96, s2+96, s3+96 );
      }
      else if ( bit_len > 512 )
      {
         // bit_len == 640, only 128 more bits to do
         mm256_be_interleave_4x64x128( d+256, s0+64, s1+64, s2+64, s3+64 );
      }
   }
}
|
||||
|
||||
/*
|
||||
// Slower version
|
||||
// bit_len must be multiple of 64
|
||||
@@ -676,7 +786,9 @@ static inline void mm256_extract_lane_4x64( void *d, const void *s,
|
||||
|
||||
// Convert from 4x32 SSE2 interleaving to 4x64 AVX2.
|
||||
// Can't do it in place
|
||||
static inline void mm256_reinterleave_4x64( void *dst, void *src, int bit_len )
|
||||
#define mm256_reinterleave_4x64 mm256_reinterleave_4x32_4x64
|
||||
static inline void mm256_reinterleave_4x32_4x64( void *dst, void *src,
|
||||
int bit_len )
|
||||
{
|
||||
__m256i* d = (__m256i*)dst;
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
@@ -736,7 +848,9 @@ static inline void mm256_reinterleave_4x64x( uint64_t *dst, uint32_t *src,
|
||||
|
||||
// Convert 4x64 byte (256 bit) vectors to 4x32 (128 bit) vectors for AVX
|
||||
// bit_len must be multiple of 64
|
||||
static inline void mm256_reinterleave_4x32( void *dst, void *src, int bit_len )
|
||||
#define mm256_reinterleave_4x32 mm256_reinterleave_4x64_4x32
|
||||
static inline void mm256_reinterleave_4x64_4x32( void *dst, void *src,
|
||||
int bit_len )
|
||||
{
|
||||
__m256i *d = (__m256i*)dst;
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
@@ -862,7 +976,8 @@ static inline void mm_reinterleave_4x32( void *dst, void *src, int bit_len )
|
||||
}
|
||||
*/
|
||||
|
||||
static inline void mm256_interleave_2x128( const void *d, const void *s0,
|
||||
#define mm256_interleave_2x128 mm256_interleave_1x128
|
||||
static inline void mm256_interleave_1x128( const void *d, const void *s0,
|
||||
void *s1, const int bit_len )
|
||||
{
|
||||
casti_m256i( d, 0 ) = mm256_put_64( s0 , s0+ 8, s1 , s1+ 8 );
|
||||
@@ -879,7 +994,8 @@ static inline void mm256_interleave_2x128( const void *d, const void *s0,
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
static inline void mm256_deinterleave_2x128( void *d0, void *d1, void *s,
|
||||
#define mm256_deinterleave_2x128 mm256_deinterleave_1x128
|
||||
static inline void mm256_deinterleave_1x128( void *d0, void *d1, void *s,
|
||||
int bit_len )
|
||||
{
|
||||
mm256_deinterleave_2x128x256( d0, d1, 0, s );
|
||||
@@ -1078,38 +1194,38 @@ static inline void mm512_deinterleave_16x32x512( void *d00, void *d01,
|
||||
void *d12, void *d13, void *d14, void *d15, const int n,
|
||||
const void *s )
|
||||
{
|
||||
casti_m512i(d00,n) = mm512_get_32( s, 0, 16, 32, 48, 64, 80, 96, 112,
|
||||
128, 144, 160, 176, 192, 208, 224, 240 );
|
||||
casti_m512i(d01,n) = mm512_get_32( s, 1, 17, 33, 49, 65, 81, 97, 113,
|
||||
129, 145, 161, 177, 193, 209, 225, 241 );
|
||||
casti_m512i(d02,n) = mm512_get_32( s, 2, 18, 34, 50, 66, 82, 98, 114,
|
||||
130, 146, 162, 178, 194, 210, 226, 242 );
|
||||
casti_m512i(d03,n) = mm512_get_32( s, 3, 19, 35, 51, 67, 83, 99, 115,
|
||||
131, 147, 163, 179, 195, 211, 227, 243 );
|
||||
casti_m512i(d04,n) = mm512_get_32( s, 4, 20, 36, 52, 68, 84, 100, 116,
|
||||
132, 148, 164, 180, 196, 212, 228, 244 );
|
||||
casti_m512i(d05,n) = mm512_get_32( s, 5, 21, 37, 53, 69, 85, 101, 117,
|
||||
133, 149, 165, 181, 197, 213, 229, 245 );
|
||||
casti_m512i(d06,n) = mm512_get_32( s, 6, 22, 38, 54, 70, 86, 102, 118,
|
||||
134, 150, 166, 182, 198, 214, 230, 246 );
|
||||
casti_m512i(d07,n) = mm512_get_32( s, 7, 23, 39, 55, 71, 87, 103, 119,
|
||||
135, 151, 167, 183, 199, 215, 231, 247 );
|
||||
casti_m512i(d08,n) = mm512_get_32( s, 8, 24, 40, 56, 72, 88, 104, 120,
|
||||
136, 152, 168, 184, 200, 216, 232, 248 );
|
||||
casti_m512i(d09,n) = mm512_get_32( s, 9, 25, 41, 57, 73, 89, 105, 121,
|
||||
137, 153, 169, 185, 201, 217, 233, 249 );
|
||||
casti_m512i(d10,n) = mm512_get_32( s, 10, 26, 42, 58, 74, 90, 106, 122,
|
||||
138, 154, 170, 186, 202, 218, 234, 250 );
|
||||
casti_m512i(d11,n) = mm512_get_32( s, 11, 27, 43, 59, 75, 91, 107, 123,
|
||||
139, 155, 171, 187, 203, 219, 235, 251 );
|
||||
casti_m512i(d12,n) = mm512_get_32( s, 12, 28, 44, 60, 76, 92, 108, 124,
|
||||
140, 156, 172, 188, 204, 220, 236, 252 );
|
||||
casti_m512i(d13,n) = mm512_get_32( s, 13, 29, 45, 61, 77, 93, 109, 125,
|
||||
141, 157, 173, 189, 205, 221, 237, 253 );
|
||||
casti_m512i(d14,n) = mm512_get_32( s, 14, 30, 46, 62, 78, 94, 110, 126,
|
||||
142, 158, 174, 190, 206, 222, 238, 254 );
|
||||
casti_m512i(d15,n) = mm512_get_32( s, 15, 31, 47, 63, 79, 95, 111, 127,
|
||||
143, 159, 175, 191, 207, 223, 239, 255 );
|
||||
casti_m512i(d00,n) = mm512_get_32( s, 0, 16, 32, 48, 64, 80, 96,112,
|
||||
128,144,160,176,192,208,224,240 );
|
||||
casti_m512i(d01,n) = mm512_get_32( s, 1, 17, 33, 49, 65, 81, 97,113,
|
||||
129,145,161,177,193,209,225,241 );
|
||||
casti_m512i(d02,n) = mm512_get_32( s, 2, 18, 34, 50, 66, 82, 98,114,
|
||||
130,146,162,178,194,210,226,242 );
|
||||
casti_m512i(d03,n) = mm512_get_32( s, 3, 19, 35, 51, 67, 83, 99,115,
|
||||
131,147,163,179,195,211,227,243 );
|
||||
casti_m512i(d04,n) = mm512_get_32( s, 4, 20, 36, 52, 68, 84,100,116,
|
||||
132,148,164,180,196,212,228,244 );
|
||||
casti_m512i(d05,n) = mm512_get_32( s, 5, 21, 37, 53, 69, 85,101,117,
|
||||
133,149,165,181,197,213,229,245 );
|
||||
casti_m512i(d06,n) = mm512_get_32( s, 6, 22, 38, 54, 70, 86,102,118,
|
||||
134,150,166,182,198,214,230,246 );
|
||||
casti_m512i(d07,n) = mm512_get_32( s, 7, 23, 39, 55, 71, 87,103,119,
|
||||
135,151,167,183,199,215,231,247 );
|
||||
casti_m512i(d08,n) = mm512_get_32( s, 8, 24, 40, 56, 72, 88,104,120,
|
||||
136,152,168,184,200,216,232,248 );
|
||||
casti_m512i(d09,n) = mm512_get_32( s, 9, 25, 41, 57, 73, 89,105,121,
|
||||
137,153,169,185,201,217,233,249 );
|
||||
casti_m512i(d10,n) = mm512_get_32( s, 10, 26, 42, 58, 74, 90,106,122,
|
||||
138,154,170,186,202,218,234,250 );
|
||||
casti_m512i(d11,n) = mm512_get_32( s, 11, 27, 43, 59, 75, 91,107,123,
|
||||
139,155,171,187,203,219,235,251 );
|
||||
casti_m512i(d12,n) = mm512_get_32( s, 12, 28, 44, 60, 76, 92,108,124,
|
||||
140,156,172,188,204,220,236,252 );
|
||||
casti_m512i(d13,n) = mm512_get_32( s, 13, 29, 45, 61, 77, 93,109,125,
|
||||
141,157,173,189,205,221,237,253 );
|
||||
casti_m512i(d14,n) = mm512_get_32( s, 14, 30, 46, 62, 78, 94,110,126,
|
||||
142,158,174,190,206,222,238,254 );
|
||||
casti_m512i(d15,n) = mm512_get_32( s, 15, 31, 47, 63, 79, 95,111,127,
|
||||
143,159,175,191,207,223,239,255 );
|
||||
}
|
||||
|
||||
static inline void mm512_interleave_8x64x512( void *d, const void *s0,
|
||||
@@ -1363,6 +1479,99 @@ static inline void mm512_deinterleave_4x128( void *d0, void *d1, void *d2,
|
||||
mm512_deinterleave_4x128x512( d0, d1, d2, d3, 1, s+256 );
|
||||
}
|
||||
|
||||
// input one 8x64 buffer and return 2*4*128
//
// src holds 8 lanes interleaved 64 bits at a time: 64 bit word w of lane L
// is at 64 bit index 8*w + L.
// dst0 receives lanes 0-3 and dst1 receives lanes 4-7, each interleaved
// 128 bits at a time: half h (0 or 1) of 128 bit block k of lane l is at
// 64 bit index 8*k + 2*l + h. This is the layout consumed by
// mm512_reinterleave_4x128_8x64, so the two functions are inverses.
//
// bit_len is the data length per lane. Anything up to 512 converts 512 bits
// per lane, anything longer converts 1024 bits per lane.
//
// The conversion is a plain loop over 64 bit words instead of a table of
// hand written gathers. The table had the middle vectors of dst0 and dst1
// exchanged, which left lanes 4-7 data in dst0 and lanes 0-3 data in dst1.
// Can't be done in place.
static inline void mm512_reinterleave_8x64_4x128( void *dst0, void *dst1,
                                              const void *src, int bit_len )
{
   uint64_t *d0 = (uint64_t*)dst0;
   uint64_t *d1 = (uint64_t*)dst1;
   const uint64_t *s = (const uint64_t*)src;
   // number of 64 bit words to convert for every lane
   const int words = ( bit_len <= 512 ) ? 8 : 16;
   int w, l;

   for ( w = 0; w < words; w++ )
   {
      // Word w is half (w & 1) of 128 bit block (w >> 1); this is where
      // lane 0 of that half lives in a 4x128 buffer.
      const int blk = 8 * ( w >> 1 ) + ( w & 1 );

      for ( l = 0; l < 4; l++ )
      {
         d0[ blk + 2*l ] = s[ 8*w + l     ];
         d1[ blk + 2*l ] = s[ 8*w + l + 4 ];
      }
   }
}
|
||||
|
||||
// input 2 4x128 return 8x64
//
// src0 holds lanes 0-3 and src1 holds lanes 4-7, each interleaved 128 bits
// at a time: half h (0 or 1) of 128 bit block k of lane l is at 64 bit
// index 8*k + 2*l + h.
// dst receives all 8 lanes interleaved 64 bits at a time: 64 bit word w of
// lane L is at 64 bit index 8*w + L.
//
// bit_len is the data length per lane. Anything up to 512 converts 512 bits
// per lane, anything longer converts 1024 bits per lane.
//
// The conversion is a plain loop over 64 bit words instead of a table of
// hand written gathers. The table had mistyped source indexes in words 5
// and 6 and stored the second 512 bits over the first 8 output vectors
// instead of into vectors 8-15.
// Can't be done in place.
static inline void mm512_reinterleave_4x128_8x64( void *dst, const void *src0,
                                            const void *src1, int bit_len )
{
   uint64_t *d = (uint64_t*)dst;
   const uint64_t *s0 = (const uint64_t*)src0;
   const uint64_t *s1 = (const uint64_t*)src1;
   // number of 64 bit words to convert for every lane
   const int words = ( bit_len <= 512 ) ? 8 : 16;
   int w, l;

   for ( w = 0; w < words; w++ )
   {
      // Word w is half (w & 1) of 128 bit block (w >> 1); this is where
      // lane 0 of that half lives in a 4x128 buffer.
      const int blk = 8 * ( w >> 1 ) + ( w & 1 );

      for ( l = 0; l < 4; l++ )
      {
         d[ 8*w + l     ] = s0[ blk + 2*l ];
         d[ 8*w + l + 4 ] = s1[ blk + 2*l ];
      }
   }
}
|
||||
|
||||
static inline void mm512_extract_lane_4x128( void *d, const void *s,
|
||||
const int lane, const int bit_len )
|
||||
{
|
||||
|
5
miner.h
5
miner.h
@@ -538,6 +538,7 @@ enum algos {
|
||||
ALGO_SCRYPTJANE,
|
||||
ALGO_SHA256D,
|
||||
ALGO_SHA256T,
|
||||
ALGO_SHA256Q,
|
||||
ALGO_SHAVITE3,
|
||||
ALGO_SKEIN,
|
||||
ALGO_SKEIN2,
|
||||
@@ -625,6 +626,7 @@ static const char* const algo_names[] = {
|
||||
"scryptjane",
|
||||
"sha256d",
|
||||
"sha256t",
|
||||
"sha256q",
|
||||
"shavite3",
|
||||
"skein",
|
||||
"skein2",
|
||||
@@ -774,7 +776,8 @@ Options:\n\
|
||||
scryptjane:nf\n\
|
||||
sha256d Double SHA-256\n\
|
||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||
shavite3 Shavite3\n\
|
||||
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
||||
shavite3 Shavite3\n\
|
||||
skein Skein+Sha (Skeincoin)\n\
|
||||
skein2 Double Skein (Woodcoin)\n\
|
||||
skunk Signatum (SIGT)\n\
|
||||
|
Reference in New Issue
Block a user