mirror of https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00

Compare commits (6 commits):

- 7fec680835
- 1b0a5aadf6
- 0a3c52810e
- 4d4386a374
- ce259b915a
- 02202ab803
@@ -68,7 +68,8 @@ cpuminer_SOURCES = \
 algo/blake/pentablake-4way.c \
 algo/blake/pentablake.c \
 algo/bmw/sph_bmw.c \
-algo/bmw/bmw-hash-4way.c \
+algo/bmw/bmw256-hash-4way.c \
+algo/bmw/bmw512-hash-4way.c \
 algo/bmw/bmw256.c \
 algo/cryptonight/cryptolight.c \
 algo/cryptonight/cryptonight-common.c\

@@ -162,10 +163,13 @@ cpuminer_SOURCES = \
 algo/sha/sph_sha2.c \
 algo/sha/sph_sha2big.c \
 algo/sha/sha2-hash-4way.c \
+algo/sha/sha256_hash_11way.c \
 algo/sha/sha2.c \
 algo/sha/sha256t-gate.c \
 algo/sha/sha256t-4way.c \
 algo/sha/sha256t.c \
+algo/sha/sha256q-4way.c \
+algo/sha/sha256q.c \
 algo/shabal/sph_shabal.c \
 algo/shabal/shabal-hash-4way.c \
 algo/shavite/sph_shavite.c \

@@ -262,7 +266,7 @@ cpuminer_SOURCES = \
 algo/yescrypt/sha256_Y.c \
 algo/yescrypt/yescrypt-best.c \
 algo/yespower/yespower.c \
-algo/yespower/sha256.c \
+algo/yespower/sha256_p.c \
 algo/yespower/yespower-opt.c

 disable_flags =
@@ -12,7 +12,7 @@ the software, don't use it.
 Choose the exe that best matches you CPU's features or use trial and
 error to find the fastest one that doesn't crash. Pay attention to
 the features listed at cpuminer startup to ensure you are mining at
-optimum speed using all the available features.
+optimum speed using the best available features.

 Architecture names and compile options used are only provided for Intel
 Core series. Even the newest Pentium and Celeron CPUs are often missing

@@ -22,8 +22,6 @@ AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
 supported by cpuminer-opt due to an incompatible implementation of SSE2 on
 these CPUs. Some algos may crash the miner with an invalid instruction.
 Users are recommended to use an unoptimized miner such as cpuminer-multi.
-Changes in v3.8.4 may have improved compatibility with some of these CPUs.
-

 Exe name Compile flags Arch name

@@ -33,11 +33,44 @@ Requirements
 Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
 supported.

-64 bit Linux or Windows operating system. Apple is not supported.
+64 bit Linux or Windows operating system. Apple and Android are not supported.

 Change Log
 ----------

+v3.9.2.4
+
+Yet another affinity fix. Hopefully the last one.
+
+v3.9.2.3
+
+Another cpu-affinity fix.
+Disabled test code that fails to compile on some CPUs with limited
+AVX512 capabilities.
+
+v3.9.2.2
+
+Fixed some day one cpu-affinity issues.
+
+v3.9.2
+
+Added sha256q algo.
+Yespower now uses openssl SHA256, but no observable hash rate increase
+on Ryzen.
+Ongoing rearchitecting.
+Lyra2z now hashes 8-way on CPUs with AVX2.
+Lyra2 (all including phi2) now runs optimized code with SSE2.
+
+v3.9.1.1
+
+Fixed lyra2v3 AVX and below.
+
+Compiling on Windows using Cygwin now works. Simply use "./build.sh"
+just like on Linux. It isn't portable therefore the binaries package will
+continue to use the existing procedure.
+The Cygwin procedure will be documented in more detail later and will
+include a list of packages that need to be installed.
+
 v3.9.1

 Fixed AVX2 version of anime algo.
@@ -210,6 +210,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
 case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
 case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
 case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
+case ALGO_SHA256Q: register_sha256q_algo ( gate ); break;
 case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
 case ALGO_SKEIN: register_skein_algo ( gate ); break;
 case ALGO_SKEIN2: register_skein2_algo ( gate ); break;

@@ -344,9 +345,9 @@ const char* const algo_alias_map[][2] =
 { NULL, NULL }
 };

-// if arg is a valid alias for a known algo it is updated with the proper name.
-// No validation of the algo or alias is done, It is the responsinility of the
-// calling function to validate the algo after return.
+// if arg is a valid alias for a known algo it is updated with the proper
+// name. No validation of the algo or alias is done, It is the responsinility
+// of the calling function to validate the algo after return.
 void get_algo_alias( char** algo_or_alias )
 {
 int i;

@@ -361,3 +362,22 @@ void get_algo_alias( char** algo_or_alias )

 #undef ALIAS
 #undef PROPER
+
+// only for parallel when there are lanes.
+bool submit_solution( struct work *work, void *hash,
+                      struct thr_info *thr, int lane )
+{
+   work_set_target_ratio( work, hash );
+   if ( submit_work( thr, work ) )
+   {
+      applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
+              accepted_share_count + rejected_share_count + 1,
+              thr->id, lane );
+      return true;
+   }
+   else
+      applog( LOG_WARNING, "Failed to submit share." );
+   return false;
+}
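The new helper centralizes the per-lane share submission and logging that the 4-way scanhash routines previously open-coded. The lyra2/allium hunks later in this compare call it with the pattern below (a sketch taken from the updated allium 4-way scan loop; Htarg, hash, pdata, ptarget, n and mythr are the usual scanhash locals):

   for ( int lane = 0; lane < 4; lane++ )
   if ( ( hash + (lane<<3) )[7] <= Htarg )
   {
      if ( fulltest( hash + (lane<<3), ptarget ) )
      {
         pdata[19] = n + lane;
         submit_solution( work, hash + (lane<<3), mythr, lane );
      }
   }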
@@ -196,8 +196,9 @@ void four_way_not_tested();
 int null_scanhash();

 // The one and only, a callback for scanhash.
+bool submit_solution( struct work *work, void *hash,
+                      struct thr_info *thr, int lane );

 bool submit_work( struct thr_info *thr, const struct work *work_in );

 // displays warning
@@ -41,7 +41,6 @@ extern "C"{
 #endif

 #include <stddef.h>
-#ifdef __AVX2__

 #include "algo/sha/sph_types.h"
 #include "avxdefs.h"

@@ -50,6 +49,10 @@ extern "C"{

 #define SPH_SIZE_bmw512 512

+#if defined(__SSE2__)
+
+// BMW-256 4 way 32
+
 typedef struct {
 __m128i buf[64];
 __m128i H[16];

@@ -59,6 +62,60 @@ typedef struct {

 typedef bmw_4way_small_context bmw256_4way_context;

+void bmw256_4way_init(void *cc);
+
+void bmw256_4way(void *cc, const void *data, size_t len);
+
+void bmw256_4way_close(void *cc, void *dst);
+
+void bmw256_4way_addbits_and_close(
+   void *cc, unsigned ub, unsigned n, void *dst);
+
+#endif // __SSE2__
+
+#if defined(__AVX2__)
+
+// BMW-256 8 way 32
+
+typedef struct {
+   __m256i buf[64];
+   __m256i H[16];
+   size_t ptr;
+   uint32_t bit_count;  // assume bit_count fits in 32 bits
+} bmw_8way_small_context __attribute__ ((aligned (64)));
+
+typedef bmw_8way_small_context bmw256_8way_context;
+
+void bmw256_8way_init( bmw256_8way_context *ctx );
+void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len );
+void bmw256_8way_close( bmw256_8way_context *ctx, void *dst );
+
+#endif
+
+#if defined(__SSE2__)
+
+// BMW-512 2 way 64
+
+typedef struct {
+   __m128i buf[16];
+   __m128i H[16];
+   size_t ptr;
+   uint64_t bit_count;
+} bmw_2way_big_context __attribute__ ((aligned (64)));
+
+typedef bmw_2way_big_context bmw512_2way_context;
+
+void bmw512_2way_init( bmw512_2way_context *ctx );
+void bmw512_2way( bmw512_2way_context *ctx, const void *data, size_t len );
+void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
+
+#endif // __SSE2__
+
+#if defined(__AVX2__)
+
+// BMW-512 4 way 64
+
 typedef struct {
 __m256i buf[16];
 __m256i H[16];

@@ -68,14 +125,6 @@ typedef struct {

 typedef bmw_4way_big_context bmw512_4way_context;

-void bmw256_4way_init(void *cc);
-
-void bmw256_4way(void *cc, const void *data, size_t len);
-
-void bmw256_4way_close(void *cc, void *dst);
-
-void bmw256_4way_addbits_and_close(
-   void *cc, unsigned ub, unsigned n, void *dst);
-
 void bmw512_4way_init(void *cc);

@@ -86,10 +135,10 @@ void bmw512_4way_close(void *cc, void *dst);
 void bmw512_4way_addbits_and_close(
 void *cc, unsigned ub, unsigned n, void *dst);

-#endif
+#endif // __AVX2__

 #ifdef __cplusplus
 }
 #endif

-#endif
+#endif // BMW_HASH_H__
algo/bmw/bmw512-hash-4way.c (new file, 1109 lines): diff suppressed because it is too large.
@@ -11,6 +11,8 @@ extern "C"{
 #pragma warning (disable: 4146)
 #endif

+#define SPH_FUGUE_NOCOPY 1
+
 static const sph_u32 IV224[] = {
 SPH_C32(0xf4c9120d), SPH_C32(0x6286f757), SPH_C32(0xee39e01c),
 SPH_C32(0xe074e3cb), SPH_C32(0xa1127c62), SPH_C32(0x9a43d215),
@@ -11,6 +11,10 @@
 #include <sys/endian.h>
 #endif

+#if defined(__CYGWIN__)
+#include <endian.h>
+#endif
+
 #include "tmmintrin.h"
 #include "smmintrin.h"

@@ -8,6 +8,10 @@
 #include <sys/endian.h>
 #endif

+#if defined(__CYGWIN__)
+#include <endian.h>
+#endif
+
 #include "tmmintrin.h"
 #include "smmintrin.h"
 #include "immintrin.h"
@@ -91,7 +91,7 @@ extern "C"{
 #pragma warning (disable: 4146)
 #endif

-
+/*
 static const sph_u64 RC[] = {
 SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
 SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),

@@ -106,7 +106,7 @@ static const sph_u64 RC[] = {
 SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
 SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
 };
-
+*/
 #define kekDECL_STATE \
 sph_u64 keca00, keca01, keca02, keca03, keca04; \
 sph_u64 keca10, keca11, keca12, keca13, keca14; \

@@ -756,6 +756,20 @@ static const sph_u64 RC[] = {
 * tested faster saving space
 */
 #define KECCAK_F_1600_ do { \
+   static const sph_u64 RC[] = { \
+      SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), \
+      SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), \
+      SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), \
+      SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), \
+      SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), \
+      SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), \
+      SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), \
+      SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), \
+      SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), \
+      SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), \
+      SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), \
+      SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) \
+   }; \
 int j; \
 for (j = 0; j < 24; j += 4) { \
 KF_ELT( 0, 1, RC[j + 0]); \

@@ -791,7 +805,7 @@ static const sph_u64 RC[] = {
 /* load initial constants */
 #define KEC_I

-static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
+//static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
 /*
 unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
 */

@@ -799,6 +813,7 @@ static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0
 /* load hash for loop */
 #define KEC_U \
 do { \
+   static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
 /*memcpy(hashbuf, hash, 64); */ \
 memcpy(hash + 64, keczword, 8); \
 } while (0);
@@ -90,7 +90,7 @@ void allium_4way_hash( void *state, const void *input )
 }

 int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
-                          uint64_t *hashes_done )
+                          uint64_t *hashes_done, struct thr_info *mythr )
 {
 uint32_t hash[8*4] __attribute__ ((aligned (64)));
 uint32_t vdata[20*4] __attribute__ ((aligned (64)));

@@ -100,40 +100,41 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
 const uint32_t first_nonce = pdata[19];
 uint32_t n = first_nonce;
 const uint32_t Htarg = ptarget[7];
-uint32_t *nonces = work->nonces;
-int num_found = 0;
-uint32_t *noncep = vdata + 76; // 19*4
+__m128i *noncev = (__m128i*)vdata + 19;   // aligned
+/* int */ thr_id = mythr->id;  // thr_id arg is deprecated

 if ( opt_benchmark )
 ( (uint32_t*)ptarget )[7] = 0x0000ff;

-swab32_array( edata, pdata, 20 );
+casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
+casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
+casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
+casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
+casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

 mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
 blake256_4way_init( &allium_4way_ctx.blake );
 blake256_4way( &allium_4way_ctx.blake, vdata, 64 );

 do {
-   be32enc( noncep, n );
-   be32enc( noncep+1, n+1 );
-   be32enc( noncep+2, n+2 );
-   be32enc( noncep+3, n+3 );
+   *noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );

    allium_4way_hash( hash, vdata );
    pdata[19] = n;

-   for ( int i = 0; i < 4; i++ )
-   if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
+   for ( int lane = 0; lane < 4; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
    {
-      pdata[19] = n+i;
-      nonces[ num_found++ ] = n+i;
-      work_set_target_ratio( work, hash+(i<<3) );
+      if ( fulltest( hash+(lane<<3), ptarget ) )
+      {
+         pdata[19] = n + lane;
+         submit_solution( work, hash+(lane<<3), mythr, lane );
+      }
    }
    n += 4;
-} while ( (num_found == 0) && (n < max_nonce-4)
-          && !work_restart[thr_id].restart);
+} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);

 *hashes_done = n - first_nonce + 1;
-return num_found;
+return 0;
 }

 #endif
@@ -70,7 +70,7 @@ void allium_hash(void *state, const void *input)
 }

 int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
-                     uint64_t *hashes_done )
+                     uint64_t *hashes_done, struct thr_info *mythr )
 {
 uint32_t _ALIGN(128) hash[8];
 uint32_t _ALIGN(128) endiandata[20];

@@ -80,6 +80,7 @@ int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
 const uint32_t Htarg = ptarget[7];
 const uint32_t first_nonce = pdata[19];
 uint32_t nonce = first_nonce;
+/* int */ thr_id = mythr->id;  // thr_id arg is deprecated

 if ( opt_benchmark )
 ptarget[7] = 0x3ffff;
@@ -1,6 +1,43 @@
 #include "lyra2-gate.h"


+// huge pages
+//
+// Use MAP_PRIVATE instead
+// In register algo:
+// replace thread safe whole matrix with a char**
+// alloc huge pages matrixsize * threads
+// make pointers to each thread to each thread, creating an
+// array[thread][matrix].
+// Each thread can create its own matrix pointer:
+// my_matrix = the matrix + ( thread_id * matrix_size )
+//
+// Compiler version check?
+// Fallback?
+//
+// create a generic utility to map & unmap huge pages.
+// ptr = malloc_huge( size );
+// Yespower wrapper checks for 64 byte alignment, seems unnecessary as
+// it should be aligned to the page boundary. It may be desireable to
+// have the matrix size rounded up if necessary to something bigger
+// than 64 byte, say 4 kbytes a small page size.
+
+// Define some constants for indivual parameters and matrix size for
+// each algo. Use the parameter constants where apropriate.
+// Convert algos that don't yet do so to use dynamic alllocation.
+// Alloc huge pages globally. If ok each thread will create a pointer to
+// its chunk. If fail each thread will use use _mm_alloc for itself.
+// BLOCK_LEN_BYTES is 768.
+
+#define LYRA2REV3_NROWS 4
+#define LYRA2REV3_NCOLS 4
+/*
+#define LYRA2REV3_MATRIX_SIZE ((BLOCK_LEN_BYTES)*(LYRA2REV3_NCOLS)* \
+                               (LYRA2REV3_NROWS)*8)
+*/
+
+#define LYRA2REV3_MATRIX_SIZE ((BLOCK_LEN_BYTES)<<4)
+
 __thread uint64_t* l2v3_wholeMatrix;

 bool lyra2rev3_thread_init()
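The comments in the hunk above only sketch the planned huge-page allocator in prose ("ptr = malloc_huge( size );"). Below is a minimal, Linux-only illustration of what such a utility could look like, assuming MAP_HUGETLB is available and huge pages have been reserved by the OS; the function names and the mmap-based approach are an assumption for illustration, not code from this commit:

   #include <stddef.h>
   #include <sys/mman.h>

   // Hypothetical helper: map one anonymous huge-page region, or return NULL
   // so the caller can fall back to _mm_malloc as the comments describe.
   static void* malloc_huge( size_t size )
   {
      void *p = mmap( NULL, size, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0 );
      return ( p == MAP_FAILED ) ? NULL : p;
   }

   static void free_huge( void *p, size_t size )
   {
      if ( p ) munmap( p, size );
   }

   // Per-thread view into one big allocation, as proposed above:
   // my_matrix = the_matrix + ( thread_id * matrix_size );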
@@ -43,25 +43,25 @@ bool register_lyra2rev2_algo( algo_gate_t* gate );

 void lyra2rev2_4way_hash( void *state, const void *input );
 int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
-                             uint64_t *hashes_done );
+                             uint64_t *hashes_done, struct thr_info *mythr );
 bool init_lyra2rev2_4way_ctx();

 #else

 void lyra2rev2_hash( void *state, const void *input );
 int scanhash_lyra2rev2( int thr_id, struct work *work, uint32_t max_nonce,
-                        uint64_t *hashes_done );
+                        uint64_t *hashes_done, struct thr_info *mythr );
 bool init_lyra2rev2_ctx();

 #endif

 /////////////////////////

-#if defined(__SSE4_2__)
+#if defined(__SSE2__)
 #define LYRA2Z_4WAY
 #endif
 #if defined(__AVX2__)
-// #define LYRA2Z_8WAY
+#define LYRA2Z_8WAY
 #endif

@@ -71,21 +71,21 @@ bool init_lyra2rev2_ctx();

 void lyra2z_8way_hash( void *state, const void *input );
 int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
-                          uint64_t *hashes_done );
+                          uint64_t *hashes_done, struct thr_info *mythr );
 bool lyra2z_8way_thread_init();

 #elif defined(LYRA2Z_4WAY)

 void lyra2z_4way_hash( void *state, const void *input );
 int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
-                          uint64_t *hashes_done );
+                          uint64_t *hashes_done, struct thr_info *mythr );
 bool lyra2z_4way_thread_init();

 #else

 void lyra2z_hash( void *state, const void *input );
 int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
-                     uint64_t *hashes_done );
+                     uint64_t *hashes_done, struct thr_info *mythr );
 bool lyra2z_thread_init();

 #endif

@@ -102,14 +102,14 @@ bool lyra2z_thread_init();

 void lyra2h_4way_hash( void *state, const void *input );
 int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
-                          uint64_t *hashes_done );
+                          uint64_t *hashes_done, struct thr_info *mythr );
 bool lyra2h_4way_thread_init();

 #else

 void lyra2h_hash( void *state, const void *input );
 int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
-                     uint64_t *hashes_done );
+                     uint64_t *hashes_done, struct thr_info *mythr );
 bool lyra2h_thread_init();

 #endif

@@ -126,14 +126,14 @@ bool register_allium_algo( algo_gate_t* gate );

 void allium_4way_hash( void *state, const void *input );
 int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
-                          uint64_t *hashes_done );
+                          uint64_t *hashes_done, struct thr_info *mythr );
 bool init_allium_4way_ctx();

 #else

 void allium_hash( void *state, const void *input );
 int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
-                     uint64_t *hashes_done );
+                     uint64_t *hashes_done, struct thr_info *mythr );
 bool init_allium_ctx();

 #endif

@@ -146,7 +146,7 @@ bool register_phi2_algo( algo_gate_t* gate );

 void phi2_hash( void *state, const void *input );
 int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
-                   uint64_t *hashes_done );
+                   uint64_t *hashes_done, struct thr_info *mythr );
 void init_phi2_ctx();

 #endif // LYRA2_GATE_H__
@@ -236,7 +236,7 @@ int LYRA2REV3( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
 //Tries to allocate enough space for the whole memory matrix

 const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
-const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
+// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
 const int64_t BLOCK_LEN = BLOCK_LEN_BLAKE2_SAFE_INT64;
 /*
 const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
@@ -50,7 +50,7 @@ void lyra2h_4way_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
@@ -63,6 +63,7 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *nonces = work->nonces;
|
uint32_t *nonces = work->nonces;
|
||||||
int num_found = 0;
|
int num_found = 0;
|
||||||
uint32_t *noncep= vdata + 76; // 19*4
|
uint32_t *noncep= vdata + 76; // 19*4
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
@@ -36,7 +36,7 @@ void lyra2h_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) hash[8];
|
uint32_t _ALIGN(64) hash[8];
|
||||||
uint32_t _ALIGN(64) endiandata[20];
|
uint32_t _ALIGN(64) endiandata[20];
|
||||||
@@ -45,6 +45,7 @@ int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
@@ -81,8 +81,8 @@ void lyra2re_hash(void *state, const void *input)
|
|||||||
memcpy(state, hashA, 32);
|
memcpy(state, hashA, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2re(int thr_id, struct work *work,
|
int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -91,6 +91,7 @@ int scanhash_lyra2re(int thr_id, struct work *work,
|
|||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
swab32_array( endiandata, pdata, 20 );
|
swab32_array( endiandata, pdata, 20 );
|
||||||
|
|
||||||
|
@@ -82,7 +82,7 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
@@ -95,6 +95,7 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *nonces = work->nonces;
|
uint32_t *nonces = work->nonces;
|
||||||
int num_found = 0;
|
int num_found = 0;
|
||||||
uint32_t *noncep = vdata + 76; // 19*4
|
uint32_t *noncep = vdata + 76; // 19*4
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
|
@@ -73,7 +73,7 @@ void lyra2rev2_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2rev2(int thr_id, struct work *work,
|
int scanhash_lyra2rev2(int thr_id, struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -82,6 +82,7 @@ int scanhash_lyra2rev2(int thr_id, struct work *work,
|
|||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||||
|
@@ -74,7 +74,6 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
int num_found = 0;
|
|
||||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
@@ -104,13 +103,7 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
work_set_target_ratio( work, lane_hash );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
if ( submit_work( mythr, work ) )
|
|
||||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
|
||||||
accepted_share_count + rejected_share_count + 1,
|
|
||||||
thr_id, lane );
|
|
||||||
else
|
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -50,7 +50,7 @@ void lyra2z_4way_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
@@ -60,25 +60,23 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
uint32_t *nonces = work->nonces;
|
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||||
int num_found = 0;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint32_t *noncep = vdata + 76; // 19*4
|
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
for ( int i=0; i < 20; i++ )
|
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
be32enc( &edata[i], pdata[i] );
|
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
|
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
|
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
|
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||||
|
|
||||||
lyra2z_4way_midstate( vdata );
|
lyra2z_4way_midstate( vdata );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
be32enc( noncep, n );
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
be32enc( noncep+1, n+1 );
|
|
||||||
be32enc( noncep+2, n+2 );
|
|
||||||
be32enc( noncep+3, n+3 );
|
|
||||||
|
|
||||||
lyra2z_4way_hash( hash, vdata );
|
lyra2z_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
@@ -87,15 +85,19 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
nonces[ num_found++ ] = n+i;
|
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
work_set_target_ratio( work, hash+(i<<3) );
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
|
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id, i );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||||
&& !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -150,14 +152,14 @@ void lyra2z_8way_hash( void *state, const void *input )
|
|||||||
memcpy( state+ 32, hash1, 32 );
|
memcpy( state+ 32, hash1, 32 );
|
||||||
memcpy( state+ 64, hash2, 32 );
|
memcpy( state+ 64, hash2, 32 );
|
||||||
memcpy( state+ 96, hash3, 32 );
|
memcpy( state+ 96, hash3, 32 );
|
||||||
memcpy( state+128, hash1, 32 );
|
memcpy( state+128, hash4, 32 );
|
||||||
memcpy( state+160, hash2, 32 );
|
memcpy( state+160, hash5, 32 );
|
||||||
memcpy( state+192, hash3, 32 );
|
memcpy( state+192, hash6, 32 );
|
||||||
memcpy( state+224, hash1, 32 );
|
memcpy( state+224, hash7, 32 );
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
@@ -167,15 +169,15 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
uint32_t *nonces = work->nonces;
|
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||||
int num_found = 0;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint32_t *noncep = vdata + 152; // 19*8
|
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
for ( int i=0; i < 19; i++ )
|
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
be32enc( &edata[i], pdata[i] );
|
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
|
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||||
edata, edata, edata, edata, 640 );
|
edata, edata, edata, edata, 640 );
|
||||||
@@ -183,15 +185,8 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
lyra2z_8way_midstate( vdata );
|
lyra2z_8way_midstate( vdata );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_bswap_32(
|
||||||
be32enc( noncep+1, n+1 );
|
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||||
be32enc( noncep+2, n+2 );
|
|
||||||
be32enc( noncep+3, n+3 );
|
|
||||||
be32enc( noncep+4, n+4 );
|
|
||||||
be32enc( noncep+5, n+5 );
|
|
||||||
be32enc( noncep+6, n+6 );
|
|
||||||
be32enc( noncep+7, n+7 );
|
|
||||||
|
|
||||||
lyra2z_8way_hash( hash, vdata );
|
lyra2z_8way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
@@ -199,15 +194,13 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
nonces[ num_found++ ] = n+i;
|
submit_solution( work, hash+(i<<3), mythr, i );
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||||
&& !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -44,7 +44,7 @@ void lyra2z_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) hash[8];
|
uint32_t _ALIGN(64) hash[8];
|
||||||
uint32_t _ALIGN(64) endiandata[20];
|
uint32_t _ALIGN(64) endiandata[20];
|
||||||
@@ -53,6 +53,7 @@ int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
@@ -16,39 +16,43 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
if (opt_benchmark)
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
ptarget[7] = 0x0000ff;
|
|
||||||
|
|
||||||
for (int i=0; i < 19; i++) {
|
if (opt_benchmark)
|
||||||
be32enc(&endiandata[i], pdata[i]);
|
ptarget[7] = 0x0000ff;
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
for (int i=0; i < 19; i++)
|
||||||
be32enc(&endiandata[19], nonce);
|
be32enc(&endiandata[i], pdata[i]);
|
||||||
lyra2z330_hash( hash, endiandata, work->height );
|
|
||||||
|
do
|
||||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
{
|
||||||
work_set_target_ratio(work, hash);
|
be32enc(&endiandata[19], nonce);
|
||||||
pdata[19] = nonce;
|
lyra2z330_hash( hash, endiandata, work->height );
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
if ( hash[7] <= Htarg && fulltest(hash, ptarget) )
|
||||||
return 1;
|
{
|
||||||
}
|
work_set_target_ratio(work, hash);
|
||||||
nonce++;
|
pdata[19] = nonce;
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
applog( LOG_NOTICE, "Share %d submitted by thread %d",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
pdata[19] = nonce;
|
mythr->id );
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
else
|
||||||
return 0;
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
|
}
|
||||||
|
nonce++;
|
||||||
|
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
pdata[19] = nonce;
|
||||||
|
*hashes_done = pdata[19] - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lyra2z330_set_target( struct work* work, double job_diff )
|
void lyra2z330_set_target( struct work* work, double job_diff )
|
||||||
|
@@ -92,42 +92,50 @@ void phi2_hash(void *state, const void *input)
|
|||||||
memcpy(state, hash, 32);
|
memcpy(state, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_phi2(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[8];
|
uint32_t _ALIGN(128) hash[8];
|
||||||
uint32_t _ALIGN(128) endiandata[36];
|
uint32_t _ALIGN(128) endiandata[36];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
if(opt_benchmark){
|
||||||
const uint32_t first_nonce = pdata[19];
|
ptarget[7] = 0x00ff;
|
||||||
uint32_t n = first_nonce;
|
}
|
||||||
|
|
||||||
if(opt_benchmark){
|
phi2_has_roots = false;
|
||||||
ptarget[7] = 0x00ff;
|
for ( int i=0; i < 36; i++ )
|
||||||
}
|
{
|
||||||
|
be32enc(&endiandata[i], pdata[i]);
|
||||||
|
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||||
|
}
|
||||||
|
|
||||||
phi2_has_roots = false;
|
do {
|
||||||
for (int i=0; i < 36; i++) {
|
be32enc( &endiandata[19], n );
|
||||||
be32enc(&endiandata[i], pdata[i]);
|
phi2_hash( hash, endiandata );
|
||||||
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
if ( hash[7] < Htarg && fulltest( hash, ptarget ) )
|
||||||
be32enc(&endiandata[19], n);
|
{
|
||||||
phi2_hash(hash, endiandata);
|
pdata[19] = n;
|
||||||
|
work_set_target_ratio( work, hash );
|
||||||
if (hash[7] < Htarg && fulltest(hash, ptarget)) {
|
if ( submit_work( mythr, work ) )
|
||||||
work_set_target_ratio(work, hash);
|
applog( LOG_NOTICE, "Share %d submitted by thread %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
pdata[19] = n;
|
}
|
||||||
return 1;
|
n++;
|
||||||
}
|
|
||||||
n++;
|
|
||||||
|
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -108,7 +108,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
 #define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
 G_2X64( s0, s2, s4, s6 ); \
 G_2X64( s1, s3, s5, s7 ); \
-mm128_rol1x64_256( s2, s3 ); \
+mm128_ror1x64_256( s2, s3 ); \
 mm128_swap128_256( s4, s5 ); \
 mm128_rol1x64_256( s6, s7 ); \
 G_2X64( s0, s2, s4, s6 ); \

@@ -132,7 +132,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
 LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \


-#endif // AVX2
+#endif // AVX2 else SSE2

 // Scalar
 //Blake2b's G function
@@ -30,7 +30,7 @@
 * @author Thomas Pornin <thomas.pornin@cryptolog.com>
 */

-#if defined(__SSE4_2__)
+#if defined(__SSE2__)

 #include <stddef.h>
 #include <string.h>
@@ -716,4 +716,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
 }

 #endif // __AVX2__
-#endif // __SSE4_2__
+#endif // __SSE2__
@@ -44,7 +44,8 @@
 #include "sph_types.h"
 #include "avxdefs.h"

-#if defined(__SSE4_2__)
+#if defined(__SSE2__)
+//#if defined(__SSE4_2__)

 //#define SPH_SIZE_sha256 256

@@ -60,6 +61,26 @@ void sha256_4way_init( sha256_4way_context *sc );
 void sha256_4way( sha256_4way_context *sc, const void *data, size_t len );
 void sha256_4way_close( sha256_4way_context *sc, void *dst );

+/*
+// SHA-256 7 way hybrid
+// Combines SSE, MMX and scalar data to do 8 + 2 + 1 parallel.
+typedef struct {
+   __m128i bufx[64>>2];
+   __m128i valx[8];
+   __m64 bufy[64>>2];
+   __m64 valy[8];
+   uint32_t bufz[64>>2];
+   uint32_t valz[8];
+   uint32_t count_high, count_low;
+} sha256_7way_context;
+
+void sha256_7way_init( sha256_7way_context *ctx );
+void sha256_7way( sha256_7way_context *ctx, const void *datax,
+                  void *datay, void *dataz, size_t len );
+void sha256_7way_close( sha256_7way_context *ctx, void *dstx, void *dstyx,
+                        void *dstz );
+*/
+
 #if defined (__AVX2__)

 // SHA-256 8 way

@@ -88,6 +109,24 @@ void sha512_4way_init( sha512_4way_context *sc);
 void sha512_4way( sha512_4way_context *sc, const void *data, size_t len );
 void sha512_4way_close( sha512_4way_context *sc, void *dst );

-#endif
-#endif
-#endif
+// SHA-256 11 way hybrid
+// Combines AVX2, MMX and scalar data to do 8 + 2 + 1 parallel.
+typedef struct {
+   __m256i bufx[64>>2];
+   __m256i valx[8];
+   __m64 bufy[64>>2];
+   __m64 valy[8];
+   uint32_t bufz[64>>2];
+   uint32_t valz[8];
+   uint32_t count_high, count_low;
+} sha256_11way_context;
+
+void sha256_11way_init( sha256_11way_context *ctx );
+void sha256_11way_update( sha256_11way_context *ctx, const void *datax,
+                          const void *datay, const void *dataz, size_t len );
+void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dstyx,
+                         void *dstz );
+
+#endif // __AVX2__
+#endif // __SSE2__
+#endif // SHA256_4WAY_H__
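The 11-way context hashes eleven independent message lanes per call: eight packed into the AVX2 vectors (bufx/valx), two in the MMX halves (bufy/valy) and one scalar (bufz/valz). A hedged usage sketch of the declared API, assuming the caller has already interleaved its eight AVX2 lanes into datax and packed the two MMX lanes into datay; the buffer layouts and lane counts of the output arrays are an assumption, not shown by this header:

   sha256_11way_context ctx;
   uint32_t outx[8*8], outy[8*2], outz[8];   // 8, 2 and 1 lanes of 8-word digests

   sha256_11way_init( &ctx );
   sha256_11way_update( &ctx, datax, datay, dataz, 80 );  // e.g. 80-byte block headers
   sha256_11way_close( &ctx, outx, outy, outz );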
algo/sha/sha256_hash_11way.c (new file, 536 lines):

@@ -0,0 +1,536 @@
|
|||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "sha2-hash-4way.h"
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
|
// naming convention for variables and macros
|
||||||
|
// VARx: AVX2 8 way 32 bit
|
||||||
|
// VARy: MMX 2 way 32 bit
|
||||||
|
// VARz: scalar integer 32 bit
|
||||||
|
|
||||||
|
|
||||||
|
static const uint32_t H256[8] =
|
||||||
|
{
|
||||||
|
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||||
|
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint32_t K256[64] =
|
||||||
|
{
|
||||||
|
0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
|
||||||
|
0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
|
||||||
|
0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
|
||||||
|
0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
|
||||||
|
0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
|
||||||
|
0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
|
||||||
|
0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
|
||||||
|
0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
|
||||||
|
0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
|
||||||
|
0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
|
||||||
|
0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
|
||||||
|
0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
|
||||||
|
0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
|
||||||
|
0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
|
||||||
|
0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
|
||||||
|
0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CHx(X, Y, Z) \
|
||||||
|
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||||
|
|
||||||
|
#define CHy(X, Y, Z) \
|
||||||
|
_mm_xor_si64( _mm_and_si64( _mm_xor_si64( Y, Z ), X ), Z )
|
||||||
|
|
||||||
|
#define CHz(X, Y, Z) ((( (Y) ^ (Z) ) & (X) ) ^ (Z) )
|
||||||
|
|
||||||
|
|
||||||
|
#define MAJx(X, Y, Z) \
|
||||||
|
_mm256_or_si256( _mm256_and_si256( X, Y ), \
|
||||||
|
_mm256_and_si256( _mm256_or_si256( X, Y ), Z ) )
|
||||||
|
|
||||||
|
#define MAJy(X, Y, Z) \
|
||||||
|
_mm_or_si64( _mm_and_si64( X, Y ), \
|
||||||
|
_mm_and_si64( _mm_or_si64( X, Y ), Z ) )
|
||||||
|
|
||||||
|
#define MAJz(X, Y, Z) ( ( (X) & (Y) ) | ( ( (X) | (Y) ) & (Z) ) )
|
||||||
|
|
||||||
|
#define BSG2_0x(x) \
|
||||||
|
_mm256_xor_si256( _mm256_xor_si256( \
|
||||||
|
mm256_ror_32(x,2), mm256_ror_32(x,13) ), _mm256_srli_epi32(x,22) )
|
||||||
|
|
||||||
|
#define BSG2_0y(x) \
|
||||||
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
|
mm64_ror_32(x,2), mm64_ror_32(x,13) ), _mm_srli_pi32(x,22) )
|
||||||
|
|
||||||
|
#define BSG2_0z(x) ( ror_32(x,2) ^ ror_32(x,13) ^ ((x)>>22) )
|
||||||
|
|
||||||
|
#define BSG2_1x(x) \
|
||||||
|
_mm256_xor_si256( _mm256_xor_si256( \
|
||||||
|
mm256_ror_32(x,6), mm256_ror_32(x,11) ), _mm256_srli_epi32(x,25) )
|
||||||
|
|
||||||
|
#define BSG2_1y(x) \
|
||||||
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
|
mm64_ror_32(x,6), mm64_ror_32(x,11) ), _mm_srli_pi32(x,25) )
|
||||||
|
|
||||||
|
#define BSG2_1z(x) ( ror_32(x,6) ^ ror_32(x,11) ^ ((x)>>25) )
|
||||||
|
|
||||||
|
#define SSG2_0x(x) \
|
||||||
|
_mm256_xor_si256( _mm256_xor_si256( \
|
||||||
|
mm256_ror_32(x,7), mm256_ror_32(x,18) ), _mm256_srli_epi32(x,3) )
|
||||||
|
|
||||||
|
#define SSG2_0y(x) \
|
||||||
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
|
mm64_ror_32(x,7), mm64_ror_32(x,18) ), _mm_srli_pi32(x,3) )
|
||||||
|
|
||||||
|
#define SSG2_0z(x) (( ror_32(x,7) ^ ror_32(x,18) ) ^ ((x)>>3) )
|
||||||
|
|
||||||
|
#define SSG2_1x(x) \
|
||||||
|
_mm256_xor_si256( _mm256_xor_si256( \
|
||||||
|
mm256_ror_32(x,17), mm256_ror_32(x,19) ), _mm256_srli_epi32(x,10) )
|
||||||
|
|
||||||
|
#define SSG2_1y(x) \
|
||||||
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
|
mm64_ror_32(x,17), mm64_ror_32(x,19) ), _mm_srli_pi32(x,10) )
|
||||||
|
|
||||||
|
#define SSG2_1z(x) ( ror_32(x,17) ^ ror_32(x,19) ^ ((x)>>10) )
|
||||||
|
|
||||||
|
#define SHA2x_MEXP( a, b, c, d ) \
|
||||||
|
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||||
|
SSG2_1x( Wx[a] ), Wx[b] ), SSG2_0x( Wx[c] ) ), Wx[d] )
|
||||||
|
|
||||||
|
#define SHA2y_MEXP( a, b, c, d ) \
|
||||||
|
_mm_add_pi32( _mm_add_pi32( _mm_add_pi32( \
|
||||||
|
SSG2_1y( Wy[a] ), Wy[b] ), SSG2_0y( Wy[c] ) ), Wy[d] )
|
||||||
|
|
||||||
|
#define SHA2z_MEXP( a, b, c, d ) \
|
||||||
|
( SSG2_1z( Wz[a] ) + Wz[b] + SSG2_0z( Wz[c] ) + Wz[d] )
|
||||||
|
|
||||||
|
|
||||||
|
#define SHA2s_11WAY_STEP( Ax, Bx, Cx, Dx, Ex, Fx, Gx, Hx, \
|
||||||
|
Ay, By, Cy, Dy, Ey, Fy, Gy, Hy, \
|
||||||
|
Az, Bz, Cz, Dz, Ez, Fz, Gz, Hz, i, j) \
|
||||||
|
do { \
|
||||||
|
__m256i T1x, T2x; \
|
||||||
|
__m64 T1y, T2y; \
|
||||||
|
uint32_t T1z, T2z; \
|
||||||
|
T1x = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||||
|
_mm256_add_epi32( Hx, BSG2_1x(Ex) ), CHx(Ex, Fx, Gx) ), \
|
||||||
|
_mm256_set1_epi32( K256[( (j)+(i) )] ) ), Wx[i] ); \
|
||||||
|
T1y = _mm_add_pi32( _mm_add_pi32( _mm_add_pi32( \
|
||||||
|
_mm_add_pi32( Hy, BSG2_1y(Ey) ), CHy(Ey, Fy, Gy) ), \
|
||||||
|
_mm_set1_pi32( K256[( (j)+(i) )] ) ), Wy[i] ); \
|
||||||
|
T1z = Hz + BSG2_1z( Ez ) + CHz( Ez, Fz, Gz ) + K256[ ((j)+(i)) ] + Wz[i]; \
|
||||||
|
T2x = _mm256_add_epi32( BSG2_0x(Ax), MAJx(Ax, Bx, Cx) ); \
|
||||||
|
T2y = _mm_add_pi32( BSG2_0y(Ay), MAJy(Ay, By, Cy) ); \
|
||||||
|
T2z = BSG2_0z( Az ) + MAJz( Az, Bz, Cz ); \
|
||||||
|
Dx = _mm256_add_epi32( Dx, T1x ); \
|
||||||
|
Dy = _mm_add_pi32( Dy, T1y ); \
|
||||||
|
Dz = Dz + T1z; \
|
||||||
|
Hx = _mm256_add_epi32( T1x, T2x ); \
|
||||||
|
Hy = _mm_add_pi32( T1y, T2y ); \
|
||||||
|
Hz = T1z + T2z; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
void sha256_11way_round( __m256i *inx, __m256i rx[8], __m64 *iny, __m64 ry[8],
|
||||||
|
uint32_t *inz, uint32_t rz[8] )
|
||||||
|
{
|
||||||
|
__m256i Ax, Bx, Cx, Dx, Ex, Fx, Gx, Hx;
|
||||||
|
__m256i Wx[16];
|
||||||
|
__m64 Ay, By, Cy, Dy, Ey, Fy, Gy, Hy;
|
||||||
|
__m64 Wy[16];
|
||||||
|
uint32_t Az, Bz, Cz, Dz, Ez, Fz, Gz, Hz;
|
||||||
|
uint32_t Wz[16];
|
||||||
|
|
||||||
|
Wx[ 0] = mm256_bswap_32( inx[ 0] );
|
||||||
|
Wy[ 0] = mm64_bswap_32( iny[ 0] );
|
||||||
|
Wz[ 0] = bswap_32( inz[ 0] );
|
||||||
|
|
||||||
|
Wx[ 1] = mm256_bswap_32( inx[ 1] );
|
||||||
|
Wy[ 1] = mm64_bswap_32( iny[ 1] );
|
||||||
|
Wz[ 1] = bswap_32( inz[ 1] );
|
||||||
|
|
||||||
|
Wx[ 2] = mm256_bswap_32( inx[ 2] );
|
||||||
|
Wy[ 2] = mm64_bswap_32( iny[ 2] );
|
||||||
|
Wz[ 2] = bswap_32( inz[ 2] );
|
||||||
|
|
||||||
|
Wx[ 3] = mm256_bswap_32( inx[ 3] );
|
||||||
|
Wy[ 3] = mm64_bswap_32( iny[ 3] );
|
||||||
|
Wz[ 3] = bswap_32( inz[ 3] );
|
||||||
|
|
||||||
|
Wx[ 4] = mm256_bswap_32( inx[ 4] );
|
||||||
|
Wy[ 4] = mm64_bswap_32( iny[ 4] );
|
||||||
|
Wz[ 4] = bswap_32( inz[ 4] );
|
||||||
|
|
||||||
|
Wx[ 5] = mm256_bswap_32( inx[ 5] );
|
||||||
|
Wy[ 5] = mm64_bswap_32( iny[ 5] );
|
||||||
|
Wz[ 5] = bswap_32( inz[ 5] );
|
||||||
|
|
||||||
|
Wx[ 6] = mm256_bswap_32( inx[ 6] );
|
||||||
|
Wy[ 6] = mm64_bswap_32( iny[ 6] );
|
||||||
|
Wz[ 6] = bswap_32( inz[ 6] );
|
||||||
|
|
||||||
|
Wx[ 7] = mm256_bswap_32( inx[ 7] );
|
||||||
|
Wy[ 7] = mm64_bswap_32( iny[ 7] );
|
||||||
|
Wz[ 7] = bswap_32( inz[ 7] );
|
||||||
|
|
||||||
|
Wx[ 8] = mm256_bswap_32( inx[ 8] );
|
||||||
|
Wy[ 8] = mm64_bswap_32( iny[ 8] );
|
||||||
|
Wz[ 8] = bswap_32( inz[ 8] );
|
||||||
|
|
||||||
|
Wx[ 9] = mm256_bswap_32( inx[ 9] );
|
||||||
|
Wy[ 9] = mm64_bswap_32( iny[ 9] );
|
||||||
|
Wz[ 9] = bswap_32( inz[ 9] );
|
||||||
|
|
||||||
|
Wx[10] = mm256_bswap_32( inx[10] );
|
||||||
|
Wy[10] = mm64_bswap_32( iny[10] );
|
||||||
|
Wz[10] = bswap_32( inz[10] );
|
||||||
|
|
||||||
|
Wx[11] = mm256_bswap_32( inx[11] );
|
||||||
|
Wy[11] = mm64_bswap_32( iny[11] );
|
||||||
|
Wz[11] = bswap_32( inz[11] );
|
||||||
|
|
||||||
|
Wx[12] = mm256_bswap_32( inx[12] );
|
||||||
|
Wy[12] = mm64_bswap_32( iny[12] );
|
||||||
|
Wz[12] = bswap_32( inz[12] );
|
||||||
|
|
||||||
|
Wx[13] = mm256_bswap_32( inx[13] );
|
||||||
|
Wy[13] = mm64_bswap_32( iny[13] );
|
||||||
|
Wz[13] = bswap_32( inz[13] );
|
||||||
|
|
||||||
|
Wx[14] = mm256_bswap_32( inx[14] );
|
||||||
|
Wy[14] = mm64_bswap_32( iny[14] );
|
||||||
|
Wz[14] = bswap_32( inz[14] );
|
||||||
|
|
||||||
|
Wx[15] = mm256_bswap_32( inx[15] );
|
||||||
|
Wy[15] = mm64_bswap_32( iny[15] );
|
||||||
|
Wz[15] = bswap_32( inz[15] );
|
||||||
|
|
||||||
|
Ax = rx[0]; Ay = ry[0]; Az = rz[0];
|
||||||
|
Bx = rx[1]; By = ry[1]; Bz = rz[1];
|
||||||
|
Cx = rx[2]; Cy = ry[2]; Cz = rz[2];
|
||||||
|
Dx = rx[3]; Dy = ry[3]; Dz = rz[3];
|
||||||
|
Ex = rx[4]; Ey = ry[4]; Ez = rz[4];
|
||||||
|
Fx = rx[5]; Fy = ry[5]; Fz = rz[5];
|
||||||
|
Gx = rx[6]; Gy = ry[6]; Gz = rz[6];
|
||||||
|
Hx = rx[7]; Hy = ry[7]; Hz = rz[7];
|
||||||
|
|
||||||
|
SHA2s_11WAY_STEP( Ax, Bx, Cx, Dx, Ex, Fx, Gx, Hx,
|
||||||
|
Ay, By, Cy, Dy, Ey, Fy, Gy, Hy,
|
||||||
|
Az, Bz, Cz, Dz, Ez, Fz, Gz, Hz, 0, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Hx, Ax, Bx, Cx, Dx, Ex, Fx, Gx,
|
||||||
|
Hy, Ay, By, Cy, Dy, Ey, Fy, Gy,
|
||||||
|
Hz, Az, Bz, Cz, Dz, Ez, Fz, Gz, 1, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Gx, Hx, Ax, Bx, Cx, Dx, Ex, Fx,
|
||||||
|
Gy, Hy, Ay, By, Cy, Dy, Ey, Fy,
|
||||||
|
Gz, Hz, Az, Bz, Cz, Dz, Ez, Fz, 2, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Fx, Gx, Hx, Ax, Bx, Cx, Dx, Ex,
|
||||||
|
Fy, Gy, Hy, Ay, By, Cy, Dy, Ey,
|
||||||
|
Fz, Gz, Hz, Az, Bz, Cz, Dz, Ez, 3, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Ex, Fx, Gx, Hx, Ax, Bx, Cx, Dx,
|
||||||
|
Ey, Fy, Gy, Hy, Ay, By, Cy, Dy,
|
||||||
|
Ez, Fz, Gz, Hz, Az, Bz, Cz, Dz, 4, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Dx, Ex, Fx, Gx, Hx, Ax, Bx, Cx,
|
||||||
|
Dy, Ey, Fy, Gy, Hy, Ay, By, Cy,
|
||||||
|
Dz, Ez, Fz, Gz, Hz, Az, Bz, Cz, 5, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Cx, Dx, Ex, Fx, Gx, Hx, Ax, Bx,
|
||||||
|
Cy, Dy, Ey, Fy, Gy, Hy, Ay, By,
|
||||||
|
Cz, Dz, Ez, Fz, Gz, Hz, Az, Bz, 6, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Bx, Cx, Dx, Ex, Fx, Gx, Hx, Ax,
|
||||||
|
By, Cy, Dy, Ey, Fy, Gy, Hy, Ay,
|
||||||
|
Bz, Cz, Dz, Ez, Fz, Gz, Hz, Az, 7, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Ax, Bx, Cx, Dx, Ex, Fx, Gx, Hx,
|
||||||
|
Ay, By, Cy, Dy, Ey, Fy, Gy, Hy,
|
||||||
|
Az, Bz, Cz, Dz, Ez, Fz, Gz, Hz, 8, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Hx, Ax, Bx, Cx, Dx, Ex, Fx, Gx,
|
||||||
|
Hy, Ay, By, Cy, Dy, Ey, Fy, Gy,
|
||||||
|
Hz, Az, Bz, Cz, Dz, Ez, Fz, Gz, 9, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Gx, Hx, Ax, Bx, Cx, Dx, Ex, Fx,
|
||||||
|
Gy, Hy, Ay, By, Cy, Dy, Ey, Fy,
|
||||||
|
Gz, Hz, Az, Bz, Cz, Dz, Ez, Fz, 10, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Fx, Gx, Hx, Ax, Bx, Cx, Dx, Ex,
|
||||||
|
Fy, Gy, Hy, Ay, By, Cy, Dy, Ey,
|
||||||
|
Fz, Gz, Hz, Az, Bz, Cz, Dz, Ez, 11, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Ex, Fx, Gx, Hx, Ax, Bx, Cx, Dx,
|
||||||
|
Ey, Fy, Gy, Hy, Ay, By, Cy, Dy,
|
||||||
|
Ez, Fz, Gz, Hz, Az, Bz, Cz, Dz, 12, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Dx, Ex, Fx, Gx, Hx, Ax, Bx, Cx,
|
||||||
|
Dy, Ey, Fy, Gy, Hy, Ay, By, Cy,
|
||||||
|
Dz, Ez, Fz, Gz, Hz, Az, Bz, Cz, 13, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Cx, Dx, Ex, Fx, Gx, Hx, Ax, Bx,
|
||||||
|
Cy, Dy, Ey, Fy, Gy, Hy, Ay, By,
|
||||||
|
Cz, Dz, Ez, Fz, Gz, Hz, Az, Bz, 14, 0 );
|
||||||
|
SHA2s_11WAY_STEP( Bx, Cx, Dx, Ex, Fx, Gx, Hx, Ax,
|
||||||
|
By, Cy, Dy, Ey, Fy, Gy, Hy, Ay,
|
||||||
|
Bz, Cz, Dz, Ez, Fz, Gz, Hz, Az, 15, 0 );
|
||||||
|
|
||||||
|
for ( int j = 16; j < 64; j += 16 )
|
||||||
|
{
|
||||||
|
Wx[ 0] = SHA2x_MEXP( 14, 9, 1, 0 );
|
||||||
|
Wy[ 0] = SHA2y_MEXP( 14, 9, 1, 0 );
|
||||||
|
Wz[ 0] = SHA2z_MEXP( 14, 9, 1, 0 );
|
||||||
|
|
||||||
|
Wx[ 1] = SHA2x_MEXP( 15, 10, 2, 1 );
|
||||||
|
Wy[ 1] = SHA2y_MEXP( 15, 10, 2, 1 );
|
||||||
|
Wz[ 1] = SHA2z_MEXP( 15, 10, 2, 1 );
|
||||||
|
|
||||||
|
Wx[ 2] = SHA2x_MEXP( 0, 11, 3, 2 );
|
||||||
|
Wy[ 2] = SHA2y_MEXP( 0, 11, 3, 2 );
|
||||||
|
Wz[ 2] = SHA2z_MEXP( 0, 11, 3, 2 );
|
||||||
|
|
||||||
|
Wx[ 3] = SHA2x_MEXP( 1, 12, 4, 3 );
|
||||||
|
Wy[ 3] = SHA2y_MEXP( 1, 12, 4, 3 );
|
||||||
|
Wz[ 3] = SHA2z_MEXP( 1, 12, 4, 3 );
|
||||||
|
|
||||||
|
Wx[ 4] = SHA2x_MEXP( 2, 13, 5, 4 );
|
||||||
|
Wy[ 4] = SHA2y_MEXP( 2, 13, 5, 4 );
|
||||||
|
Wz[ 4] = SHA2z_MEXP( 2, 13, 5, 4 );
|
||||||
|
|
||||||
|
Wx[ 5] = SHA2x_MEXP( 3, 14, 6, 5 );
|
||||||
|
Wy[ 5] = SHA2y_MEXP( 3, 14, 6, 5 );
|
||||||
|
Wz[ 5] = SHA2z_MEXP( 3, 14, 6, 5 );
|
||||||
|
|
||||||
|
Wx[ 6] = SHA2x_MEXP( 4, 15, 7, 6 );
|
||||||
|
Wy[ 6] = SHA2y_MEXP( 4, 15, 7, 6 );
|
||||||
|
Wz[ 6] = SHA2z_MEXP( 4, 15, 7, 6 );
|
||||||
|
|
||||||
|
Wx[ 7] = SHA2x_MEXP( 5, 0, 8, 7);
|
||||||
|
Wy[ 7] = SHA2y_MEXP( 5, 0, 8, 7);
|
||||||
|
Wz[ 7] = SHA2z_MEXP( 5, 0, 8, 7);
|
||||||
|
|
||||||
|
Wx[ 8] = SHA2x_MEXP( 6, 1, 9, 8);
|
||||||
|
Wy[ 8] = SHA2y_MEXP( 6, 1, 9, 8);
|
||||||
|
Wz[ 8] = SHA2z_MEXP( 6, 1, 9, 8);
|
||||||
|
|
||||||
|
Wx[ 9] = SHA2x_MEXP( 7, 2, 10, 9 );
|
||||||
|
Wy[ 9] = SHA2y_MEXP( 7, 2, 10, 9);
|
||||||
|
Wz[ 9] = SHA2z_MEXP( 7, 2, 10, 9);
|
||||||
|
|
||||||
|
Wx[10] = SHA2x_MEXP( 8, 3, 11, 10 );
|
||||||
|
Wy[10] = SHA2y_MEXP( 8, 3, 11, 10);
|
||||||
|
Wz[10] = SHA2z_MEXP( 8, 3, 11, 10);
|
||||||
|
|
||||||
|
Wx[11] = SHA2x_MEXP( 9, 4, 12, 11);
|
||||||
|
Wy[11] = SHA2y_MEXP( 9, 4, 12, 11);
|
||||||
|
Wz[11] = SHA2z_MEXP( 9, 4, 12, 11 );
|
||||||
|
|
||||||
|
Wx[12] = SHA2x_MEXP( 10, 5, 13, 12 );
|
||||||
|
Wy[12] = SHA2y_MEXP( 10, 5, 13, 12 );
|
||||||
|
Wz[12] = SHA2z_MEXP( 10, 5, 13, 12 );
|
||||||
|
|
||||||
|
Wx[13] = SHA2x_MEXP( 11, 6, 14, 13 );
|
||||||
|
Wy[13] = SHA2y_MEXP( 11, 6, 14, 13 );
|
||||||
|
Wz[13] = SHA2z_MEXP( 11, 6, 14, 13 );
|
||||||
|
|
||||||
|
Wx[14] = SHA2x_MEXP( 12, 7, 15, 14 );
|
||||||
|
Wy[14] = SHA2y_MEXP( 12, 7, 15, 14 );
|
||||||
|
Wz[14] = SHA2z_MEXP( 12, 7, 15, 14 );
|
||||||
|
|
||||||
|
Wx[15] = SHA2x_MEXP( 13, 8, 0, 15 );
|
||||||
|
Wy[15] = SHA2y_MEXP( 13, 8, 0, 15 );
|
||||||
|
Wz[15] = SHA2z_MEXP( 13, 8, 0, 15 );
|
||||||
|
|
||||||
|
|
||||||
|
SHA2s_11WAY_STEP( Ax, Bx, Cx, Dx, Ex, Fx, Gx, Hx,
|
||||||
|
Ay, By, Cy, Dy, Ey, Fy, Gy, Hy,
|
||||||
|
Az, Bz, Cz, Dz, Ez, Fz, Gz, Hz, 0, j );
|
||||||
|
SHA2s_11WAY_STEP( Hx, Ax, Bx, Cx, Dx, Ex, Fx, Gx,
|
||||||
|
Hy, Ay, By, Cy, Dy, Ey, Fy, Gy,
|
||||||
|
Hz, Az, Bz, Cz, Dz, Ez, Fz, Gz, 1, j );
|
||||||
|
SHA2s_11WAY_STEP( Gx, Hx, Ax, Bx, Cx, Dx, Ex, Fx,
|
||||||
|
Gy, Hy, Ay, By, Cy, Dy, Ey, Fy,
|
||||||
|
Gz, Hz, Az, Bz, Cz, Dz, Ez, Fz, 2, j );
|
||||||
|
SHA2s_11WAY_STEP( Fx, Gx, Hx, Ax, Bx, Cx, Dx, Ex,
|
||||||
|
Fy, Gy, Hy, Ay, By, Cy, Dy, Ey,
|
||||||
|
Fz, Gz, Hz, Az, Bz, Cz, Dz, Ez, 3, j );
|
||||||
|
SHA2s_11WAY_STEP( Ex, Fx, Gx, Hx, Ax, Bx, Cx, Dx,
|
||||||
|
Ey, Fy, Gy, Hy, Ay, By, Cy, Dy,
|
||||||
|
Ez, Fz, Gz, Hz, Az, Bz, Cz, Dz, 4, j );
|
||||||
|
SHA2s_11WAY_STEP( Dx, Ex, Fx, Gx, Hx, Ax, Bx, Cx,
|
||||||
|
Dy, Ey, Fy, Gy, Hy, Ay, By, Cy,
|
||||||
|
Dz, Ez, Fz, Gz, Hz, Az, Bz, Cz, 5, j );
|
||||||
|
SHA2s_11WAY_STEP( Cx, Dx, Ex, Fx, Gx, Hx, Ax, Bx,
|
||||||
|
Cy, Dy, Ey, Fy, Gy, Hy, Ay, By,
|
||||||
|
Cz, Dz, Ez, Fz, Gz, Hz, Az, Bz, 6, j );
|
||||||
|
SHA2s_11WAY_STEP( Bx, Cx, Dx, Ex, Fx, Gx, Hx, Ax,
|
||||||
|
By, Cy, Dy, Ey, Fy, Gy, Hy, Ay,
|
||||||
|
Bz, Cz, Dz, Ez, Fz, Gz, Hz, Az, 7, j );
|
||||||
|
SHA2s_11WAY_STEP( Ax, Bx, Cx, Dx, Ex, Fx, Gx, Hx,
|
||||||
|
Ay, By, Cy, Dy, Ey, Fy, Gy, Hy,
|
||||||
|
Az, Bz, Cz, Dz, Ez, Fz, Gz, Hz, 8, j );
|
||||||
|
SHA2s_11WAY_STEP( Hx, Ax, Bx, Cx, Dx, Ex, Fx, Gx,
|
||||||
|
Hy, Ay, By, Cy, Dy, Ey, Fy, Gy,
|
||||||
|
Hz, Az, Bz, Cz, Dz, Ez, Fz, Gz, 9, j );
|
||||||
|
SHA2s_11WAY_STEP( Gx, Hx, Ax, Bx, Cx, Dx, Ex, Fx,
|
||||||
|
Gy, Hy, Ay, By, Cy, Dy, Ey, Fy,
|
||||||
|
Gz, Hz, Az, Bz, Cz, Dz, Ez, Fz, 10, j );
|
||||||
|
SHA2s_11WAY_STEP( Fx, Gx, Hx, Ax, Bx, Cx, Dx, Ex,
|
||||||
|
Fy, Gy, Hy, Ay, By, Cy, Dy, Ey,
|
||||||
|
Fz, Gz, Hz, Az, Bz, Cz, Dz, Ez, 11, j );
|
||||||
|
SHA2s_11WAY_STEP( Ex, Fx, Gx, Hx, Ax, Bx, Cx, Dx,
|
||||||
|
Ey, Fy, Gy, Hy, Ay, By, Cy, Dy,
|
||||||
|
Ez, Fz, Gz, Hz, Az, Bz, Cz, Dz, 12, j );
|
||||||
|
SHA2s_11WAY_STEP( Dx, Ex, Fx, Gx, Hx, Ax, Bx, Cx,
|
||||||
|
Dy, Ey, Fy, Gy, Hy, Ay, By, Cy,
|
||||||
|
Dz, Ez, Fz, Gz, Hz, Az, Bz, Cz, 13, j );
|
||||||
|
SHA2s_11WAY_STEP( Cx, Dx, Ex, Fx, Gx, Hx, Ax, Bx,
|
||||||
|
Cy, Dy, Ey, Fy, Gy, Hy, Ay, By,
|
||||||
|
Cz, Dz, Ez, Fz, Gz, Hz, Az, Bz, 14, j );
|
||||||
|
SHA2s_11WAY_STEP( Bx, Cx, Dx, Ex, Fx, Gx, Hx, Ax,
|
||||||
|
By, Cy, Dy, Ey, Fy, Gy, Hy, Ay,
|
||||||
|
Bz, Cz, Dz, Ez, Fz, Gz, Hz, Az, 15, j );
|
||||||
|
}
|
||||||
|
|
||||||
|
rx[0] = _mm256_add_epi32( rx[0], Ax );
|
||||||
|
ry[0] = _mm_add_pi32( ry[0], Ay );
|
||||||
|
rz[0] = rz[0]+ Az;
|
||||||
|
rx[1] = _mm256_add_epi32( rx[1], Bx );
|
||||||
|
ry[1] = _mm_add_pi32( ry[1], By );
|
||||||
|
rz[1] = rz[1]+ Bz;
|
||||||
|
   rx[2] = _mm256_add_epi32( rx[2], Cx );
   ry[2] = _mm_add_pi32( ry[2], Cy );
   rz[2] = rz[2] + Cz;
   rx[3] = _mm256_add_epi32( rx[3], Dx );
   ry[3] = _mm_add_pi32( ry[3], Dy );
   rz[3] = rz[3] + Dz;
|
||||||
|
rx[4] = _mm256_add_epi32( rx[4], Ex );
|
||||||
|
ry[4] = _mm_add_pi32( ry[4], Ey );
|
||||||
|
rz[4] = rz[4]+ Ez;
|
||||||
|
rx[5] = _mm256_add_epi32( rx[5], Fx );
|
||||||
|
ry[5] = _mm_add_pi32( ry[5], Fy );
|
||||||
|
rz[5] = rz[5]+ Fz;
|
||||||
|
rx[6] = _mm256_add_epi32( rx[6], Gx );
|
||||||
|
ry[6] = _mm_add_pi32( ry[6], Gy );
|
||||||
|
rz[6] = rz[6]+ Gz;
|
||||||
|
rx[7] = _mm256_add_epi32( rx[7], Hx );
|
||||||
|
ry[7] = _mm_add_pi32( ry[7], Hy );
|
||||||
|
rz[7] = rz[7]+ Hz;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void sha256_11way_init( sha256_11way_context *ctx )
|
||||||
|
{
|
||||||
|
   ctx->count_high = ctx->count_low = 0;
   // Each lane of each stream must start from the standard SHA-256 IV,
   // i.e. word i seeds with H256[i], matching the scalar memcpy below.
   ctx->valx[0] = _mm256_set1_epi32( H256[0] );
   ctx->valy[0] = _mm_set1_pi32( H256[0] );
   ctx->valx[1] = _mm256_set1_epi32( H256[1] );
   ctx->valy[1] = _mm_set1_pi32( H256[1] );
   ctx->valx[2] = _mm256_set1_epi32( H256[2] );
   ctx->valy[2] = _mm_set1_pi32( H256[2] );
   ctx->valx[3] = _mm256_set1_epi32( H256[3] );
   ctx->valy[3] = _mm_set1_pi32( H256[3] );
   ctx->valx[4] = _mm256_set1_epi32( H256[4] );
   ctx->valy[4] = _mm_set1_pi32( H256[4] );
   ctx->valx[5] = _mm256_set1_epi32( H256[5] );
   ctx->valy[5] = _mm_set1_pi32( H256[5] );
   ctx->valx[6] = _mm256_set1_epi32( H256[6] );
   ctx->valy[6] = _mm_set1_pi32( H256[6] );
   ctx->valx[7] = _mm256_set1_epi32( H256[7] );
   ctx->valy[7] = _mm_set1_pi32( H256[7] );
   memcpy( ctx->valz, H256, 32 );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void sha256_11way_update( sha256_11way_context *ctx, const void *datax,
|
||||||
|
const void *datay, const void *dataz, size_t len )
|
||||||
|
{
|
||||||
|
__m256i *vdatax = (__m256i*) datax;
|
||||||
|
__m64 *vdatay = (__m64*) datay;
|
||||||
|
uint32_t *idataz = (uint32_t*)dataz;
|
||||||
|
size_t ptr;
|
||||||
|
const int buf_size = 64;
|
||||||
|
|
||||||
|
ptr = (unsigned)ctx->count_low & (buf_size - 1U);
|
||||||
|
while ( len > 0 )
|
||||||
|
{
|
||||||
|
size_t clen;
|
||||||
|
uint32_t clow, clow2;
|
||||||
|
|
||||||
|
clen = buf_size - ptr;
|
||||||
|
if ( clen > len )
|
||||||
|
clen = len;
|
||||||
|
memcpy_256( ctx->bufx + (ptr>>2), vdatax + (ptr>>2), clen>>2 );
|
||||||
|
memcpy_64 ( ctx->bufy + (ptr>>2), vdatay + (ptr>>2), clen>>2 );
|
||||||
|
      memcpy ( ctx->bufz + (ptr>>2), idataz + (ptr>>2), clen );  // ptr is a byte offset, bufz/idataz are uint32_t*
|
||||||
|
ptr += clen;
|
||||||
|
len -= clen;
|
||||||
|
if ( ptr == buf_size )
|
||||||
|
{
|
||||||
|
sha256_11way_round( ctx->bufx, ctx->valx,
|
||||||
|
ctx->bufy, ctx->valy,
|
||||||
|
ctx->bufz, ctx->valz );
|
||||||
|
ptr = 0;
|
||||||
|
}
|
||||||
|
clow = ctx->count_low;
|
||||||
|
clow2 = clow + clen;
|
||||||
|
ctx->count_low = clow2;
|
||||||
|
if ( clow2 < clow )
|
||||||
|
ctx->count_high++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dsty,
|
||||||
|
void *dstz)
|
||||||
|
{
|
||||||
|
unsigned ptr, u;
|
||||||
|
uint32_t low, high;
|
||||||
|
const int buf_size = 64;
|
||||||
|
const int pad = buf_size - 8;
|
||||||
|
|
||||||
|
ptr = (unsigned)ctx->count_low & (buf_size - 1U);
|
||||||
|
ctx->bufx[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
|
||||||
|
ctx->bufy[ ptr>>2 ] = _mm_set1_pi32( 0x80 );
|
||||||
|
ctx->bufz[ ptr>>2 ] = 0x80;
|
||||||
|
ptr += 4;
|
||||||
|
|
||||||
|
if ( ptr > pad )
|
||||||
|
{
|
||||||
|
memset_zero_256( ctx->bufx + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||||
|
memset_zero_64( ctx->bufy + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||||
|
      memset( ctx->bufz + (ptr>>2), 0, buf_size - ptr );  // scalar lane: memset count is in bytes
|
||||||
|
sha256_11way_round( ctx->bufx, ctx->valx,
|
||||||
|
ctx->bufy, ctx->valy,
|
||||||
|
ctx->bufz, ctx->valz );
|
||||||
|
memset_zero_256( ctx->bufx, pad >> 2 );
|
||||||
|
memset_zero_64( ctx->bufy, pad >> 2 );
|
||||||
|
      memset( ctx->bufz, 0, pad );  // scalar lane: memset count is in bytes
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
memset_zero_256( ctx->bufx + (ptr>>2), (pad - ptr) >> 2 );
|
||||||
|
memset_zero_64( ctx->bufy + (ptr>>2), (pad - ptr) >> 2 );
|
||||||
|
      memset( ctx->bufz + (ptr>>2), 0, pad - ptr );  // scalar lane: memset count is in bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
low = ctx->count_low;
|
||||||
|
high = (ctx->count_high << 3) | (low >> 29);
|
||||||
|
low = low << 3;
|
||||||
|
|
||||||
|
ctx->bufx[ pad >> 2 ] =
|
||||||
|
mm256_bswap_32( _mm256_set1_epi32( high ) );
|
||||||
|
ctx->bufy[ pad >> 2 ] =
|
||||||
|
mm64_bswap_32( _mm_set1_pi32( high ) );
|
||||||
|
ctx->bufz[ pad >> 2 ] =
|
||||||
|
bswap_32( high );
|
||||||
|
|
||||||
|
|
||||||
|
ctx->bufx[ ( pad+4 ) >> 2 ] =
|
||||||
|
mm256_bswap_32( _mm256_set1_epi32( low ) );
|
||||||
|
ctx->bufy[ ( pad+4 ) >> 2 ] =
|
||||||
|
mm64_bswap_32( _mm_set1_pi32( low ) );
|
||||||
|
ctx->bufz[ ( pad+4 ) >> 2 ] =
|
||||||
|
bswap_32( low );
|
||||||
|
|
||||||
|
sha256_11way_round( ctx->bufx, ctx->valx,
|
||||||
|
ctx->bufy, ctx->valy,
|
||||||
|
ctx->bufz, ctx->valz );
|
||||||
|
|
||||||
|
for ( u = 0; u < 8; u ++ )
|
||||||
|
{
|
||||||
|
casti_m256i( dstx, u ) = mm256_bswap_32( ctx->valx[u] );
|
||||||
|
casti_m64 ( dsty, u ) = mm64_bswap_32( ctx->valy[u] );
|
||||||
|
((uint32_t*)dstz)[u] = bswap_32( ctx->valz[u] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
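As a reading aid, this is how the 8 + 2 + 1 split is actually driven per pass, as it appears in the scanhash_sha256t_11way loop further down: each iteration covers 11 consecutive nonces, the AVX2 lanes take the first eight, the MMX lanes the next two, and the scalar lane the last one.

// Nonce assignment per pass (copied from the scan loop below).
*noncex = mm256_bswap_32(
          _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );  // lanes 0-7 (AVX2)
*noncey = mm64_bswap_32( _mm_set_pi32( n+9, n+8 ) );                   // lanes 8-9 (MMX)
*noncez = bswap_32( n+10 );                                            // lane 10 (scalar)
n += 11;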
algo/sha/sha256q-4way.c (new file, 219 lines)
@@ -0,0 +1,219 @@
|
|||||||
|
#include "sha256t-gate.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "sha2-hash-4way.h"
|
||||||
|
|
||||||
|
#if defined(SHA256T_8WAY)
|
||||||
|
|
||||||
|
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
void sha256q_8way_hash( void* output, const void* input )
|
||||||
|
{
|
||||||
|
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||||
|
sha256_8way_context ctx;
|
||||||
|
memcpy( &ctx, &sha256_ctx8, sizeof ctx );
|
||||||
|
|
||||||
|
sha256_8way( &ctx, input + (64<<3), 16 );
|
||||||
|
sha256_8way_close( &ctx, vhash );
|
||||||
|
|
||||||
|
sha256_8way_init( &ctx );
|
||||||
|
sha256_8way( &ctx, vhash, 32 );
|
||||||
|
sha256_8way_close( &ctx, vhash );
|
||||||
|
|
||||||
|
sha256_8way_init( &ctx );
|
||||||
|
sha256_8way( &ctx, vhash, 32 );
|
||||||
|
sha256_8way_close( &ctx, vhash );
|
||||||
|
|
||||||
|
sha256_8way_init( &ctx );
|
||||||
|
sha256_8way( &ctx, vhash, 32 );
|
||||||
|
sha256_8way_close( &ctx, output );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||||
|
   uint32_t edata[20] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
|
const uint64_t htmax[] = { 0,
|
||||||
|
0xF,
|
||||||
|
0xFF,
|
||||||
|
0xFFF,
|
||||||
|
0xFFFF,
|
||||||
|
0x10000000 };
|
||||||
|
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||||
|
0xFFFFFFF0,
|
||||||
|
0xFFFFFF00,
|
||||||
|
0xFFFFF000,
|
||||||
|
0xFFFF0000,
|
||||||
|
0 };
|
||||||
|
|
||||||
|
// Need big endian data
|
||||||
|
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
|
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
|
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
|
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||||
|
edata, edata, edata, edata, 640 );
|
||||||
|
sha256_8way_init( &sha256_ctx8 );
|
||||||
|
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||||
|
|
||||||
|
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||||
|
{
|
||||||
|
uint32_t mask = masks[m];
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*noncev = mm256_bswap_32(
|
||||||
|
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||||
|
|
||||||
|
pdata[19] = n;
|
||||||
|
|
||||||
|
sha256q_8way_hash( hash, vdata );
|
||||||
|
|
||||||
|
uint32_t *hash7 = &(hash[7<<3]);
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
|
if ( !( hash7[ lane ] & mask ) )
|
||||||
|
{
|
||||||
|
// deinterleave hash for lane
|
||||||
|
uint32_t lane_hash[8];
|
||||||
|
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
|
|
||||||
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
work_set_target_ratio( work, lane_hash );
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
|
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id, lane );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n += 8;
|
||||||
|
|
||||||
|
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(SHA256T_4WAY)
|
||||||
|
|
||||||
|
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
void sha256q_4way_hash( void* output, const void* input )
|
||||||
|
{
|
||||||
|
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||||
|
sha256_4way_context ctx;
|
||||||
|
memcpy( &ctx, &sha256_ctx4, sizeof ctx );
|
||||||
|
|
||||||
|
sha256_4way( &ctx, input + (64<<2), 16 );
|
||||||
|
sha256_4way_close( &ctx, vhash );
|
||||||
|
|
||||||
|
sha256_4way_init( &ctx );
|
||||||
|
sha256_4way( &ctx, vhash, 32 );
|
||||||
|
sha256_4way_close( &ctx, vhash );
|
||||||
|
|
||||||
|
sha256_4way_init( &ctx );
|
||||||
|
sha256_4way( &ctx, vhash, 32 );
|
||||||
|
sha256_4way_close( &ctx, vhash );
|
||||||
|
|
||||||
|
sha256_4way_init( &ctx );
|
||||||
|
sha256_4way( &ctx, vhash, 32 );
|
||||||
|
sha256_4way_close( &ctx, output );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
|
uint32_t lane_hash[8];
|
||||||
|
   uint32_t edata[20] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
|
const uint64_t htmax[] = { 0,
|
||||||
|
0xF,
|
||||||
|
0xFF,
|
||||||
|
0xFFF,
|
||||||
|
0xFFFF,
|
||||||
|
0x10000000 };
|
||||||
|
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||||
|
0xFFFFFFF0,
|
||||||
|
0xFFFFFF00,
|
||||||
|
0xFFFFF000,
|
||||||
|
0xFFFF0000,
|
||||||
|
0 };
|
||||||
|
|
||||||
|
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
|
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
|
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
|
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
|
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
|
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||||
|
sha256_4way_init( &sha256_ctx4 );
|
||||||
|
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||||
|
|
||||||
|
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||||
|
{
|
||||||
|
uint32_t mask = masks[m];
|
||||||
|
do {
|
||||||
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
|
||||||
|
sha256q_4way_hash( hash, vdata );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
|
if ( !( hash7[ lane ] & mask ) )
|
||||||
|
{
|
||||||
|
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
|
|
||||||
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
work_set_target_ratio( work, lane_hash );
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
|
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id, lane );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
n += 4;
|
||||||
|
|
||||||
|
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
algo/sha/sha256q.c (new file, 113 lines)
@@ -0,0 +1,113 @@
|
|||||||
|
#include "sha256t-gate.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
|
static __thread SHA256_CTX sha256q_ctx __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
void sha256q_midstate( const void* input )
|
||||||
|
{
|
||||||
|
SHA256_Init( &sha256q_ctx );
|
||||||
|
SHA256_Update( &sha256q_ctx, input, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
void sha256q_hash( void* output, const void* input )
|
||||||
|
{
|
||||||
|
uint32_t _ALIGN(64) hash[16];
|
||||||
|
const int midlen = 64; // bytes
|
||||||
|
const int tail = 80 - midlen; // 16
|
||||||
|
|
||||||
|
SHA256_CTX ctx __attribute__ ((aligned (64)));
|
||||||
|
memcpy( &ctx, &sha256q_ctx, sizeof sha256q_ctx );
|
||||||
|
|
||||||
|
SHA256_Update( &ctx, input + midlen, tail );
|
||||||
|
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||||
|
|
||||||
|
SHA256_Init( &ctx );
|
||||||
|
SHA256_Update( &ctx, hash, 32 );
|
||||||
|
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||||
|
|
||||||
|
SHA256_Init( &ctx );
|
||||||
|
SHA256_Update( &ctx, hash, 32 );
|
||||||
|
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||||
|
|
||||||
|
SHA256_Init( &ctx );
|
||||||
|
SHA256_Update( &ctx, hash, 32 );
|
||||||
|
SHA256_Final( (unsigned char*)hash, &ctx );
|
||||||
|
|
||||||
|
memcpy( output, hash, 32 );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
uint32_t n = pdata[19] - 1;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
uint32_t __declspec(align(32)) hash64[8];
|
||||||
|
#else
|
||||||
|
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||||
|
#endif
|
||||||
|
uint32_t endiandata[32];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
|
uint64_t htmax[] = {
|
||||||
|
0,
|
||||||
|
0xF,
|
||||||
|
0xFF,
|
||||||
|
0xFFF,
|
||||||
|
0xFFFF,
|
||||||
|
0x10000000
|
||||||
|
};
|
||||||
|
uint32_t masks[] = {
|
||||||
|
0xFFFFFFFF,
|
||||||
|
0xFFFFFFF0,
|
||||||
|
0xFFFFFF00,
|
||||||
|
0xFFFFF000,
|
||||||
|
0xFFFF0000,
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
// we need bigendian data...
|
||||||
|
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
|
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
|
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
|
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
|
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
|
sha256q_midstate( endiandata );
|
||||||
|
|
||||||
|
for ( int m = 0; m < 6; m++ )
|
||||||
|
{
|
||||||
|
if ( Htarg <= htmax[m] )
|
||||||
|
{
|
||||||
|
uint32_t mask = masks[m];
|
||||||
|
do {
|
||||||
|
pdata[19] = ++n;
|
||||||
|
be32enc(&endiandata[19], n);
|
||||||
|
sha256q_hash( hash64, endiandata );
|
||||||
|
if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) )
|
||||||
|
{
|
||||||
|
work_set_target_ratio( work, hash64 );
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
|
applog( LOG_NOTICE, "Share %d submitted by thread %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
}
|
||||||
|
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
pdata[19] = n;
|
||||||
|
return 0;
|
||||||
|
}
|
@@ -5,6 +5,137 @@
#include <stdio.h>
#include "sha2-hash-4way.h"

#if defined(SHA256T_11WAY)
|
||||||
|
|
||||||
|
static __thread sha256_11way_context sha256_ctx11 __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
void sha256t_11way_hash( void *outx, void *outy, void *outz, const void *inpx,
|
||||||
|
const void *inpy, const void*inpz )
|
||||||
|
{
|
||||||
|
uint32_t hashx[8*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hashy[8*2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hashz[8] __attribute__ ((aligned (64)));
|
||||||
|
sha256_11way_context ctx;
|
||||||
|
const void *inpx64 = inpx+(64<<3);
|
||||||
|
const void *inpy64 = inpy+(64<<1);
|
||||||
|
const void *inpz64 = inpz+ 64;
|
||||||
|
|
||||||
|
memcpy( &ctx, &sha256_ctx11, sizeof ctx );
|
||||||
|
sha256_11way_update( &ctx, inpx64, inpy64, inpz64, 16 );
|
||||||
|
sha256_11way_close( &ctx, hashx, hashy, hashz );
|
||||||
|
|
||||||
|
sha256_11way_init( &ctx );
|
||||||
|
sha256_11way_update( &ctx, hashx, hashy, hashz, 32 );
|
||||||
|
sha256_11way_close( &ctx, hashx, hashy, hashz );
|
||||||
|
|
||||||
|
sha256_11way_init( &ctx );
|
||||||
|
sha256_11way_update( &ctx, hashx, hashy, hashz, 32 );
|
||||||
|
sha256_11way_close( &ctx, outx, outy, outz );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t datax[20*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t datay[20*2] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t dataz[20] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hashx[8*8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hashy[8*2] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hashz[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *hash7;
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
__m256i *noncex = (__m256i*) datax + 19;
|
||||||
|
__m64 *noncey = (__m64*) datay + 19;
|
||||||
|
uint32_t *noncez = (uint32_t*)dataz + 19;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
int i;
|
||||||
|
const uint64_t htmax[] = { 0,
|
||||||
|
0xF,
|
||||||
|
0xFF,
|
||||||
|
0xFFF,
|
||||||
|
0xFFFF,
|
||||||
|
0x10000000 };
|
||||||
|
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||||
|
0xFFFFFFF0,
|
||||||
|
0xFFFFFF00,
|
||||||
|
0xFFFFF000,
|
||||||
|
0xFFFF0000,
|
||||||
|
0 };
|
||||||
|
|
||||||
|
// Use dataz (scalar) to stage bswapped data for the vectors.
|
||||||
|
casti_m256i( dataz, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
|
casti_m256i( dataz, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
|
casti_m128i( dataz, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
|
mm256_interleave_8x32( datax, dataz, dataz, dataz, dataz,
|
||||||
|
dataz, dataz, dataz, dataz, 640 );
|
||||||
|
mm64_interleave_2x32( datay, dataz, dataz, 640 );
|
||||||
|
|
||||||
|
sha256_11way_init( &sha256_ctx11 );
|
||||||
|
sha256_11way_update( &sha256_ctx11, datax, datay, dataz, 64 );
|
||||||
|
|
||||||
|
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||||
|
{
|
||||||
|
uint32_t mask = masks[m];
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*noncex = mm256_bswap_32(
|
||||||
|
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||||
|
*noncey = mm64_bswap_32( _mm_set_pi32( n+9, n+8 ) );
|
||||||
|
*noncez = bswap_32( n+10 );
|
||||||
|
|
||||||
|
pdata[19] = n;
|
||||||
|
|
||||||
|
sha256t_11way_hash( hashx, hashy, hashz, datax, datay, dataz );
|
||||||
|
|
||||||
|
if ( opt_benchmark ) { n += 11; continue; }
|
||||||
|
|
||||||
|
hash7 = &(hashx[7<<3]);
|
||||||
|
for ( i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
|
||||||
|
{
|
||||||
|
// deinterleave hash for lane
|
||||||
|
mm256_extract_lane_8x32( lane_hash, hashx, i, 256 );
|
||||||
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + i;
|
||||||
|
submit_solution( work, lane_hash, mythr, i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
hash7 = &(hashy[7<<1]);
|
||||||
|
      for ( i = 0; i < 2; i++ ) if ( !( hash7[ i ] & mask ) )
|
||||||
|
|
||||||
|
{
|
||||||
|
mm64_extract_lane_2x32( lane_hash, hashy, i, 256 );
|
||||||
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + 8 + i;
|
||||||
|
submit_solution( work, lane_hash, mythr, i+8 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( !(hashz[7] & mask ) && fulltest( hashz, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n+10;
|
||||||
|
submit_solution( work, hashz, mythr, 10 );
|
||||||
|
}
|
||||||
|
n += 11;
|
||||||
|
|
||||||
|
} while ( (n < max_nonce-12) && !work_restart[thr_id].restart );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(SHA256T_8WAY)
|
#if defined(SHA256T_8WAY)
|
||||||
|
|
||||||
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
static __thread sha256_8way_context sha256_ctx8 __attribute__ ((aligned (64)));
|
||||||
@@ -29,7 +160,7 @@ void sha256t_8way_hash( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||||
@@ -71,44 +202,38 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_bswap_32(
|
*noncev = mm256_bswap_32(
|
||||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||||
|
pdata[19] = n;
|
||||||
pdata[19] = n;
|
|
||||||
|
|
||||||
sha256t_8way_hash( hash, vdata );
|
sha256t_8way_hash( hash, vdata );
|
||||||
|
|
||||||
uint32_t *hash7 = &(hash[7<<3]);
|
uint32_t *hash7 = &(hash[7<<3]);
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
if ( !( hash7[ lane ] & mask ) )
|
if ( !( hash7[ lane ] & mask ) )
|
||||||
{
|
{
|
||||||
// deinterleave hash for lane
|
// deinterleave hash for lane
|
||||||
uint32_t lane_hash[8];
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
|
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
work_set_target_ratio( work, lane_hash );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
if ( submit_work( mythr, work ) )
|
|
||||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
|
||||||
accepted_share_count + rejected_share_count + 1,
|
|
||||||
thr_id, lane );
|
|
||||||
else
|
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
|
|
||||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(SHA256T_4WAY)
|
#endif
|
||||||
|
|
||||||
|
#if defined(SHA256T_4WAY)
|
||||||
|
|
||||||
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
@@ -136,9 +261,9 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8];
|
|
||||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
||||||
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -187,22 +312,14 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
work_set_target_ratio( work, lane_hash );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
if ( submit_work( mythr, work ) )
|
}
|
||||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
|
||||||
accepted_share_count + rejected_share_count + 1,
|
|
||||||
thr_id, lane );
|
|
||||||
else
|
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
n += 4;
|
n += 4;
|
||||||
|
|
||||||
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -2,16 +2,20 @@
|
|||||||
|
|
||||||
bool register_sha256t_algo( algo_gate_t* gate )
|
bool register_sha256t_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined(SHA256T_8WAY)
|
#if defined(SHA256T_11WAY)
|
||||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256t_11way;
|
||||||
|
gate->hash = (void*)&sha256t_11way_hash;
|
||||||
|
#elif defined(SHA256T_8WAY)
|
||||||
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||||
gate->hash = (void*)&sha256t_8way_hash;
|
gate->hash = (void*)&sha256t_8way_hash;
|
||||||
#elif defined(SHA256T_4WAY)
|
#elif defined(SHA256T_4WAY)
|
||||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||||
gate->hash = (void*)&sha256t_4way_hash;
|
gate->hash = (void*)&sha256t_4way_hash;
|
||||||
#else
|
#else
|
||||||
gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
|
gate->optimizations = SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256t;
|
gate->scanhash = (void*)&scanhash_sha256t;
|
||||||
gate->hash = (void*)&sha256t_hash;
|
gate->hash = (void*)&sha256t_hash;
|
||||||
#endif
|
#endif
|
||||||
@@ -19,3 +23,23 @@ bool register_sha256t_algo( algo_gate_t* gate )
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool register_sha256q_algo( algo_gate_t* gate )
|
||||||
|
{
|
||||||
|
#if defined(SHA256T_8WAY)
|
||||||
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256q_8way;
|
||||||
|
gate->hash = (void*)&sha256q_8way_hash;
|
||||||
|
#elif defined(SHA256T_4WAY)
|
||||||
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256q_4way;
|
||||||
|
gate->hash = (void*)&sha256q_4way_hash;
|
||||||
|
#else
|
||||||
|
gate->optimizations = SHA_OPT;
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256q;
|
||||||
|
gate->hash = (void*)&sha256q_hash;
|
||||||
|
#endif
|
||||||
|
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||||
|
return true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -6,34 +6,55 @@
|
|||||||
|
|
||||||
// Override multi way on ryzen, SHA is better.
|
// Override multi way on ryzen, SHA is better.
|
||||||
#if !defined(RYZEN_)
|
#if !defined(RYZEN_)
|
||||||
#if defined(__SSE4_2__)
|
#if defined(__SSE2__)
|
||||||
#define SHA256T_4WAY
|
#define SHA256T_4WAY
|
||||||
#endif
|
#endif
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#define SHA256T_8WAY
|
#define SHA256T_8WAY
|
||||||
|
// #define SHA256T_11WAY
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool register_blake2s_algo( algo_gate_t* gate );
|
bool register_sha256t_algo( algo_gate_t* gate );
|
||||||
|
bool register_sha256q_algo( algo_gate_t* gate );
|
||||||
|
|
||||||
|
#if defined(SHA256T_11WAY)
|
||||||
|
|
||||||
|
void sha256t_11way_hash( void *outx, void *outy, void *outz, const void *inpx,
|
||||||
|
const void *inpy, const void *inpz );
|
||||||
|
int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
//void sha256q_8way_hash( void *output, const void *input );
|
||||||
|
//int scanhash_sha256q_11way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
// uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(SHA256T_8WAY)
|
#if defined(SHA256T_8WAY)
|
||||||
|
|
||||||
void sha256t_8way_hash( void *output, const void *input );
|
void sha256t_8way_hash( void *output, const void *input );
|
||||||
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
void sha256q_8way_hash( void *output, const void *input );
|
||||||
|
int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
|
||||||
#elif defined (SHA256T_4WAY)
|
#if defined(SHA256T_4WAY)
|
||||||
|
|
||||||
void sha256t_4way_hash( void *output, const void *input );
|
void sha256t_4way_hash( void *output, const void *input );
|
||||||
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
#else
|
void sha256q_4way_hash( void *output, const void *input );
|
||||||
|
int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
|
||||||
void sha256t_hash( void *output, const void *input );
|
void sha256t_hash( void *output, const void *input );
|
||||||
int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
void sha256q_hash( void *output, const void *input );
|
||||||
#endif
|
int scanhash_sha256q( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -5,8 +5,6 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
#if !defined(SHA256T_4WAY)
|
|
||||||
|
|
||||||
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
void sha256t_midstate( const void* input )
|
void sha256t_midstate( const void* input )
|
||||||
@@ -72,8 +70,11 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
};
|
};
|
||||||
|
|
||||||
// we need bigendian data...
|
// we need bigendian data...
|
||||||
for ( int k = 0; k < 19; k++ )
|
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
be32enc( &endiandata[k], pdata[k] );
|
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
|
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
|
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
|
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
sha256t_midstate( endiandata );
|
sha256t_midstate( endiandata );
|
||||||
|
|
||||||
@@ -89,7 +90,13 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) )
|
if ( ( !(hash64[7] & mask) ) && fulltest( hash64, ptarget ) )
|
||||||
{
|
{
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return true;
|
work_set_target_ratio( work, hash64 );
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
|
applog( LOG_NOTICE, "Share %d submitted by thread %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
}
|
}
|
||||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||||
break;
|
break;
|
||||||
@@ -100,4 +107,3 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
@@ -346,7 +346,7 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
|||||||
memcpy( buf + ptr, data, clen );
|
memcpy( buf + ptr, data, clen );
|
||||||
data = (const unsigned char *)data + clen;
|
data = (const unsigned char *)data + clen;
|
||||||
ptr += clen;
|
ptr += clen;
|
||||||
len -= clen >> 1;
|
len -= (clen >> 1);
|
||||||
if ( ptr == sizeof ctx->buf )
|
if ( ptr == sizeof ctx->buf )
|
||||||
{
|
{
|
||||||
if ( ( ctx->count0 = ctx->count0 + 1024 ) == 0 )
|
if ( ( ctx->count0 = ctx->count0 + 1024 ) == 0 )
|
||||||
@@ -365,16 +365,8 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint32_t vp = ptr>>5;
|
uint32_t vp = ptr>>5;
|
||||||
|
|
||||||
// Terminating byte then zero pad
|
|
||||||
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
|
||||||
|
|
||||||
// Zero pad full vectors up to count
|
|
||||||
for ( ; vp < 6; vp++ )
|
|
||||||
casti_m256i( buf, vp ) = m256_zero;
|
|
||||||
|
|
||||||
// Count = { 0, 16, 64, 80 }. Outsize = 16 u32 = 512 bits = 0x0200
|
// Count = { 0, 16, 64, 80 }. Outsize = 16 u32 = 512 bits = 0x0200
|
||||||
// Count is misaligned to 16 bits and straddles a vector.
|
// Count is misaligned to 16 bits and straddles 2 vectors.
|
||||||
// Use u32 overlay to stage then u16 to load buf.
|
// Use u32 overlay to stage then u16 to load buf.
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
@@ -387,6 +379,18 @@ void shavite512_2way_update_close( shavite512_2way_context *ctx, void *dst,
|
|||||||
count.u32[2] = ctx->count2;
|
count.u32[2] = ctx->count2;
|
||||||
count.u32[3] = ctx->count3;
|
count.u32[3] = ctx->count3;
|
||||||
|
|
||||||
|
if ( vp == 0 ) // empty buf, xevan.
|
||||||
|
{
|
||||||
|
casti_m256i( buf, 0 ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||||
|
memset_zero_256( (__m256i*)buf + 1, 5 );
|
||||||
|
ctx->count0 = ctx->count1 = ctx->count2 = ctx->count3 = 0;
|
||||||
|
}
|
||||||
|
else // half full buf, everyone else.
|
||||||
|
{
|
||||||
|
casti_m256i( buf, vp++ ) = _mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 );
|
||||||
|
memset_zero_256( (__m256i*)buf + vp, 6 - vp );
|
||||||
|
}
|
||||||
|
|
||||||
casti_m256i( buf, 6 ) = _mm256_set_epi16( count.u16[0], 0,0,0,0,0,0,0,
|
casti_m256i( buf, 6 ) = _mm256_set_epi16( count.u16[0], 0,0,0,0,0,0,0,
|
||||||
count.u16[0], 0,0,0,0,0,0,0 );
|
count.u16[0], 0,0,0,0,0,0,0 );
|
||||||
casti_m256i( buf, 7 ) = _mm256_set_epi16(
|
casti_m256i( buf, 7 ) = _mm256_set_epi16(
|
||||||
|
@@ -25,7 +25,8 @@
|
|||||||
#include "algo/haval/haval-hash-4way.h"
|
#include "algo/haval/haval-hash-4way.h"
|
||||||
#include "algo/sha/sha2-hash-4way.h"
|
#include "algo/sha/sha2-hash-4way.h"
|
||||||
|
|
||||||
typedef struct {
|
union _sonoa_4way_context_overlay
|
||||||
|
{
|
||||||
blake512_4way_context blake;
|
blake512_4way_context blake;
|
||||||
bmw512_4way_context bmw;
|
bmw512_4way_context bmw;
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
@@ -43,8 +44,10 @@ typedef struct {
|
|||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
sha512_4way_context sha512;
|
sha512_4way_context sha512;
|
||||||
haval256_5_4way_context haval;
|
haval256_5_4way_context haval;
|
||||||
} sonoa_4way_ctx_holder;
|
};
|
||||||
|
|
||||||
|
typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
|
||||||
|
/*
|
||||||
sonoa_4way_ctx_holder sonoa_4way_ctx __attribute__ ((aligned (64)));
|
sonoa_4way_ctx_holder sonoa_4way_ctx __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
void init_sonoa_4way_ctx()
|
void init_sonoa_4way_ctx()
|
||||||
@@ -67,6 +70,7 @@ void init_sonoa_4way_ctx()
|
|||||||
sha512_4way_init( &sonoa_4way_ctx.sha512 );
|
sha512_4way_init( &sonoa_4way_ctx.sha512 );
|
||||||
haval256_5_4way_init( &sonoa_4way_ctx.haval );
|
haval256_5_4way_init( &sonoa_4way_ctx.haval );
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
void sonoa_4way_hash( void *state, const void *input )
|
void sonoa_4way_hash( void *state, const void *input )
|
||||||
{
|
{
|
||||||
@@ -77,19 +81,23 @@ void sonoa_4way_hash( void *state, const void *input )
 uint64_t vhash[8*4] __attribute__ ((aligned (64)));
 uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
 uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
-sonoa_4way_ctx_holder ctx __attribute__ ((aligned (64)));
-memcpy( &ctx, &sonoa_4way_ctx, sizeof(sonoa_4way_ctx) );
+sonoa_4way_context_overlay ctx;
+// sonoa_4way_ctx_holder ctx __attribute__ ((aligned (64)));
+// memcpy( &ctx, &sonoa_4way_ctx, sizeof(sonoa_4way_ctx) );
 
 // 1
 
+blake512_4way_init( &ctx.blake );
 blake512_4way( &ctx.blake, input, 80 );
 blake512_4way_close( &ctx.blake, vhash );
 
+bmw512_4way_init( &ctx.bmw );
 bmw512_4way( &ctx.bmw, vhash, 64 );
 bmw512_4way_close( &ctx.bmw, vhash );
 
 mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
 
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
 init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
@@ -100,29 +108,36 @@ void sonoa_4way_hash( void *state, const void *input )
 
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
 
+skein512_4way_init( &ctx.skein );
 skein512_4way( &ctx.skein, vhash, 64 );
 skein512_4way_close( &ctx.skein, vhash );
 
+jh512_4way_init( &ctx.jh );
 jh512_4way( &ctx.jh, vhash, 64 );
 jh512_4way_close( &ctx.jh, vhash );
 
+keccak512_4way_init( &ctx.keccak );
 keccak512_4way( &ctx.keccak, vhash, 64 );
 keccak512_4way_close( &ctx.keccak, vhash );
 
 mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
 
+luffa_2way_init( &ctx.luffa, 512 );
 luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
 luffa_2way_init( &ctx.luffa, 512 );
 luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, 64 );
 
+cube_2way_init( &ctx.cube, 512, 16, 32 );
 cube_2way_update_close( &ctx.cube, vhashA, vhashA, 64 );
 cube_2way_init( &ctx.cube, 512, 16, 32 );
 cube_2way_update_close( &ctx.cube, vhashB, vhashB, 64 );
 
+shavite512_2way_init( &ctx.shavite );
 shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
 shavite512_2way_init( &ctx.shavite );
 shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
 
+simd_2way_init( &ctx.simd, 512 );
 simd_2way_update_close( &ctx.simd, vhashA, vhashA, 512 );
 simd_2way_init( &ctx.simd, 512 );
 simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
@@ -130,6 +145,7 @@ void sonoa_4way_hash( void *state, const void *input )
 mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
 mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
 
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash0,
 (const BitSequence *) hash0, 512 );
 init_echo( &ctx.echo, 512 );
@@ -215,10 +231,12 @@ void sonoa_4way_hash( void *state, const void *input )
 
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
 
+hamsi512_4way_init( &ctx.hamsi );
 hamsi512_4way( &ctx.hamsi, vhash, 64 );
 hamsi512_4way_close( &ctx.hamsi, vhash );
 
 // 3
 
 bmw512_4way_init( &ctx.bmw );
 bmw512_4way( &ctx.bmw, vhash, 64 );
 bmw512_4way_close( &ctx.bmw, vhash );
@@ -294,6 +312,7 @@ void sonoa_4way_hash( void *state, const void *input )
 
 mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
 
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash0, 64 );
 sph_fugue512_close( &ctx.fugue, hash0 );
 sph_fugue512_init( &ctx.fugue );
@@ -399,10 +418,11 @@ void sonoa_4way_hash( void *state, const void *input )
 
 mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
 
+shabal512_4way_init( &ctx.shabal );
 shabal512_4way( &ctx.shabal, vhash, 64 );
 shabal512_4way_close( &ctx.shabal, vhash );
 
-mm256_reinterleave_4x64( vhashB, vhash, 512 );
+mm256_reinterleave_4x32_4x64( vhashB, vhash, 512 );
 
 hamsi512_4way_init( &ctx.hamsi );
 hamsi512_4way( &ctx.hamsi, vhashB, 64 );
@@ -438,7 +458,7 @@ void sonoa_4way_hash( void *state, const void *input )
 bmw512_4way( &ctx.bmw, vhash, 64 );
 bmw512_4way_close( &ctx.bmw, vhash );
 
-mm256_reinterleave_4x32( vhashB, vhash, 512 );
+mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
 
 shabal512_4way_init( &ctx.shabal );
 shabal512_4way( &ctx.shabal, vhashB, 64 );
@@ -536,6 +556,7 @@ void sonoa_4way_hash( void *state, const void *input )
 
 mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
 
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash0, 64 );
 sph_whirlpool_close( &ctx.whirlpool, hash0 );
 sph_whirlpool_init( &ctx.whirlpool );
@@ -663,6 +684,7 @@ void sonoa_4way_hash( void *state, const void *input )
 
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
 
+sha512_4way_init( &ctx.sha512 );
 sha512_4way( &ctx.sha512, vhash, 64 );
 sha512_4way_close( &ctx.sha512, vhash );
 
@@ -800,11 +822,11 @@ void sonoa_4way_hash( void *state, const void *input )
 sha512_4way( &ctx.sha512, vhash, 64 );
 sha512_4way_close( &ctx.sha512, vhash );
 
-mm256_reinterleave_4x32( vhashB, vhash, 512 );
+mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
 
+haval256_5_4way_init( &ctx.haval );
 haval256_5_4way( &ctx.haval, vhashB, 64 );
 haval256_5_4way_close( &ctx.haval, state );
 
 }
 
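Note: the renamed conversion helpers above (mm256_reinterleave_4x32_4x64, mm256_reinterleave_4x64_4x32) appear to spell out both layouts, source first and destination second, when data moves between the 4-way 32-bit stages (shabal, haval) and the 4-way 64-bit stages. As a plain-C reference, 4x64 interleaving of four lanes looks like the sketch below; the function name is illustrative and the real helpers are AVX2:

    #include <stdint.h>
    #include <stddef.h>

    // dst = { a0, b0, c0, d0, a1, b1, c1, d1, ... }  (64-bit words).
    // A 4x32 layout interleaves the same four lanes 32 bits at a time instead.
    static void interleave_4x64_ref( uint64_t *dst, const uint64_t *a,
                                     const uint64_t *b, const uint64_t *c,
                                     const uint64_t *d, size_t bit_len )
    {
       for ( size_t i = 0; i < bit_len / 64; i++ )
       {
          dst[ 4*i + 0 ] = a[i];
          dst[ 4*i + 1 ] = b[i];
          dst[ 4*i + 2 ] = c[i];
          dst[ 4*i + 3 ] = d[i];
       }
    }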
 int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
@@ -819,10 +841,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
 uint32_t *ptarget = work->target;
 uint32_t n = pdata[19];
 const uint32_t first_nonce = pdata[19];
-uint32_t *nonces = work->nonces;
-int num_found = 0;
 __m256i *noncev = (__m256i*)vdata + 9; // aligned
-// uint32_t *noncep = vdata + 73; // 9*8 + 1
 const uint32_t Htarg = ptarget[7];
 /* int */ thr_id = mythr->id; // thr_id arg is deprecated
 uint64_t htmax[] = { 0, 0xF, 0xFF,
@@ -855,18 +874,23 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
 if ( fulltest( lane_hash, ptarget ) )
 {
 pdata[19] = n + lane;
-nonces[ num_found++ ] = n + lane;
 work_set_target_ratio( work, lane_hash );
+if ( submit_work( mythr, work ) )
+applog( LOG_NOTICE,
+"Share %d submitted by thread %d, lane %d.",
+accepted_share_count + rejected_share_count + 1,
+thr_id, lane );
+else
+applog( LOG_WARNING, "Failed to submit share." );
 }
 }
 n += 4;
-} while ( ( num_found == 0 ) && ( n < max_nonce )
-&& !work_restart[thr_id].restart );
+} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
 break;
 }
 
 *hashes_done = n - first_nonce + 1;
-return num_found;
+return 0;
 }
 
 #endif
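Note: instead of collecting winning nonces in work->nonces and returning num_found, the scan loops in this change submit each qualifying lane immediately and always return 0. Condensed from the new lines of the hunks above and below (not a separate function; hash7, mask, lane_hash, ptarget, pdata, n, mythr, work and the share counters all come from the surrounding scanhash code):

    for ( int lane = 0; lane < 4; lane++ )
    if ( ( hash7[ lane ] & mask ) == 0 )
    {
       mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
       if ( fulltest( lane_hash, ptarget ) )
       {
          pdata[19] = n + lane;                    // record the winning nonce
          work_set_target_ratio( work, lane_hash );
          if ( submit_work( mythr, work ) )        // submit directly, don't return it
             applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
                     accepted_share_count + rejected_share_count + 1,
                     thr_id, lane );
          else
             applog( LOG_WARNING, "Failed to submit share." );
       }
    }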
@@ -3,7 +3,7 @@
 bool register_sonoa_algo( algo_gate_t* gate )
 {
 #if defined (SONOA_4WAY)
-init_sonoa_4way_ctx();
+// init_sonoa_4way_ctx();
 gate->scanhash = (void*)&scanhash_sonoa_4way;
 gate->hash = (void*)&sonoa_4way_hash;
 #else
@@ -17,7 +17,7 @@ void sonoa_4way_hash( void *state, const void *input );
 int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
 uint64_t *hashes_done, struct thr_info *mythr );
 
-void init_sonoa_4way_ctx();
+//void init_sonoa_4way_ctx();
 
 #endif
 
@@ -14,7 +14,6 @@
 #include "algo/keccak/keccak-hash-4way.h"
 #include "algo/luffa/luffa-hash-2way.h"
 #include "algo/cubehash/cube-hash-2way.h"
-#include "algo/shavite/sph_shavite.h"
 #include "algo/shavite/shavite-hash-2way.h"
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/echo/aes_ni/hash_api.h"
@@ -222,7 +221,7 @@ void x17_4way_hash( void *state, const void *input )
 sha512_4way_close( &ctx.sha512, vhash );
 
 // 17 Haval parallel 32 bit
-mm256_reinterleave_4x32( vhashB, vhash, 512 );
+mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
 
 haval256_5_4way_init( &ctx.haval );
 haval256_5_4way( &ctx.haval, vhashB, 64 );
@@ -242,8 +241,6 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
 uint32_t *ptarget = work->target;
 uint32_t n = pdata[19];
 const uint32_t first_nonce = pdata[19];
-uint32_t *nonces = work->nonces;
-int num_found = 0;
 __m256i *noncev = (__m256i*)vdata + 9; // aligned
 /* int */ thr_id = mythr->id; // thr_id arg is deprecated
 const uint32_t Htarg = ptarget[7];
@@ -260,35 +257,40 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
 uint64_t *edata = (uint64_t*)endiandata;
 mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
 
-for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] )
+for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
 {
-uint32_t mask = masks[m];
+uint32_t mask = masks[ m ];
 do
 {
 *noncev = mm256_interleave_blend_32( mm256_bswap_32(
-_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ),
+_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ),
 *noncev );
 x17_4way_hash( hash, vdata );
 
 for ( int lane = 0; lane < 4; lane++ )
-if ( ( ( hash7[ lane ] & mask ) == 0 ) )
+if ( ( hash7[ lane ] & mask ) == 0 )
 {
 mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
 if ( fulltest( lane_hash, ptarget ) )
 {
 pdata[19] = n + lane;
-nonces[ num_found++ ] = n + lane;
 work_set_target_ratio( work, lane_hash );
+if ( submit_work( mythr, work ) )
+applog( LOG_NOTICE,
+"Share %d submitted by thread %d, lane %d.",
+accepted_share_count + rejected_share_count + 1,
+thr_id, lane );
+else
+applog( LOG_WARNING, "Failed to submit share." );
 }
 }
 n += 4;
-} while ( ( num_found == 0 ) && ( n < max_nonce )
-&& !work_restart[thr_id].restart );
+} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
 break;
 }
 
 *hashes_done = n - first_nonce + 1;
-return num_found;
+return 0;
 }
 
 #endif
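Note: the nonce update in the loop above packs the four lane nonces into a single AVX2 store. A rough scalar equivalent is sketched below as a fragment of the scan loop, assuming mm256_interleave_blend_32 keeps the even 32-bit elements of the old value and takes the odd ones (the nonce positions of the 4x64-interleaved header) from the new vector; treat it as an illustration, not the project's code:

    // vdata holds four 80-byte headers interleaved 4x64; the tenth 256-bit word
    // carries one 64-bit chunk per lane, whose upper half is that lane's nonce.
    uint32_t *w = (uint32_t*)( (__m256i*)vdata + 9 );
    for ( int lane = 0; lane < 4; lane++ )
       w[ 2*lane + 1 ] = __builtin_bswap32( n + lane );   // big-endian nonce per lane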
@@ -12,8 +12,9 @@
 #include "algo/jh/jh-hash-4way.h"
 #include "algo/keccak/keccak-hash-4way.h"
 #include "algo/skein/skein-hash-4way.h"
-#include "algo/shavite/sph_shavite.h"
 #include "algo/luffa/luffa-hash-2way.h"
+#include "algo/cubehash/cube-hash-2way.h"
+#include "algo/shavite/shavite-hash-2way.h"
 #include "algo/cubehash/cubehash_sse2.h"
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/echo/aes_ni/hash_api.h"
@@ -24,16 +25,17 @@
 #include "algo/sha/sha2-hash-4way.h"
 #include "algo/haval/haval-hash-4way.h"
 
-typedef struct {
-blake512_4way_context blake;
+union _xevan_4way_context_overlay
+{
+blake512_4way_context blake;
 bmw512_4way_context bmw;
 hashState_groestl groestl;
 skein512_4way_context skein;
 jh512_4way_context jh;
 keccak512_4way_context keccak;
 luffa_2way_context luffa;
-cubehashParam cube;
-sph_shavite512_context shavite;
+cube_2way_context cube;
+shavite512_2way_context shavite;
 simd_2way_context simd;
 hashState_echo echo;
 hamsi512_4way_context hamsi;
@@ -42,39 +44,8 @@ typedef struct {
 sph_whirlpool_context whirlpool;
 sha512_4way_context sha512;
 haval256_5_4way_context haval;
-} xevan_4way_ctx_holder;
-
-xevan_4way_ctx_holder xevan_4way_ctx __attribute__ ((aligned (64)));
-static __thread blake512_4way_context xevan_blake_4way_mid
-__attribute__ ((aligned (64)));
-
-void init_xevan_4way_ctx()
-{
-blake512_4way_init(&xevan_4way_ctx.blake);
-bmw512_4way_init( &xevan_4way_ctx.bmw );
-init_groestl( &xevan_4way_ctx.groestl, 64 );
-skein512_4way_init(&xevan_4way_ctx.skein);
-jh512_4way_init(&xevan_4way_ctx.jh);
-keccak512_4way_init(&xevan_4way_ctx.keccak);
-luffa_2way_init( &xevan_4way_ctx.luffa, 512 );
-cubehashInit( &xevan_4way_ctx.cube, 512, 16, 32 );
-sph_shavite512_init( &xevan_4way_ctx.shavite );
-simd_2way_init( &xevan_4way_ctx.simd, 512 );
-init_echo( &xevan_4way_ctx.echo, 512 );
-hamsi512_4way_init( &xevan_4way_ctx.hamsi );
-sph_fugue512_init( &xevan_4way_ctx.fugue );
-shabal512_4way_init( &xevan_4way_ctx.shabal );
-sph_whirlpool_init( &xevan_4way_ctx.whirlpool );
-sha512_4way_init( &xevan_4way_ctx.sha512 );
-haval256_5_4way_init( &xevan_4way_ctx.haval );
 };
+typedef union _xevan_4way_context_overlay xevan_4way_context_overlay;
-void xevan_4way_blake512_midstate( const void* input )
-{
-memcpy( &xevan_blake_4way_mid, &xevan_4way_ctx.blake,
-sizeof(xevan_blake_4way_mid) );
-blake512_4way( &xevan_blake_4way_mid, input, 64 );
-}
 
 void xevan_4way_hash( void *output, const void *input )
 {
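Note: the deleted xevan_4way_blake512_midstate() above precomputed the Blake-512 4-way state over the first 64 header bytes once per work unit, so the hash function only had to absorb the 16-byte tail per nonce; the rewrite simply re-initializes Blake and hashes the full 80 bytes each call. A reconstruction of the removed optimisation, based only on the deleted lines (function names illustrative; the input is 4-way interleaved, so byte offsets are multiplied by 4):

    static __thread blake512_4way_context blake_mid;        // per-thread midstate

    static void precompute_midstate( const void *vdata )    // once per work unit
    {
       blake512_4way_init( &blake_mid );
       blake512_4way( &blake_mid, vdata, 64 );               // first 64 bytes per lane
    }

    static void first_stage( void *vhash, const void *vdata )   // per nonce batch
    {
       blake512_4way_context c = blake_mid;                  // resume from midstate
       blake512_4way( &c, (const char*)vdata + (64<<2), 80 - 64 );  // 16-byte tail
       blake512_4way_close( &c, vhash );
    }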
@@ -83,293 +54,283 @@ void xevan_4way_hash( void *output, const void *input )
 uint64_t hash2[16] __attribute__ ((aligned (64)));
 uint64_t hash3[16] __attribute__ ((aligned (64)));
 uint64_t vhash[16<<2] __attribute__ ((aligned (64)));
-uint64_t vhash32[16<<2] __attribute__ ((aligned (64)));
+uint64_t vhashA[16<<2] __attribute__ ((aligned (64)));
+uint64_t vhashB[16<<2] __attribute__ ((aligned (64)));
 const int dataLen = 128;
-const int midlen = 64; // bytes
-const int tail = 80 - midlen; // 16
-xevan_4way_ctx_holder ctx __attribute__ ((aligned (64)));
-memcpy( &ctx, &xevan_4way_ctx, sizeof(xevan_4way_ctx) );
+xevan_4way_context_overlay ctx __attribute__ ((aligned (64)));
 
-// parallel way
-memcpy( &ctx.blake, &xevan_blake_4way_mid,
-sizeof(xevan_blake_4way_mid) );
-blake512_4way( &ctx.blake, input + (midlen<<2), tail );
+// parallel 4 way
+blake512_4way_init( &ctx.blake );
+blake512_4way( &ctx.blake, input, 80 );
 blake512_4way_close(&ctx.blake, vhash);
 memset( &vhash[8<<2], 0, 64<<2 );
 
+bmw512_4way_init( &ctx.bmw );
 bmw512_4way( &ctx.bmw, vhash, dataLen );
 bmw512_4way_close( &ctx.bmw, vhash );
 
 // Serial
 mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
 dataLen<<3 );
-memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
 dataLen<<3 );
-memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
 dataLen<<3 );
-memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
 dataLen<<3 );
 
 // Parallel 4way
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+skein512_4way_init( &ctx.skein );
 skein512_4way( &ctx.skein, vhash, dataLen );
 skein512_4way_close( &ctx.skein, vhash );
 
+jh512_4way_init( &ctx.jh );
 jh512_4way( &ctx.jh, vhash, dataLen );
 jh512_4way_close( &ctx.jh, vhash );
 
+keccak512_4way_init( &ctx.keccak );
 keccak512_4way( &ctx.keccak, vhash, dataLen );
 keccak512_4way_close( &ctx.keccak, vhash );
 
-mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
-mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
-luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
-mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
-mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
+mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
 luffa_2way_init( &ctx.luffa, 512 );
-luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
-mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
+luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
+luffa_2way_init( &ctx.luffa, 512 );
+luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
 
-cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0,
-dataLen );
-memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
-cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1,
-dataLen );
-memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
-cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2,
-dataLen );
-memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
-cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3,
-dataLen );
+cube_2way_init( &ctx.cube, 512, 16, 32 );
+cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
+cube_2way_init( &ctx.cube, 512, 16, 32 );
+cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
 
-sph_shavite512( &ctx.shavite, hash0, dataLen );
-sph_shavite512_close( &ctx.shavite, hash0 );
-memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
-sizeof(sph_shavite512_context) );
-sph_shavite512( &ctx.shavite, hash1, dataLen );
-sph_shavite512_close( &ctx.shavite, hash1 );
-memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
-sizeof(sph_shavite512_context) );
-sph_shavite512( &ctx.shavite, hash2, dataLen );
-sph_shavite512_close( &ctx.shavite, hash2 );
-memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
-sizeof(sph_shavite512_context) );
-sph_shavite512( &ctx.shavite, hash3, dataLen );
-sph_shavite512_close( &ctx.shavite, hash3 );
+shavite512_2way_init( &ctx.shavite );
+shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
+shavite512_2way_init( &ctx.shavite );
+shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
 
-mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
-simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
-mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
-mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
 simd_2way_init( &ctx.simd, 512 );
-simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
-mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
+simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
+simd_2way_init( &ctx.simd, 512 );
+simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
 
+mm256_deinterleave_1x128( hash0, hash1, vhashA, dataLen<<3 );
+mm256_deinterleave_1x128( hash2, hash3, vhashB, dataLen<<3 );
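Note: luffa, cubehash, shavite and simd now run as 2-way 128-bit-lane implementations, so the 4x64 state is re-interleaved into vhashA (lanes 0 and 1) and vhashB (lanes 2 and 3), as the mm256_deinterleave_1x128 calls above show. A plain-C reference for the 2x128 layout (illustrative name; the real conversion is mm256_reinterleave_4x64_2x128):

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    // dst = { a[0..1], b[0..1], a[2..3], b[2..3], ... }  (uint64_t words),
    // i.e. the two lanes alternate in 128-bit chunks.
    static void interleave_2x128_ref( uint64_t *dst, const uint64_t *a,
                                      const uint64_t *b, size_t bit_len )
    {
       for ( size_t i = 0; i < bit_len / 128; i++ )
       {
          memcpy( &dst[ 4*i ],     &a[ 2*i ], 16 );
          memcpy( &dst[ 4*i + 2 ], &b[ 2*i ], 16 );
       }
    }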
 
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash0,
 (const BitSequence *) hash0, dataLen<<3 );
-memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash1,
 (const BitSequence *) hash1, dataLen<<3 );
-memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash2,
 (const BitSequence *) hash2, dataLen<<3 );
-memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash3,
 (const BitSequence *) hash3, dataLen<<3 );
 // Parallel
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+hamsi512_4way_init( &ctx.hamsi );
 hamsi512_4way( &ctx.hamsi, vhash, dataLen );
 hamsi512_4way_close( &ctx.hamsi, vhash );
 
 mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash0, dataLen );
 sph_fugue512_close( &ctx.fugue, hash0 );
-memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash1, dataLen );
 sph_fugue512_close( &ctx.fugue, hash1 );
-memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash2, dataLen );
 sph_fugue512_close( &ctx.fugue, hash2 );
-memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash3, dataLen );
 sph_fugue512_close( &ctx.fugue, hash3 );
 
 // Parallel 4way 32 bit
 mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+shabal512_4way_init( &ctx.shabal );
 shabal512_4way( &ctx.shabal, vhash, dataLen );
 shabal512_4way_close( &ctx.shabal, vhash );
 
 mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
 // Serial
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash0 );
-memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
-sizeof(sph_whirlpool_context) );
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash1 );
-memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
-sizeof(sph_whirlpool_context) );
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash2 );
-memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
-sizeof(sph_whirlpool_context) );
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash3 );
 
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+sha512_4way_init( &ctx.sha512 );
 sha512_4way( &ctx.sha512, vhash, dataLen );
 sha512_4way_close( &ctx.sha512, vhash );
 
-mm256_reinterleave_4x32( vhash32, vhash, dataLen<<3 );
-haval256_5_4way( &ctx.haval, vhash32, dataLen );
-haval256_5_4way_close( &ctx.haval, vhash );
-mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
+mm256_reinterleave_4x64_4x32( vhashA, vhash, dataLen<<3 );
+haval256_5_4way_init( &ctx.haval );
+haval256_5_4way( &ctx.haval, vhashA, dataLen );
+haval256_5_4way_close( &ctx.haval, vhashA );
 
+mm256_reinterleave_4x32_4x64( vhash, vhashA, dataLen<<3 );
 
-mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 memset( &vhash[ 4<<2 ], 0, (dataLen-32) << 2 );
-memcpy( &ctx, &xevan_4way_ctx, sizeof(xevan_4way_ctx) );
 
+blake512_4way_init( &ctx.blake );
 blake512_4way( &ctx.blake, vhash, dataLen );
 blake512_4way_close(&ctx.blake, vhash);
 
+bmw512_4way_init( &ctx.bmw );
 bmw512_4way( &ctx.bmw, vhash, dataLen );
 bmw512_4way_close( &ctx.bmw, vhash );
 
 mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
 dataLen<<3 );
-memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1,
 dataLen<<3 );
-memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2,
 dataLen<<3 );
-memcpy( &ctx.groestl, &xevan_4way_ctx.groestl, sizeof(hashState_groestl) );
+init_groestl( &ctx.groestl, 64 );
 update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
 dataLen<<3 );
 
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+skein512_4way_init( &ctx.skein );
 skein512_4way( &ctx.skein, vhash, dataLen );
 skein512_4way_close( &ctx.skein, vhash );
 
+jh512_4way_init( &ctx.jh );
 jh512_4way( &ctx.jh, vhash, dataLen );
 jh512_4way_close( &ctx.jh, vhash );
 
+keccak512_4way_init( &ctx.keccak );
 keccak512_4way( &ctx.keccak, vhash, dataLen );
 keccak512_4way_close( &ctx.keccak, vhash );
 
-mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
-mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
-luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
-mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
-mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
+mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, dataLen<<3 );
 luffa_2way_init( &ctx.luffa, 512 );
-luffa_2way_update_close( &ctx.luffa, vhash, vhash, dataLen );
-mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
+luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, dataLen );
+luffa_2way_init( &ctx.luffa, 512 );
+luffa_2way_update_close( &ctx.luffa, vhashB, vhashB, dataLen );
 
-cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0,
-dataLen );
-memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
-cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1,
-dataLen );
-memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
-cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2,
-dataLen );
-memcpy( &ctx.cube, &xevan_4way_ctx.cube, sizeof(cubehashParam) );
-cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3,
-dataLen );
+cube_2way_init( &ctx.cube, 512, 16, 32 );
+cube_2way_update_close( &ctx.cube, vhashA, vhashA, dataLen );
+cube_2way_init( &ctx.cube, 512, 16, 32 );
+cube_2way_update_close( &ctx.cube, vhashB, vhashB, dataLen );
 
-sph_shavite512( &ctx.shavite, hash0, dataLen );
-sph_shavite512_close( &ctx.shavite, hash0 );
-memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
-sizeof(sph_shavite512_context) );
-sph_shavite512( &ctx.shavite, hash1, dataLen );
-sph_shavite512_close( &ctx.shavite, hash1 );
-memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
-sizeof(sph_shavite512_context) );
-sph_shavite512( &ctx.shavite, hash2, dataLen );
-sph_shavite512_close( &ctx.shavite, hash2 );
-memcpy( &ctx.shavite, &xevan_4way_ctx.shavite,
-sizeof(sph_shavite512_context) );
-sph_shavite512( &ctx.shavite, hash3, dataLen );
-sph_shavite512_close( &ctx.shavite, hash3 );
+shavite512_2way_init( &ctx.shavite );
+shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, dataLen );
+shavite512_2way_init( &ctx.shavite );
+shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, dataLen );
 
-mm256_interleave_2x128( vhash, hash0, hash1, dataLen<<3 );
-simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
-mm256_deinterleave_2x128( hash0, hash1, vhash, dataLen<<3 );
-mm256_interleave_2x128( vhash, hash2, hash3, dataLen<<3 );
 simd_2way_init( &ctx.simd, 512 );
-simd_2way_update_close( &ctx.simd, vhash, vhash, dataLen<<3 );
-mm256_deinterleave_2x128( hash2, hash3, vhash, dataLen<<3 );
+simd_2way_update_close( &ctx.simd, vhashA, vhashA, dataLen<<3 );
+simd_2way_init( &ctx.simd, 512 );
+simd_2way_update_close( &ctx.simd, vhashB, vhashB, dataLen<<3 );
 
+mm256_deinterleave_1x128( hash0, hash1, vhashA, dataLen<<3 );
+mm256_deinterleave_1x128( hash2, hash3, vhashB, dataLen<<3 );
 
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash0,
 (const BitSequence *) hash0, dataLen<<3 );
-memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash1,
 (const BitSequence *) hash1, dataLen<<3 );
-memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash2,
 (const BitSequence *) hash2, dataLen<<3 );
-memcpy( &ctx.echo, &xevan_4way_ctx.echo, sizeof(hashState_echo) );
+init_echo( &ctx.echo, 512 );
 update_final_echo( &ctx.echo, (BitSequence *)hash3,
 (const BitSequence *) hash3, dataLen<<3 );
 
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+hamsi512_4way_init( &ctx.hamsi );
 hamsi512_4way( &ctx.hamsi, vhash, dataLen );
 hamsi512_4way_close( &ctx.hamsi, vhash );
 
 mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash0, dataLen );
 sph_fugue512_close( &ctx.fugue, hash0 );
-memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash1, dataLen );
 sph_fugue512_close( &ctx.fugue, hash1 );
-memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash2, dataLen );
 sph_fugue512_close( &ctx.fugue, hash2 );
-memcpy( &ctx.fugue, &xevan_4way_ctx.fugue, sizeof(sph_fugue512_context) );
+sph_fugue512_init( &ctx.fugue );
 sph_fugue512( &ctx.fugue, hash3, dataLen );
 sph_fugue512_close( &ctx.fugue, hash3 );
 
 mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+shabal512_4way_init( &ctx.shabal );
 shabal512_4way( &ctx.shabal, vhash, dataLen );
 shabal512_4way_close( &ctx.shabal, vhash );
 
 mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash0 );
-memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
-sizeof(sph_whirlpool_context) );
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash1, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash1 );
-memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
-sizeof(sph_whirlpool_context) );
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash2, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash2 );
-memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
-sizeof(sph_whirlpool_context) );
+sph_whirlpool_init( &ctx.whirlpool );
 sph_whirlpool( &ctx.whirlpool, hash3, dataLen );
 sph_whirlpool_close( &ctx.whirlpool, hash3 );
 
 mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
+sha512_4way_init( &ctx.sha512 );
 sha512_4way( &ctx.sha512, vhash, dataLen );
 sha512_4way_close( &ctx.sha512, vhash );
 
-mm256_reinterleave_4x32( vhash32, vhash, dataLen<<3 );
-haval256_5_4way( &ctx.haval, vhash32, dataLen );
+mm256_reinterleave_4x64_4x32( vhashA, vhash, dataLen<<3 );
+haval256_5_4way_init( &ctx.haval );
+haval256_5_4way( &ctx.haval, vhashA, dataLen );
 haval256_5_4way_close( &ctx.haval, output );
 }
 
 int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
-uint64_t *hashes_done )
+uint64_t *hashes_done, struct thr_info *mythr )
 {
 uint32_t hash[4*8] __attribute__ ((aligned (64)));
 uint32_t *hash7 = &(hash[7<<2]);
@@ -378,30 +339,26 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
 uint32_t _ALIGN(64) endiandata[20];
 uint32_t *pdata = work->data;
 uint32_t *ptarget = work->target;
+/* int */ thr_id = mythr->id; // thr_id arg is deprecated
+__m256i *noncev = (__m256i*)vdata + 9; // aligned
 
 const uint32_t Htarg = ptarget[7];
 const uint32_t first_nonce = pdata[19];
 uint32_t n = first_nonce;
-uint32_t *nonces = work->nonces;
-int num_found = 0;
-uint32_t *noncep = vdata + 73; // 9*8 + 1
 
 if ( opt_benchmark )
 ptarget[7] = 0x0cff;
 
-for ( int k=0; k < 19; k++ )
-be32enc( &endiandata[k], pdata[k] );
 
 uint64_t *edata = (uint64_t*)endiandata;
 
+casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
+casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
+casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
 mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
 
-xevan_4way_blake512_midstate( vdata );
-
 do {
-be32enc( noncep, n );
-be32enc( noncep+2, n+1 );
-be32enc( noncep+4, n+2 );
-be32enc( noncep+6, n+3 );
+*noncev = mm256_interleave_blend_32( mm256_bswap_32(
+_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ), *noncev );
 
 xevan_4way_hash( hash, vdata );
 for ( int lane = 0; lane < 4; lane++ )
@@ -411,15 +368,20 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
 if ( fulltest( lane_hash, ptarget ) )
 {
 pdata[19] = n + lane;
-nonces[ num_found++ ] = n + lane;
 work_set_target_ratio( work, lane_hash );
+if ( submit_work( mythr, work ) )
+applog( LOG_NOTICE,
+"Share %d submitted by thread %d, lane %d.",
+accepted_share_count + rejected_share_count + 1,
+thr_id, lane );
+else
+applog( LOG_WARNING, "Failed to submit share." );
 }
 }
 n += 4;
-} while ( ( num_found == 0 ) && ( n < max_nonce )
-&& !work_restart[thr_id].restart );
+} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
 *hashes_done = n - first_nonce + 1;
-return num_found;
+return 0;
 }
 
 #endif
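Note: scanhash_xevan_4way above replaces the per-word be32enc loop with three vector byte-swaps (two 32-byte casti_m256i stores plus one 16-byte casti_m128i store) covering all 20 words of the 80-byte header; the nonce word is swapped too, but it is overwritten by *noncev on every iteration anyway. A scalar equivalent of those three stores (illustrative only):

    // Same effect as the three vector statements in the hunk above.
    uint32_t *e = (uint32_t*)edata;
    for ( int k = 0; k < 20; k++ )
       e[k] = __builtin_bswap32( pdata[k] );   // big-endian header, swapped once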
@@ -8,7 +8,7 @@ void xevan_set_target( struct work* work, double job_diff )
 bool register_xevan_algo( algo_gate_t* gate )
 {
 #if defined (XEVAN_4WAY)
-init_xevan_4way_ctx();
+// init_xevan_4way_ctx();
 gate->scanhash = (void*)&scanhash_xevan_4way;
 gate->hash = (void*)&xevan_4way_hash;
 #else
@@ -15,16 +15,16 @@ bool register_xevan_algo( algo_gate_t* gate );
 void xevan_4way_hash( void *state, const void *input );
 
 int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
-uint64_t *hashes_done );
+uint64_t *hashes_done, struct thr_info *mythr );
 
-void init_xevan_4way_ctx();
+//void init_xevan_4way_ctx();
 
 #endif
 
 void xevan_hash( void *state, const void *input );
 
 int scanhash_xevan( int thr_id, struct work *work, uint32_t max_nonce,
-uint64_t *hashes_done );
+uint64_t *hashes_done, struct thr_info *mythr );
 
 void init_xevan_ctx();
 
@@ -230,12 +230,14 @@ void xevan_hash(void *output, const void *input)
 memcpy(output, hash, 32);
 }
 
-int scanhash_xevan(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
+int scanhash_xevan( int thr_id, struct work *work, uint32_t max_nonce,
+uint64_t *hashes_done, struct thr_info *mythr )
 {
 uint32_t _ALIGN(64) hash[8];
 uint32_t _ALIGN(64) endiandata[20];
 uint32_t *pdata = work->data;
 uint32_t *ptarget = work->target;
+/* int */ thr_id = mythr->id; // thr_id arg is deprecated
 
 const uint32_t Htarg = ptarget[7];
 const uint32_t first_nonce = pdata[19];
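Note: every scanhash_* routine touched by this change gains a struct thr_info *mythr parameter and reads its thread id from it; the old thr_id argument is kept only for source compatibility and is overwritten on entry. The common shape is sketched below (scanhash_example is a hypothetical name, not a complete miner loop):

    int scanhash_example( int thr_id, struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr )
    {
       thr_id = mythr->id;                 // thr_id arg is deprecated
       uint32_t n = work->data[19];
       const uint32_t first_nonce = n;
       // ... hash candidates, increment n, submit_work( mythr, work ) on a hit ...
       *hashes_done = n - first_nonce + 1;
       return 0;                           // shares are submitted inline
    }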
@@ -290,7 +290,7 @@ SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx)
 
 /* Initialize an HMAC-SHA256 operation with the given key. */
 void
-HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
+HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen)
 {
 unsigned char pad[64];
 unsigned char khash[32];
@@ -326,7 +326,7 @@ HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
 
 /* Add bytes to the HMAC-SHA256 operation. */
 void
-HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
+HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len)
 {
 
 /* Feed data to the inner SHA256 operation. */
@@ -335,7 +335,7 @@ HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
 
 /* Finish an HMAC-SHA256 operation. */
 void
-HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx)
+HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx)
 {
 unsigned char ihash[32];
 
@@ -361,7 +361,7 @@ void
 PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
 size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
 {
-HMAC_SHA256_CTX PShctx, hctx;
+HMAC_SHA256_CTX_Y PShctx, hctx;
 uint8_t _ALIGN(128) T[32];
 uint8_t _ALIGN(128) U[32];
 uint8_t ivec[4];
@@ -370,8 +370,8 @@ PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
 int k;
 
 /* Compute HMAC state after processing P and S. */
-HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
-HMAC_SHA256_Update(&PShctx, salt, saltlen);
+HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen);
+HMAC_SHA256_Update_Y(&PShctx, salt, saltlen);
 
 /* Iterate through the blocks. */
 for (i = 0; i * 32 < dkLen; i++) {
@@ -379,18 +379,18 @@ PBKDF2_SHA256_Y(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
 be32enc(ivec, (uint32_t)(i + 1));
 
 /* Compute U_1 = PRF(P, S || INT(i)). */
-memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
-HMAC_SHA256_Update(&hctx, ivec, 4);
-HMAC_SHA256_Final(U, &hctx);
+memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y));
+HMAC_SHA256_Update_Y(&hctx, ivec, 4);
+HMAC_SHA256_Final_Y(U, &hctx);
 
 /* T_i = U_1 ... */
 memcpy(T, U, 32);
 
 for (j = 2; j <= c; j++) {
 /* Compute U_j. */
-HMAC_SHA256_Init(&hctx, passwd, passwdlen);
-HMAC_SHA256_Update(&hctx, U, 32);
-HMAC_SHA256_Final(U, &hctx);
+HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen);
+HMAC_SHA256_Update_Y(&hctx, U, 32);
+HMAC_SHA256_Final_Y(U, &hctx);
 
 /* ... xor U_j ... */
 for (k = 0; k < 32; k++)
@@ -49,14 +49,14 @@ typedef struct HMAC_SHA256Context {
 typedef struct HMAC_SHA256Context {
 SHA256_CTX ictx;
 SHA256_CTX octx;
-} HMAC_SHA256_CTX;
+} HMAC_SHA256_CTX_Y;
 
 void SHA256_Init_Y(SHA256_CTX_Y *);
 void SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t);
 void SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *);
-void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t);
-void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t);
-void HMAC_SHA256_Final(unsigned char [32], HMAC_SHA256_CTX *);
+void HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
+void HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
+void HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *);
 
 /**
 * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
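Note: the HMAC-SHA256 helpers bundled with yescrypt are renamed with the _Y suffix already used by its SHA256_*_Y routines, presumably so their symbols no longer clash with the separate SHA-256 implementation now built for yespower (renamed to sha256_p.c in the Makefile). Usage of the renamed API exactly as declared above, with placeholder buffers (key, key_len, msg, msg_len are the caller's):

    unsigned char digest[32];
    HMAC_SHA256_CTX_Y ctx;                       // renamed context type
    HMAC_SHA256_Init_Y( &ctx, key, key_len );
    HMAC_SHA256_Update_Y( &ctx, msg, msg_len );
    HMAC_SHA256_Final_Y( digest, &ctx );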
@@ -1354,14 +1354,14 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
 if ((t || flags) && buflen == sizeof(sha256)) {
 /* Compute ClientKey */
 {
-HMAC_SHA256_CTX ctx;
-HMAC_SHA256_Init(&ctx, buf, buflen);
+HMAC_SHA256_CTX_Y ctx;
+HMAC_SHA256_Init_Y(&ctx, buf, buflen);
 if ( yescrypt_client_key )
-HMAC_SHA256_Update( &ctx, (uint8_t*)yescrypt_client_key,
+HMAC_SHA256_Update_Y( &ctx, (uint8_t*)yescrypt_client_key,
 yescrypt_client_key_len );
 else
-HMAC_SHA256_Update( &ctx, salt, saltlen );
-HMAC_SHA256_Final(sha256, &ctx);
+HMAC_SHA256_Update_Y( &ctx, salt, saltlen );
+HMAC_SHA256_Final_Y(sha256, &ctx);
 }
 /* Compute StoredKey */
 {
@@ -383,7 +383,7 @@ void yescrypthash(void *output, const void *input)
 }
 
 int scanhash_yescrypt( int thr_id, struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done )
+                       uint64_t *hashes_done, struct thr_info *mythr )
 {
     uint32_t _ALIGN(64) vhash[8];
     uint32_t _ALIGN(64) endiandata[20];
@@ -393,6 +393,7 @@ int scanhash_yescrypt( int thr_id, struct work *work, uint32_t max_nonce,
     const uint32_t Htarg = ptarget[7];
     const uint32_t first_nonce = pdata[19];
     uint32_t n = first_nonce;
+    /* int */ thr_id = mythr->id;  // thr_id arg is deprecated
 
     for (int k = 0; k < 19; k++)
         be32enc(&endiandata[k], pdata[k]);
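With this change the scanhash routine receives the thread descriptor directly and re-derives thr_id from it; a minimal sketch of the calling side (only the id field of struct thr_info is taken from the diff, the header name and the helper are assumptions):

    #include <stdint.h>
    #include "miner.h"   /* assumed to declare struct work, struct thr_info and scanhash_yescrypt() */

    /* Hypothetical helper inside the per-thread mining loop. */
    static int scan_one_round(struct thr_info *mythr, struct work *work,
                              uint32_t max_nonce, uint64_t *hashes_done)
    {
        /* The first argument is kept for compatibility; the routine now takes
           its thread id from mythr->id anyway. */
        return scanhash_yescrypt(mythr->id, work, max_nonce, hashes_done, mythr);
    }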
@@ -1,646 +0,0 @@
|
|||||||
/*-
|
|
||||||
* Copyright 2005-2016 Colin Percival
|
|
||||||
* Copyright 2016-2018 Alexander Peslyak
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "insecure_memzero.h"
|
|
||||||
#include "sysendian.h"
|
|
||||||
|
|
||||||
#include "sha256.h"
|
|
||||||
|
|
||||||
#ifdef __ICC
|
|
||||||
/* Miscompile with icc 14.0.0 (at least), so don't use restrict there */
|
|
||||||
#define restrict
|
|
||||||
#elif __STDC_VERSION__ >= 199901L
|
|
||||||
/* Have restrict */
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define restrict __restrict
|
|
||||||
#else
|
|
||||||
#define restrict
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Encode a length len*2 vector of (uint32_t) into a length len*8 vector of
|
|
||||||
* (uint8_t) in big-endian form.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Encode vector, two words at a time. */
|
|
||||||
do {
|
|
||||||
be32enc(&dst[0], src[0]);
|
|
||||||
be32enc(&dst[4], src[1]);
|
|
||||||
src += 2;
|
|
||||||
dst += 8;
|
|
||||||
} while (--len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Decode a big-endian length len*8 vector of (uint8_t) into a length
|
|
||||||
* len*2 vector of (uint32_t).
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Decode vector, two words at a time. */
|
|
||||||
do {
|
|
||||||
dst[0] = be32dec(&src[0]);
|
|
||||||
dst[1] = be32dec(&src[4]);
|
|
||||||
src += 8;
|
|
||||||
dst += 2;
|
|
||||||
} while (--len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* SHA256 round constants. */
|
|
||||||
static const uint32_t Krnd[64] = {
|
|
||||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
|
||||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
|
||||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
|
||||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
|
||||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
|
||||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
|
||||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
|
||||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
|
||||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
|
||||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
|
||||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
|
||||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
|
||||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
|
||||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
|
||||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
|
||||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Elementary functions used by SHA256 */
|
|
||||||
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
|
||||||
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
|
||||||
#define SHR(x, n) (x >> n)
|
|
||||||
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
|
||||||
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
|
||||||
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
|
||||||
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
|
|
||||||
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
|
|
||||||
|
|
||||||
/* SHA256 round function */
|
|
||||||
#define RND(a, b, c, d, e, f, g, h, k) \
|
|
||||||
h += S1(e) + Ch(e, f, g) + k; \
|
|
||||||
d += h; \
|
|
||||||
h += S0(a) + Maj(a, b, c);
|
|
||||||
|
|
||||||
/* Adjusted round function for rotating state */
|
|
||||||
#define RNDr(S, W, i, ii) \
|
|
||||||
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
|
||||||
S[(66 - i) % 8], S[(67 - i) % 8], \
|
|
||||||
S[(68 - i) % 8], S[(69 - i) % 8], \
|
|
||||||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
|
||||||
W[i + ii] + Krnd[i + ii])
|
|
||||||
|
|
||||||
/* Message schedule computation */
|
|
||||||
#define MSCH(W, ii, i) \
|
|
||||||
W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
|
|
||||||
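The unrolled MSCH invocations inside SHA256_Transform below expand to the standard SHA-256 message-schedule recurrence; a plain-loop equivalent, shown only for readability and reusing the s0/s1 macros defined above:

    /* W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] for t = 16..63 */
    static void sha256_message_schedule(uint32_t W[64])
    {
        int t;

        for (t = 16; t < 64; t++)
            W[t] = s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16];
    }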
|
|
||||||
/*
|
|
||||||
* SHA256 block compression function. The 256-bit state is transformed via
|
|
||||||
* the 512-bit input block to produce a new state.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
SHA256_Transform(uint32_t state[static restrict 8],
|
|
||||||
const uint8_t block[static restrict 64],
|
|
||||||
uint32_t W[static restrict 64], uint32_t S[static restrict 8])
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* 1. Prepare the first part of the message schedule W. */
|
|
||||||
be32dec_vect(W, block, 8);
|
|
||||||
|
|
||||||
/* 2. Initialize working variables. */
|
|
||||||
memcpy(S, state, 32);
|
|
||||||
|
|
||||||
/* 3. Mix. */
|
|
||||||
for (i = 0; i < 64; i += 16) {
|
|
||||||
RNDr(S, W, 0, i);
|
|
||||||
RNDr(S, W, 1, i);
|
|
||||||
RNDr(S, W, 2, i);
|
|
||||||
RNDr(S, W, 3, i);
|
|
||||||
RNDr(S, W, 4, i);
|
|
||||||
RNDr(S, W, 5, i);
|
|
||||||
RNDr(S, W, 6, i);
|
|
||||||
RNDr(S, W, 7, i);
|
|
||||||
RNDr(S, W, 8, i);
|
|
||||||
RNDr(S, W, 9, i);
|
|
||||||
RNDr(S, W, 10, i);
|
|
||||||
RNDr(S, W, 11, i);
|
|
||||||
RNDr(S, W, 12, i);
|
|
||||||
RNDr(S, W, 13, i);
|
|
||||||
RNDr(S, W, 14, i);
|
|
||||||
RNDr(S, W, 15, i);
|
|
||||||
|
|
||||||
if (i == 48)
|
|
||||||
break;
|
|
||||||
MSCH(W, 0, i);
|
|
||||||
MSCH(W, 1, i);
|
|
||||||
MSCH(W, 2, i);
|
|
||||||
MSCH(W, 3, i);
|
|
||||||
MSCH(W, 4, i);
|
|
||||||
MSCH(W, 5, i);
|
|
||||||
MSCH(W, 6, i);
|
|
||||||
MSCH(W, 7, i);
|
|
||||||
MSCH(W, 8, i);
|
|
||||||
MSCH(W, 9, i);
|
|
||||||
MSCH(W, 10, i);
|
|
||||||
MSCH(W, 11, i);
|
|
||||||
MSCH(W, 12, i);
|
|
||||||
MSCH(W, 13, i);
|
|
||||||
MSCH(W, 14, i);
|
|
||||||
MSCH(W, 15, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 4. Mix local working variables into global state. */
|
|
||||||
state[0] += S[0];
|
|
||||||
state[1] += S[1];
|
|
||||||
state[2] += S[2];
|
|
||||||
state[3] += S[3];
|
|
||||||
state[4] += S[4];
|
|
||||||
state[5] += S[5];
|
|
||||||
state[6] += S[6];
|
|
||||||
state[7] += S[7];
|
|
||||||
}
|
|
||||||
|
|
||||||
static const uint8_t PAD[64] = {
|
|
||||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Add padding and terminating bit-count. */
|
|
||||||
static void
|
|
||||||
SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
size_t r;
|
|
||||||
|
|
||||||
/* Figure out how many bytes we have buffered. */
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
|
|
||||||
/* Pad to 56 mod 64, transforming if we finish a block en route. */
|
|
||||||
if (r < 56) {
|
|
||||||
/* Pad to 56 mod 64. */
|
|
||||||
memcpy(&ctx->buf[r], PAD, 56 - r);
|
|
||||||
} else {
|
|
||||||
/* Finish the current block and mix. */
|
|
||||||
memcpy(&ctx->buf[r], PAD, 64 - r);
|
|
||||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
|
||||||
|
|
||||||
/* The start of the final block is all zeroes. */
|
|
||||||
memset(&ctx->buf[0], 0, 56);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add the terminating bit-count. */
|
|
||||||
be64enc(&ctx->buf[56], ctx->count);
|
|
||||||
|
|
||||||
/* Mix in the final block. */
|
|
||||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
|
||||||
}
|
|
||||||
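The branch above only decides whether the bit count still fits in the current block or spills into one more; a tiny standalone helper computing the same pad length (illustrative, not part of the original file):

    #include <stddef.h>
    #include <stdint.h>

    /* Bytes of 0x80/0x00 padding inserted before the 8-byte bit count,
       given how many message bytes have been processed so far. */
    static size_t sha256_pad_bytes(uint64_t msglen_bytes)
    {
        size_t r = (size_t)(msglen_bytes & 0x3f);  /* bytes buffered in the last block */
        return (r < 56) ? (56 - r) : (120 - r);    /* same block, or one extra block */
    }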
|
|
||||||
/* Magic initialization constants. */
|
|
||||||
static const uint32_t initial_state[8] = {
|
|
||||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
|
||||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Init(ctx):
|
|
||||||
* Initialize the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Init(SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Zero bits processed so far. */
|
|
||||||
ctx->count = 0;
|
|
||||||
|
|
||||||
/* Initialize state. */
|
|
||||||
memcpy(ctx->state, initial_state, sizeof(initial_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
uint32_t r;
|
|
||||||
const uint8_t * src = in;
|
|
||||||
|
|
||||||
/* Return immediately if we have nothing to do. */
|
|
||||||
if (len == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Number of bytes left in the buffer from previous updates. */
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
|
|
||||||
/* Update number of bits. */
|
|
||||||
ctx->count += (uint64_t)(len) << 3;
|
|
||||||
|
|
||||||
/* Handle the case where we don't need to perform any transforms. */
|
|
||||||
if (len < 64 - r) {
|
|
||||||
memcpy(&ctx->buf[r], src, len);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finish the current block. */
|
|
||||||
memcpy(&ctx->buf[r], src, 64 - r);
|
|
||||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
|
||||||
src += 64 - r;
|
|
||||||
len -= 64 - r;
|
|
||||||
|
|
||||||
/* Perform complete blocks. */
|
|
||||||
while (len >= 64) {
|
|
||||||
SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]);
|
|
||||||
src += 64;
|
|
||||||
len -= 64;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy left over data into buffer. */
|
|
||||||
memcpy(ctx->buf, src, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_SHA256_Update(ctx, in, len, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Final(digest, ctx):
|
|
||||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Add padding. */
|
|
||||||
SHA256_Pad(ctx, tmp32);
|
|
||||||
|
|
||||||
/* Write the hash. */
|
|
||||||
be32enc_vect(digest, ctx->state, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_SHA256_Final(digest, ctx, tmp32);
|
|
||||||
|
|
||||||
/* Clear the context state. */
|
|
||||||
insecure_memzero(ctx, sizeof(SHA256_CTX));
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Buf(in, len, digest):
|
|
||||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Buf(const void * in, size_t len, uint8_t digest[32])
|
|
||||||
{
|
|
||||||
SHA256_CTX ctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
SHA256_Init(&ctx);
|
|
||||||
_SHA256_Update(&ctx, in, len, tmp32);
|
|
||||||
_SHA256_Final(digest, &ctx, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
|
||||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
|
||||||
* ${K}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen,
|
|
||||||
uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64],
|
|
||||||
uint8_t khash[static restrict 32])
|
|
||||||
{
|
|
||||||
const uint8_t * K = _K;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
/* If Klen > 64, the key is really SHA256(K). */
|
|
||||||
if (Klen > 64) {
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
_SHA256_Update(&ctx->ictx, K, Klen, tmp32);
|
|
||||||
_SHA256_Final(khash, &ctx->ictx, tmp32);
|
|
||||||
K = khash;
|
|
||||||
Klen = 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
memset(pad, 0x36, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
_SHA256_Update(&ctx->ictx, pad, 64, tmp32);
|
|
||||||
|
|
||||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
|
||||||
SHA256_Init(&ctx->octx);
|
|
||||||
memset(pad, 0x5c, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
_SHA256_Update(&ctx->octx, pad, 64, tmp32);
|
|
||||||
}
|
|
||||||
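For reference, the two-context initialization above is the textbook HMAC construction (a compact restatement, not text from the source):

    HMAC-SHA256(K, m) = SHA256( (K' xor opad) || SHA256( (K' xor ipad) || m ) )
        where K'   = K zero-padded to 64 bytes (K is first replaced by SHA256(K) if longer),
              ipad = 64 bytes of 0x36,  opad = 64 bytes of 0x5c.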
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t pad[64];
|
|
||||||
uint8_t khash[32];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(khash, 32);
|
|
||||||
insecure_memzero(pad, 64);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Feed data to the inner SHA256 operation. */
|
|
||||||
_SHA256_Update(&ctx->ictx, in, len, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Update(ctx, in, len, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Final(digest, ctx):
|
|
||||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx,
|
|
||||||
uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Finish the inner SHA256 operation. */
|
|
||||||
_SHA256_Final(ihash, &ctx->ictx, tmp32);
|
|
||||||
|
|
||||||
/* Feed the inner hash to the outer SHA256 operation. */
|
|
||||||
_SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
|
||||||
|
|
||||||
/* Finish the outer SHA256 operation. */
|
|
||||||
_SHA256_Final(digest, &ctx->octx, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t ihash[32];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Final(digest, ctx, tmp32, ihash);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(ihash, 32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
|
||||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
|
||||||
* length ${Klen}, and write the result to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
|
||||||
uint8_t digest[32])
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX ctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t tmp8[96];
|
|
||||||
|
|
||||||
_HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]);
|
|
||||||
_HMAC_SHA256_Update(&ctx, in, len, tmp32);
|
|
||||||
_HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(tmp8, 96);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add padding and terminating bit-count, but don't invoke Transform yet. */
|
|
||||||
static int
|
|
||||||
SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8],
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
uint32_t r;
|
|
||||||
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
if (r >= 56)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convert length to a vector of bytes -- we do this now rather
|
|
||||||
* than later because the length will change after we pad.
|
|
||||||
*/
|
|
||||||
be64enc(len, ctx->count);
|
|
||||||
|
|
||||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
|
||||||
_SHA256_Update(ctx, PAD, 56 - r, tmp32);
|
|
||||||
|
|
||||||
/* Add the terminating bit-count. */
|
|
||||||
ctx->buf[63] = len[7];
|
|
||||||
_SHA256_Update(ctx, len, 7, tmp32);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
|
||||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
|
||||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
|
||||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX Phctx, PShctx, hctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
union {
|
|
||||||
uint8_t tmp8[96];
|
|
||||||
uint32_t state[8];
|
|
||||||
} u;
|
|
||||||
size_t i;
|
|
||||||
uint8_t ivec[4];
|
|
||||||
uint8_t U[32];
|
|
||||||
uint8_t T[32];
|
|
||||||
uint64_t j;
|
|
||||||
int k;
|
|
||||||
size_t clen;
|
|
||||||
|
|
||||||
/* Sanity-check. */
|
|
||||||
assert(dkLen <= 32 * (size_t)(UINT32_MAX));
|
|
||||||
|
|
||||||
if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) {
|
|
||||||
uint32_t oldcount;
|
|
||||||
uint8_t * ivecp;
|
|
||||||
|
|
||||||
/* Compute HMAC state after processing P and S. */
|
|
||||||
_HMAC_SHA256_Init(&hctx, passwd, passwdlen,
|
|
||||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
|
||||||
_HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32);
|
|
||||||
|
|
||||||
/* Prepare ictx padding. */
|
|
||||||
oldcount = hctx.ictx.count & (0x3f << 3);
|
|
||||||
_HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32);
|
|
||||||
if ((hctx.ictx.count & (0x3f << 3)) < oldcount ||
|
|
||||||
SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32))
|
|
||||||
goto generic; /* Can't happen due to saltlen check */
|
|
||||||
ivecp = hctx.ictx.buf + (oldcount >> 3);
|
|
||||||
|
|
||||||
/* Prepare octx padding. */
|
|
||||||
hctx.octx.count += 32 << 3;
|
|
||||||
SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32);
|
|
||||||
|
|
||||||
/* Iterate through the blocks. */
|
|
||||||
for (i = 0; i * 32 < dkLen; i++) {
|
|
||||||
/* Generate INT(i + 1). */
|
|
||||||
be32enc(ivecp, (uint32_t)(i + 1));
|
|
||||||
|
|
||||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
|
||||||
memcpy(u.state, hctx.ictx.state, sizeof(u.state));
|
|
||||||
SHA256_Transform(u.state, hctx.ictx.buf,
|
|
||||||
&tmp32[0], &tmp32[64]);
|
|
||||||
be32enc_vect(hctx.octx.buf, u.state, 4);
|
|
||||||
memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
|
||||||
SHA256_Transform(u.state, hctx.octx.buf,
|
|
||||||
&tmp32[0], &tmp32[64]);
|
|
||||||
be32enc_vect(&buf[i * 32], u.state, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
generic:
|
|
||||||
/* Compute HMAC state after processing P. */
|
|
||||||
_HMAC_SHA256_Init(&Phctx, passwd, passwdlen,
|
|
||||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
|
||||||
|
|
||||||
/* Compute HMAC state after processing P and S. */
|
|
||||||
memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32);
|
|
||||||
|
|
||||||
/* Iterate through the blocks. */
|
|
||||||
for (i = 0; i * 32 < dkLen; i++) {
|
|
||||||
/* Generate INT(i + 1). */
|
|
||||||
be32enc(ivec, (uint32_t)(i + 1));
|
|
||||||
|
|
||||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
|
||||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&hctx, ivec, 4, tmp32);
|
|
||||||
_HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8);
|
|
||||||
|
|
||||||
if (c > 1) {
|
|
||||||
/* T_i = U_1 ... */
|
|
||||||
memcpy(U, T, 32);
|
|
||||||
|
|
||||||
for (j = 2; j <= c; j++) {
|
|
||||||
/* Compute U_j. */
|
|
||||||
memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&hctx, U, 32, tmp32);
|
|
||||||
_HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8);
|
|
||||||
|
|
||||||
/* ... xor U_j ... */
|
|
||||||
for (k = 0; k < 32; k++)
|
|
||||||
T[k] ^= U[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy as many bytes as necessary into buf. */
|
|
||||||
clen = dkLen - i * 32;
|
|
||||||
if (clen > 32)
|
|
||||||
clen = 32;
|
|
||||||
memcpy(&buf[i * 32], T, clen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(U, 32);
|
|
||||||
insecure_memzero(T, 32);
|
|
||||||
|
|
||||||
cleanup:
|
|
||||||
insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(&u, sizeof(u));
|
|
||||||
}
|
|
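A note on the fast path above: with c == 1 each PBKDF2 output block reduces to a single PRF call, which is why the code freezes the HMAC inner and outer states after absorbing P and S and then finishes every 32-byte block with just two SHA256_Transform calls:

    T_i = U_1 = HMAC-SHA256(P, S || INT(i))        when c == 1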
@@ -1,680 +0,0 @@
|
|||||||
/*-
|
|
||||||
* Copyright 2005-2016 Colin Percival
|
|
||||||
* Copyright 2016-2018 Alexander Peslyak
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "insecure_memzero.h"
|
|
||||||
#include "sysendian.h"
|
|
||||||
|
|
||||||
#include "sha256.h"
|
|
||||||
#include "avxdefs.h"
|
|
||||||
|
|
||||||
#ifdef __ICC
|
|
||||||
/* Miscompile with icc 14.0.0 (at least), so don't use restrict there */
|
|
||||||
#define restrict
|
|
||||||
#elif __STDC_VERSION__ >= 199901L
|
|
||||||
/* Have restrict */
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define restrict __restrict
|
|
||||||
#else
|
|
||||||
#define restrict
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Encode a length len*2 vector of (uint32_t) into a length len*8 vector of
|
|
||||||
* (uint8_t) in big-endian form.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Encode vector, two words at a time. */
|
|
||||||
do {
|
|
||||||
be32enc(&dst[0], src[0]);
|
|
||||||
be32enc(&dst[4], src[1]);
|
|
||||||
src += 2;
|
|
||||||
dst += 8;
|
|
||||||
} while (--len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Decode a big-endian length len*8 vector of (uint8_t) into a length
|
|
||||||
* len*2 vector of (uint32_t).
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Decode vector, two words at a time. */
|
|
||||||
do {
|
|
||||||
dst[0] = be32dec(&src[0]);
|
|
||||||
dst[1] = be32dec(&src[4]);
|
|
||||||
src += 8;
|
|
||||||
dst += 2;
|
|
||||||
} while (--len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* SHA256 round constants. */
|
|
||||||
static const uint32_t Krnd[64] = {
|
|
||||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
|
||||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
|
||||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
|
||||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
|
||||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
|
||||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
|
||||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
|
||||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
|
||||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
|
||||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
|
||||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
|
||||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
|
||||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
|
||||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
|
||||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
|
||||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Elementary functions used by SHA256 */
|
|
||||||
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
|
||||||
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
|
||||||
#define SHR(x, n) (x >> n)
|
|
||||||
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
|
||||||
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
|
||||||
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
|
||||||
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
|
|
||||||
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
|
|
||||||
|
|
||||||
#if 0 //defined(__SHA__)
|
|
||||||
|
|
||||||
// ABEF = _mm_sha256rnds2_epu32( CDGH, ABEF, k )
|
|
||||||
//_mm_sha256rnds2_epu32 (__m128i a, __m128i b, __m128i k)
|
|
||||||
// b = { ABEF } a = { CDGH }
|
|
||||||
//
|
|
||||||
//a = _mm_set_epi32( S[(66 - i) % 8], S[(67 - i) % 8],
|
|
||||||
// S[(70 - i) % 8], S[(71 - i) % 8] );
|
|
||||||
//b = _mm_set_epi32( S[(64 - i) % 8], S[(65 - i) % 8],
|
|
||||||
// S[(68 - i) % 8], S[(69 - i) % 8] );
|
|
||||||
//k = _mm_set1_epi32( W[i + ii] + Krnd[i + ii] )
|
|
||||||
// _mm_sha256rnds2_epu32(a,b,k)
|
|
||||||
|
|
||||||
#define RNDr( S, W, i, ii ) do \
|
|
||||||
{ \
|
|
||||||
uint32_t abef[4]; \
|
|
||||||
__m128i ABEF = _mm_set_epi32( S[(66 - i) % 8], S[(67 - i) % 8], \
|
|
||||||
S[(70 - i) % 8], S[(71 - i) % 8] ); \
|
|
||||||
__m128i CDGH = _mm_set_epi32( S[(64 - i) % 8], S[(65 - i) % 8], \
|
|
||||||
S[(68 - i) % 8], S[(69 - i) % 8] ); \
|
|
||||||
__m128i K = _mm_set1_epi32( W[i + ii] + Krnd[i + ii] ); \
|
|
||||||
casti_m128i( abef, 0 ) = _mm_sha256rnds2_epu32( CDGH, ABEF, K ); \
|
|
||||||
S[(66 - i) % 8] = abef[3]; \
|
|
||||||
S[(67 - i) % 8] = abef[2]; \
|
|
||||||
S[(64 - i) % 8] = abef[1]; \
|
|
||||||
S[(65 - i) % 8] = abef[0]; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
/* SHA256 round function */
|
|
||||||
|
|
||||||
#define RND(a, b, c, d, e, f, g, h, k) \
|
|
||||||
h += S1(e) + Ch(e, f, g) + k; \
|
|
||||||
d += h; \
|
|
||||||
h += S0(a) + Maj(a, b, c);
|
|
||||||
|
|
||||||
/* Adjusted round function for rotating state */
|
|
||||||
#define RNDr(S, W, i, ii) \
|
|
||||||
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
|
||||||
S[(66 - i) % 8], S[(67 - i) % 8], \
|
|
||||||
S[(68 - i) % 8], S[(69 - i) % 8], \
|
|
||||||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
|
||||||
W[i + ii] + Krnd[i + ii])
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Message schedule computation */
|
|
||||||
#define MSCH(W, ii, i) \
|
|
||||||
W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SHA256 block compression function. The 256-bit state is transformed via
|
|
||||||
* the 512-bit input block to produce a new state.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
SHA256_Transform(uint32_t state[static restrict 8],
|
|
||||||
const uint8_t block[static restrict 64],
|
|
||||||
uint32_t W[static restrict 64], uint32_t S[static restrict 8])
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* 1. Prepare the first part of the message schedule W. */
|
|
||||||
be32dec_vect(W, block, 8);
|
|
||||||
|
|
||||||
/* 2. Initialize working variables. */
|
|
||||||
memcpy(S, state, 32);
|
|
||||||
|
|
||||||
/* 3. Mix. */
|
|
||||||
for (i = 0; i < 64; i += 16) {
|
|
||||||
RNDr(S, W, 0, i);
|
|
||||||
RNDr(S, W, 1, i);
|
|
||||||
RNDr(S, W, 2, i);
|
|
||||||
RNDr(S, W, 3, i);
|
|
||||||
RNDr(S, W, 4, i);
|
|
||||||
RNDr(S, W, 5, i);
|
|
||||||
RNDr(S, W, 6, i);
|
|
||||||
RNDr(S, W, 7, i);
|
|
||||||
RNDr(S, W, 8, i);
|
|
||||||
RNDr(S, W, 9, i);
|
|
||||||
RNDr(S, W, 10, i);
|
|
||||||
RNDr(S, W, 11, i);
|
|
||||||
RNDr(S, W, 12, i);
|
|
||||||
RNDr(S, W, 13, i);
|
|
||||||
RNDr(S, W, 14, i);
|
|
||||||
RNDr(S, W, 15, i);
|
|
||||||
|
|
||||||
if (i == 48)
|
|
||||||
break;
|
|
||||||
MSCH(W, 0, i);
|
|
||||||
MSCH(W, 1, i);
|
|
||||||
MSCH(W, 2, i);
|
|
||||||
MSCH(W, 3, i);
|
|
||||||
MSCH(W, 4, i);
|
|
||||||
MSCH(W, 5, i);
|
|
||||||
MSCH(W, 6, i);
|
|
||||||
MSCH(W, 7, i);
|
|
||||||
MSCH(W, 8, i);
|
|
||||||
MSCH(W, 9, i);
|
|
||||||
MSCH(W, 10, i);
|
|
||||||
MSCH(W, 11, i);
|
|
||||||
MSCH(W, 12, i);
|
|
||||||
MSCH(W, 13, i);
|
|
||||||
MSCH(W, 14, i);
|
|
||||||
MSCH(W, 15, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 4. Mix local working variables into global state. */
|
|
||||||
state[0] += S[0];
|
|
||||||
state[1] += S[1];
|
|
||||||
state[2] += S[2];
|
|
||||||
state[3] += S[3];
|
|
||||||
state[4] += S[4];
|
|
||||||
state[5] += S[5];
|
|
||||||
state[6] += S[6];
|
|
||||||
state[7] += S[7];
|
|
||||||
}
|
|
||||||
|
|
||||||
static const uint8_t PAD[64] = {
|
|
||||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Add padding and terminating bit-count. */
|
|
||||||
static void
|
|
||||||
SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
size_t r;
|
|
||||||
|
|
||||||
/* Figure out how many bytes we have buffered. */
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
|
|
||||||
/* Pad to 56 mod 64, transforming if we finish a block en route. */
|
|
||||||
if (r < 56) {
|
|
||||||
/* Pad to 56 mod 64. */
|
|
||||||
memcpy(&ctx->buf[r], PAD, 56 - r);
|
|
||||||
} else {
|
|
||||||
/* Finish the current block and mix. */
|
|
||||||
memcpy(&ctx->buf[r], PAD, 64 - r);
|
|
||||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
|
||||||
|
|
||||||
/* The start of the final block is all zeroes. */
|
|
||||||
memset(&ctx->buf[0], 0, 56);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add the terminating bit-count. */
|
|
||||||
be64enc(&ctx->buf[56], ctx->count);
|
|
||||||
|
|
||||||
/* Mix in the final block. */
|
|
||||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Magic initialization constants. */
|
|
||||||
static const uint32_t initial_state[8] = {
|
|
||||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
|
||||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Init(ctx):
|
|
||||||
* Initialize the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Init(SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Zero bits processed so far. */
|
|
||||||
ctx->count = 0;
|
|
||||||
|
|
||||||
/* Initialize state. */
|
|
||||||
memcpy(ctx->state, initial_state, sizeof(initial_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
uint32_t r;
|
|
||||||
const uint8_t * src = in;
|
|
||||||
|
|
||||||
/* Return immediately if we have nothing to do. */
|
|
||||||
if (len == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Number of bytes left in the buffer from previous updates. */
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
|
|
||||||
/* Update number of bits. */
|
|
||||||
ctx->count += (uint64_t)(len) << 3;
|
|
||||||
|
|
||||||
/* Handle the case where we don't need to perform any transforms. */
|
|
||||||
if (len < 64 - r) {
|
|
||||||
memcpy(&ctx->buf[r], src, len);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finish the current block. */
|
|
||||||
memcpy(&ctx->buf[r], src, 64 - r);
|
|
||||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
|
||||||
src += 64 - r;
|
|
||||||
len -= 64 - r;
|
|
||||||
|
|
||||||
/* Perform complete blocks. */
|
|
||||||
while (len >= 64) {
|
|
||||||
SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]);
|
|
||||||
src += 64;
|
|
||||||
len -= 64;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy left over data into buffer. */
|
|
||||||
memcpy(ctx->buf, src, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_SHA256_Update(ctx, in, len, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Final(digest, ctx):
|
|
||||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Add padding. */
|
|
||||||
SHA256_Pad(ctx, tmp32);
|
|
||||||
|
|
||||||
/* Write the hash. */
|
|
||||||
be32enc_vect(digest, ctx->state, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_SHA256_Final(digest, ctx, tmp32);
|
|
||||||
|
|
||||||
/* Clear the context state. */
|
|
||||||
insecure_memzero(ctx, sizeof(SHA256_CTX));
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Buf(in, len, digest):
|
|
||||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Buf(const void * in, size_t len, uint8_t digest[32])
|
|
||||||
{
|
|
||||||
SHA256_CTX ctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
SHA256_Init(&ctx);
|
|
||||||
_SHA256_Update(&ctx, in, len, tmp32);
|
|
||||||
_SHA256_Final(digest, &ctx, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
|
||||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
|
||||||
* ${K}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen,
|
|
||||||
uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64],
|
|
||||||
uint8_t khash[static restrict 32])
|
|
||||||
{
|
|
||||||
const uint8_t * K = _K;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
/* If Klen > 64, the key is really SHA256(K). */
|
|
||||||
if (Klen > 64) {
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
_SHA256_Update(&ctx->ictx, K, Klen, tmp32);
|
|
||||||
_SHA256_Final(khash, &ctx->ictx, tmp32);
|
|
||||||
K = khash;
|
|
||||||
Klen = 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
memset(pad, 0x36, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
_SHA256_Update(&ctx->ictx, pad, 64, tmp32);
|
|
||||||
|
|
||||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
|
||||||
SHA256_Init(&ctx->octx);
|
|
||||||
memset(pad, 0x5c, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
_SHA256_Update(&ctx->octx, pad, 64, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t pad[64];
|
|
||||||
uint8_t khash[32];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(khash, 32);
|
|
||||||
insecure_memzero(pad, 64);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Feed data to the inner SHA256 operation. */
|
|
||||||
_SHA256_Update(&ctx->ictx, in, len, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Update(ctx, in, len, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Final(digest, ctx):
|
|
||||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx,
|
|
||||||
uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Finish the inner SHA256 operation. */
|
|
||||||
_SHA256_Final(ihash, &ctx->ictx, tmp32);
|
|
||||||
|
|
||||||
/* Feed the inner hash to the outer SHA256 operation. */
|
|
||||||
_SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
|
||||||
|
|
||||||
/* Finish the outer SHA256 operation. */
|
|
||||||
_SHA256_Final(digest, &ctx->octx, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t ihash[32];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Final(digest, ctx, tmp32, ihash);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(ihash, 32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
|
||||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
|
||||||
* length ${Klen}, and write the result to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
|
||||||
uint8_t digest[32])
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX ctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t tmp8[96];
|
|
||||||
|
|
||||||
_HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]);
|
|
||||||
_HMAC_SHA256_Update(&ctx, in, len, tmp32);
|
|
||||||
_HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(tmp8, 96);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add padding and terminating bit-count, but don't invoke Transform yet. */
|
|
||||||
static int
|
|
||||||
SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8],
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
uint32_t r;
|
|
||||||
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
if (r >= 56)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convert length to a vector of bytes -- we do this now rather
|
|
||||||
* than later because the length will change after we pad.
|
|
||||||
*/
|
|
||||||
be64enc(len, ctx->count);
|
|
||||||
|
|
||||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
|
||||||
_SHA256_Update(ctx, PAD, 56 - r, tmp32);
|
|
||||||
|
|
||||||
/* Add the terminating bit-count. */
|
|
||||||
ctx->buf[63] = len[7];
|
|
||||||
_SHA256_Update(ctx, len, 7, tmp32);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
|
||||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
|
||||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
|
||||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX Phctx, PShctx, hctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
union {
|
|
||||||
uint8_t tmp8[96];
|
|
||||||
uint32_t state[8];
|
|
||||||
} u;
|
|
||||||
size_t i;
|
|
||||||
uint8_t ivec[4];
|
|
||||||
uint8_t U[32];
|
|
||||||
uint8_t T[32];
|
|
||||||
uint64_t j;
|
|
||||||
int k;
|
|
||||||
size_t clen;
|
|
||||||
|
|
||||||
/* Sanity-check. */
|
|
||||||
assert(dkLen <= 32 * (size_t)(UINT32_MAX));
|
|
||||||
|
|
||||||
if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) {
|
|
||||||
uint32_t oldcount;
|
|
||||||
uint8_t * ivecp;
|
|
||||||
|
|
||||||
/* Compute HMAC state after processing P and S. */
|
|
||||||
_HMAC_SHA256_Init(&hctx, passwd, passwdlen,
|
|
||||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
|
||||||
_HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32);
|
|
||||||
|
|
||||||
/* Prepare ictx padding. */
|
|
||||||
oldcount = hctx.ictx.count & (0x3f << 3);
|
|
||||||
_HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32);
|
|
||||||
if ((hctx.ictx.count & (0x3f << 3)) < oldcount ||
|
|
||||||
SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32))
|
|
||||||
goto generic; /* Can't happen due to saltlen check */
|
|
||||||
ivecp = hctx.ictx.buf + (oldcount >> 3);
|
|
||||||
|
|
||||||
/* Prepare octx padding. */
|
|
||||||
hctx.octx.count += 32 << 3;
|
|
||||||
SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32);
|
|
||||||
|
|
||||||
/* Iterate through the blocks. */
|
|
||||||
for (i = 0; i * 32 < dkLen; i++) {
|
|
||||||
/* Generate INT(i + 1). */
|
|
||||||
be32enc(ivecp, (uint32_t)(i + 1));
|
|
||||||
|
|
||||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
|
||||||
memcpy(u.state, hctx.ictx.state, sizeof(u.state));
|
|
||||||
SHA256_Transform(u.state, hctx.ictx.buf,
|
|
||||||
&tmp32[0], &tmp32[64]);
|
|
||||||
be32enc_vect(hctx.octx.buf, u.state, 4);
|
|
||||||
memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
|
||||||
SHA256_Transform(u.state, hctx.octx.buf,
|
|
||||||
&tmp32[0], &tmp32[64]);
|
|
||||||
be32enc_vect(&buf[i * 32], u.state, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
generic:
|
|
||||||
/* Compute HMAC state after processing P. */
|
|
||||||
_HMAC_SHA256_Init(&Phctx, passwd, passwdlen,
|
|
||||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
|
||||||
|
|
||||||
/* Compute HMAC state after processing P and S. */
|
|
||||||
memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32);
|
|
||||||
|
|
||||||
/* Iterate through the blocks. */
|
|
||||||
for (i = 0; i * 32 < dkLen; i++) {
|
|
||||||
/* Generate INT(i + 1). */
|
|
||||||
be32enc(ivec, (uint32_t)(i + 1));
|
|
||||||
|
|
||||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
|
||||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&hctx, ivec, 4, tmp32);
|
|
||||||
_HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8);
|
|
||||||
|
|
||||||
if (c > 1) {
|
|
||||||
/* T_i = U_1 ... */
|
|
||||||
memcpy(U, T, 32);
|
|
||||||
|
|
||||||
for (j = 2; j <= c; j++) {
|
|
||||||
/* Compute U_j. */
|
|
||||||
memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&hctx, U, 32, tmp32);
|
|
||||||
_HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8);
|
|
||||||
|
|
||||||
/* ... xor U_j ... */
|
|
||||||
for (k = 0; k < 32; k++)
|
|
||||||
T[k] ^= U[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy as many bytes as necessary into buf. */
|
|
||||||
clen = dkLen - i * 32;
|
|
||||||
if (clen > 32)
|
|
||||||
clen = 32;
|
|
||||||
memcpy(&buf[i * 32], T, clen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(U, 32);
|
|
||||||
insecure_memzero(T, 32);
|
|
||||||
|
|
||||||
cleanup:
|
|
||||||
insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(&u, sizeof(u));
|
|
||||||
}
|
|
@@ -1,672 +0,0 @@
|
|||||||
/*-
|
|
||||||
* Copyright 2005-2016 Colin Percival
|
|
||||||
* Copyright 2016-2018 Alexander Peslyak
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "insecure_memzero.h"
|
|
||||||
#include "sysendian.h"
|
|
||||||
|
|
||||||
#include "sha256.h"
|
|
||||||
|
|
||||||
#ifdef __ICC
|
|
||||||
/* Miscompile with icc 14.0.0 (at least), so don't use restrict there */
|
|
||||||
#define restrict
|
|
||||||
#elif __STDC_VERSION__ >= 199901L
|
|
||||||
/* Have restrict */
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define restrict __restrict
|
|
||||||
#else
|
|
||||||
#define restrict
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Encode a length len*2 vector of (uint32_t) into a length len*8 vector of
|
|
||||||
* (uint8_t) in big-endian form.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Encode vector, two words at a time. */
|
|
||||||
do {
|
|
||||||
be32enc(&dst[0], src[0]);
|
|
||||||
be32enc(&dst[4], src[1]);
|
|
||||||
src += 2;
|
|
||||||
dst += 8;
|
|
||||||
} while (--len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Decode a big-endian length len*8 vector of (uint8_t) into a length
|
|
||||||
* len*2 vector of (uint32_t).
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Decode vector, two words at a time. */
|
|
||||||
do {
|
|
||||||
dst[0] = be32dec(&src[0]);
|
|
||||||
dst[1] = be32dec(&src[4]);
|
|
||||||
src += 8;
|
|
||||||
dst += 2;
|
|
||||||
} while (--len);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* SHA256 round constants. */
|
|
||||||
static const uint32_t Krnd[64] = {
|
|
||||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
|
||||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
|
||||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
|
||||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
|
||||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
|
||||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
|
||||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
|
||||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
|
||||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
|
||||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
|
||||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
|
||||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
|
||||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
|
||||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
|
||||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
|
||||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Elementary functions used by SHA256 */
|
|
||||||
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
|
||||||
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
|
||||||
#define SHR(x, n) (x >> n)
|
|
||||||
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
|
||||||
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
|
||||||
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
|
||||||
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
|
|
||||||
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
|
|
||||||
|
|
||||||
/* SHA256 round function */
|
|
||||||
#define RND(a, b, c, d, e, f, g, h, k) \
|
|
||||||
h += S1(e) + Ch(e, f, g) + k; \
|
|
||||||
d += h; \
|
|
||||||
h += S0(a) + Maj(a, b, c);
|
|
||||||
|
|
||||||
/* Adjusted round function for rotating state */
|
|
||||||
#define RNDr(S, W, i, ii) \
|
|
||||||
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
|
||||||
S[(66 - i) % 8], S[(67 - i) % 8], \
|
|
||||||
S[(68 - i) % 8], S[(69 - i) % 8], \
|
|
||||||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
|
||||||
W[i + ii] + Krnd[i + ii])
|
|
||||||
|
|
||||||
/* Message schedule computation */
|
|
||||||
#define MSCH(W, ii, i) \
|
|
||||||
W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SHA256 block compression function. The 256-bit state is transformed via
|
|
||||||
* the 512-bit input block to produce a new state.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
SHA256_Transform(uint32_t state[static restrict 8],
|
|
||||||
const uint8_t block[static restrict 64],
|
|
||||||
uint32_t W[static restrict 64], uint32_t S[static restrict 8])
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* 1. Prepare the first part of the message schedule W. */
|
|
||||||
be32dec_vect(W, block, 8);
|
|
||||||
|
|
||||||
/* 2. Initialize working variables. */
|
|
||||||
memcpy(S, state, 32);
|
|
||||||
|
|
||||||
/* 3. Mix. */
|
|
||||||
for (i = 0; i < 64; i += 16) {
|
|
||||||
RNDr(S, W, 0, i);
|
|
||||||
RNDr(S, W, 1, i);
|
|
||||||
RNDr(S, W, 2, i);
|
|
||||||
RNDr(S, W, 3, i);
|
|
||||||
RNDr(S, W, 4, i);
|
|
||||||
RNDr(S, W, 5, i);
|
|
||||||
RNDr(S, W, 6, i);
|
|
||||||
RNDr(S, W, 7, i);
|
|
||||||
RNDr(S, W, 8, i);
|
|
||||||
RNDr(S, W, 9, i);
|
|
||||||
RNDr(S, W, 10, i);
|
|
||||||
RNDr(S, W, 11, i);
|
|
||||||
RNDr(S, W, 12, i);
|
|
||||||
RNDr(S, W, 13, i);
|
|
||||||
RNDr(S, W, 14, i);
|
|
||||||
RNDr(S, W, 15, i);
|
|
||||||
|
|
||||||
if (i == 48)
|
|
||||||
break;
|
|
||||||
MSCH(W, 0, i);
|
|
||||||
MSCH(W, 1, i);
|
|
||||||
MSCH(W, 2, i);
|
|
||||||
MSCH(W, 3, i);
|
|
||||||
MSCH(W, 4, i);
|
|
||||||
MSCH(W, 5, i);
|
|
||||||
MSCH(W, 6, i);
|
|
||||||
MSCH(W, 7, i);
|
|
||||||
MSCH(W, 8, i);
|
|
||||||
MSCH(W, 9, i);
|
|
||||||
MSCH(W, 10, i);
|
|
||||||
MSCH(W, 11, i);
|
|
||||||
MSCH(W, 12, i);
|
|
||||||
MSCH(W, 13, i);
|
|
||||||
MSCH(W, 14, i);
|
|
||||||
MSCH(W, 15, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 4. Mix local working variables into global state. */
|
|
||||||
state[0] += S[0];
|
|
||||||
state[1] += S[1];
|
|
||||||
state[2] += S[2];
|
|
||||||
state[3] += S[3];
|
|
||||||
state[4] += S[4];
|
|
||||||
state[5] += S[5];
|
|
||||||
state[6] += S[6];
|
|
||||||
state[7] += S[7];
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
static const uint8_t PAD[64] = {
	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/* Add padding and terminating bit-count. */
static void
SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72])
{
	size_t r;

	/* Figure out how many bytes we have buffered. */
	r = (ctx->count >> 3) & 0x3f;

	/* Pad to 56 mod 64, transforming if we finish a block en route. */
	if (r < 56) {
		/* Pad to 56 mod 64. */
		memcpy(&ctx->buf[r], PAD, 56 - r);
	} else {
		/* Finish the current block and mix. */
		memcpy(&ctx->buf[r], PAD, 64 - r);
		SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);

		/* The start of the final block is all zeroes. */
		memset(&ctx->buf[0], 0, 56);
	}

	/* Add the terminating bit-count. */
	be64enc(&ctx->buf[56], ctx->count);

	/* Mix in the final block. */
	SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
}
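SHA256_Pad above applies the standard SHA-256 padding rule: a 0x80 byte, then zeros until the length is 56 mod 64, then the 64-bit big-endian bit count, so the padded input is always a whole number of 64-byte blocks. A minimal standalone sketch (illustrative only) of how many bytes that padding adds for a few message lengths:

#include <stdio.h>

/* Bytes appended by SHA-256 padding (0x80, zeros, 8-byte length). */
static size_t pad_bytes(size_t n)
{
	size_t r = n % 64;

	return (r < 56 ? 56 - r : 120 - r) + 8;
}

int main(void)
{
	size_t lens[] = { 0, 1, 55, 56, 63, 64 };
	size_t i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("msg %2zu bytes -> +%2zu pad -> %3zu total\n",
		    lens[i], pad_bytes(lens[i]), lens[i] + pad_bytes(lens[i]));
	return 0;
}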
|
|
||||||
#if 0
|
|
||||||
/* Magic initialization constants. */
|
|
||||||
static const uint32_t initial_state[8] = {
|
|
||||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
|
||||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Init(ctx):
|
|
||||||
* Initialize the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Init(SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Zero bits processed so far. */
|
|
||||||
ctx->count = 0;
|
|
||||||
|
|
||||||
/* Initialize state. */
|
|
||||||
memcpy(ctx->state, initial_state, sizeof(initial_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
uint32_t r;
|
|
||||||
const uint8_t * src = in;
|
|
||||||
|
|
||||||
/* Return immediately if we have nothing to do. */
|
|
||||||
if (len == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Number of bytes left in the buffer from previous updates. */
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
|
|
||||||
/* Update number of bits. */
|
|
||||||
ctx->count += (uint64_t)(len) << 3;
|
|
||||||
|
|
||||||
/* Handle the case where we don't need to perform any transforms. */
|
|
||||||
if (len < 64 - r) {
|
|
||||||
memcpy(&ctx->buf[r], src, len);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finish the current block. */
|
|
||||||
memcpy(&ctx->buf[r], src, 64 - r);
|
|
||||||
SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]);
|
|
||||||
src += 64 - r;
|
|
||||||
len -= 64 - r;
|
|
||||||
|
|
||||||
/* Perform complete blocks. */
|
|
||||||
while (len >= 64) {
|
|
||||||
SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]);
|
|
||||||
src += 64;
|
|
||||||
len -= 64;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy left over data into buffer. */
|
|
||||||
memcpy(ctx->buf, src, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_SHA256_Update(ctx, in, len, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Final(digest, ctx):
|
|
||||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Add padding. */
|
|
||||||
SHA256_Pad(ctx, tmp32);
|
|
||||||
|
|
||||||
/* Write the hash. */
|
|
||||||
be32enc_vect(digest, ctx->state, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_SHA256_Final(digest, ctx, tmp32);
|
|
||||||
|
|
||||||
/* Clear the context state. */
|
|
||||||
insecure_memzero(ctx, sizeof(SHA256_CTX));
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/**
|
|
||||||
* SHA256_Buf(in, len, digest):
|
|
||||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Buf(const void * in, size_t len, uint8_t digest[32])
|
|
||||||
{
|
|
||||||
SHA256_CTX ctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
SHA256_Init(&ctx);
|
|
||||||
SHA256_Update(&ctx, in, len);
|
|
||||||
SHA256_Final(digest, &ctx);
|
|
||||||
// _SHA256_Update(&ctx, in, len, tmp32);
|
|
||||||
// _SHA256_Final(digest, &ctx, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
|
||||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
|
||||||
* ${K}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen,
|
|
||||||
uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64],
|
|
||||||
uint8_t khash[static restrict 32])
|
|
||||||
{
|
|
||||||
const uint8_t * K = _K;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
/* If Klen > 64, the key is really SHA256(K). */
|
|
||||||
if (Klen > 64) {
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
SHA256_Update(&ctx->ictx, K, Klen);
|
|
||||||
SHA256_Final(khash, &ctx->ictx);
|
|
||||||
// _SHA256_Update(&ctx->ictx, K, Klen, tmp32);
|
|
||||||
// _SHA256_Final(khash, &ctx->ictx, tmp32);
|
|
||||||
K = khash;
|
|
||||||
Klen = 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
memset(pad, 0x36, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
SHA256_Update(&ctx->ictx, pad, 64);
|
|
||||||
// _SHA256_Update(&ctx->ictx, pad, 64, tmp32);
|
|
||||||
|
|
||||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
|
||||||
SHA256_Init(&ctx->octx);
|
|
||||||
memset(pad, 0x5c, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
SHA256_Update(&ctx->octx, pad, 64);
|
|
||||||
// _SHA256_Update(&ctx->octx, pad, 64, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t pad[64];
|
|
||||||
uint8_t khash[32];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(khash, 32);
|
|
||||||
insecure_memzero(pad, 64);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len,
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Feed data to the inner SHA256 operation. */
|
|
||||||
SHA256_Update(&ctx->ictx, in, len);
|
|
||||||
// _SHA256_Update(&ctx->ictx, in, len, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Update(ctx, in, len, tmp32);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Final(digest, ctx):
|
|
||||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx,
|
|
||||||
uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32])
|
|
||||||
{
|
|
||||||
/* Finish the inner SHA256 operation. */
|
|
||||||
_SHA256_Final(ihash, &ctx->ictx, tmp32);
|
|
||||||
|
|
||||||
/* Feed the inner hash to the outer SHA256 operation. */
|
|
||||||
_SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
|
||||||
|
|
||||||
/* Finish the outer SHA256 operation. */
|
|
||||||
_SHA256_Final(digest, &ctx->octx, tmp32);
|
|
||||||
|
|
||||||
|
|
||||||
// _SHA256_Final(ihash, &ctx->ictx, tmp32);
|
|
||||||
// _SHA256_Update(&ctx->octx, ihash, 32, tmp32);
|
|
||||||
// _SHA256_Final(digest, &ctx->octx, tmp32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wrapper function for intermediate-values sanitization. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx)
|
|
||||||
{
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t ihash[32];
|
|
||||||
|
|
||||||
/* Call the real function. */
|
|
||||||
_HMAC_SHA256_Final(digest, ctx, tmp32, ihash);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(ihash, 32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
|
||||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
|
||||||
* length ${Klen}, and write the result to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
|
||||||
uint8_t digest[32])
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX ctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
uint8_t tmp8[96];
|
|
||||||
|
|
||||||
_HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]);
|
|
||||||
_HMAC_SHA256_Update(&ctx, in, len, tmp32);
|
|
||||||
_HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(tmp8, 96);
|
|
||||||
}
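HMAC_SHA256_Buf is the one-shot convenience wrapper around the Init/Update/Final sequence above. A minimal usage sketch, assuming it is built and linked inside the tree; the prototype is copied from the project's SHA-256 header, and the key and message values are purely illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Prototype as declared in the project's SHA-256 header. */
void HMAC_SHA256_Buf(const void *, size_t, const void *, size_t, uint8_t[32]);

int main(void)
{
	const char key[] = "key";
	const char msg[] = "message to authenticate";
	uint8_t mac[32];
	int i;

	HMAC_SHA256_Buf(key, strlen(key), msg, strlen(msg), mac);

	for (i = 0; i < 32; i++)
		printf("%02x", mac[i]);
	printf("\n");
	return 0;
}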
|
|
||||||
|
|
||||||
/* Add padding and terminating bit-count, but don't invoke Transform yet. */
|
|
||||||
static int
|
|
||||||
SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8],
|
|
||||||
uint32_t tmp32[static restrict 72])
|
|
||||||
{
|
|
||||||
uint32_t r;
|
|
||||||
|
|
||||||
r = (ctx->count >> 3) & 0x3f;
|
|
||||||
if (r >= 56)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convert length to a vector of bytes -- we do this now rather
|
|
||||||
* than later because the length will change after we pad.
|
|
||||||
*/
|
|
||||||
be64enc(len, ctx->count);
|
|
||||||
|
|
||||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
|
||||||
SHA256_Update(ctx, PAD, 56 - r);
|
|
||||||
|
|
||||||
/* Add the terminating bit-count. */
|
|
||||||
ctx->buf[63] = len[7];
|
|
||||||
SHA256_Update(ctx, len, 7);
|
|
||||||
|
|
||||||
/* Add 1--56 bytes so that the resulting length is 56 mod 64. */
|
|
||||||
// _SHA256_Update(ctx, PAD, 56 - r, tmp32);
|
|
||||||
|
|
||||||
/* Add the terminating bit-count. */
|
|
||||||
// ctx->buf[63] = len[7];
|
|
||||||
// _SHA256_Update(ctx, len, 7, tmp32);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
|
||||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
|
||||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
|
||||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX Phctx, PShctx, hctx;
|
|
||||||
uint32_t tmp32[72];
|
|
||||||
union {
|
|
||||||
uint8_t tmp8[96];
|
|
||||||
uint32_t state[8];
|
|
||||||
} u;
|
|
||||||
size_t i;
|
|
||||||
uint8_t ivec[4];
|
|
||||||
uint8_t U[32];
|
|
||||||
uint8_t T[32];
|
|
||||||
uint64_t j;
|
|
||||||
int k;
|
|
||||||
size_t clen;
|
|
||||||
|
|
||||||
/* Sanity-check. */
|
|
||||||
assert(dkLen <= 32 * (size_t)(UINT32_MAX));
|
|
||||||
|
|
||||||
if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) {
|
|
||||||
uint32_t oldcount;
|
|
||||||
uint8_t * ivecp;
|
|
||||||
|
|
||||||
/* Compute HMAC state after processing P and S. */
|
|
||||||
_HMAC_SHA256_Init(&hctx, passwd, passwdlen,
|
|
||||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
|
||||||
_HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32);
|
|
||||||
|
|
||||||
/* Prepare ictx padding. */
|
|
||||||
oldcount = hctx.ictx.count & (0x3f << 3);
|
|
||||||
_HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32);
|
|
||||||
if ((hctx.ictx.count & (0x3f << 3)) < oldcount ||
|
|
||||||
SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32))
|
|
||||||
goto generic; /* Can't happen due to saltlen check */
|
|
||||||
ivecp = hctx.ictx.buf + (oldcount >> 3);
|
|
||||||
|
|
||||||
/* Prepare octx padding. */
|
|
||||||
hctx.octx.count += 32 << 3;
|
|
||||||
SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32);
|
|
||||||
|
|
||||||
/* Iterate through the blocks. */
|
|
||||||
for (i = 0; i * 32 < dkLen; i++) {
|
|
||||||
/* Generate INT(i + 1). */
|
|
||||||
be32enc(ivecp, (uint32_t)(i + 1));
|
|
||||||
|
|
||||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
|
||||||
memcpy(u.state, hctx.ictx.state, sizeof(u.state));
|
|
||||||
|
|
||||||
SHA256_Transform(u.state, hctx.ictx.buf );
|
|
||||||
be32enc_vect(hctx.octx.buf, u.state, 4);
|
|
||||||
memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
|
||||||
SHA256_Transform(u.state, hctx.octx.buf );
|
|
||||||
|
|
||||||
// SHA256_Transform(u.state, hctx.ictx.buf,
|
|
||||||
// &tmp32[0], &tmp32[64]);
|
|
||||||
// be32enc_vect(hctx.octx.buf, u.state, 4);
|
|
||||||
// memcpy(u.state, hctx.octx.state, sizeof(u.state));
|
|
||||||
// SHA256_Transform(u.state, hctx.octx.buf,
|
|
||||||
// &tmp32[0], &tmp32[64]);
|
|
||||||
|
|
||||||
be32enc_vect(&buf[i * 32], u.state, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
generic:
|
|
||||||
/* Compute HMAC state after processing P. */
|
|
||||||
_HMAC_SHA256_Init(&Phctx, passwd, passwdlen,
|
|
||||||
tmp32, &u.tmp8[0], &u.tmp8[64]);
|
|
||||||
|
|
||||||
/* Compute HMAC state after processing P and S. */
|
|
||||||
memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32);
|
|
||||||
|
|
||||||
/* Iterate through the blocks. */
|
|
||||||
for (i = 0; i * 32 < dkLen; i++) {
|
|
||||||
/* Generate INT(i + 1). */
|
|
||||||
be32enc(ivec, (uint32_t)(i + 1));
|
|
||||||
|
|
||||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
|
||||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&hctx, ivec, 4, tmp32);
|
|
||||||
_HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8);
|
|
||||||
|
|
||||||
if (c > 1) {
|
|
||||||
/* T_i = U_1 ... */
|
|
||||||
memcpy(U, T, 32);
|
|
||||||
|
|
||||||
for (j = 2; j <= c; j++) {
|
|
||||||
/* Compute U_j. */
|
|
||||||
memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
_HMAC_SHA256_Update(&hctx, U, 32, tmp32);
|
|
||||||
_HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8);
|
|
||||||
|
|
||||||
/* ... xor U_j ... */
|
|
||||||
for (k = 0; k < 32; k++)
|
|
||||||
T[k] ^= U[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy as many bytes as necessary into buf. */
|
|
||||||
clen = dkLen - i * 32;
|
|
||||||
if (clen > 32)
|
|
||||||
clen = 32;
|
|
||||||
memcpy(&buf[i * 32], T, clen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(U, 32);
|
|
||||||
insecure_memzero(T, 32);
|
|
||||||
|
|
||||||
cleanup:
|
|
||||||
insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
insecure_memzero(tmp32, 288);
|
|
||||||
insecure_memzero(&u, sizeof(u));
|
|
||||||
}
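PBKDF2_SHA256 above takes a dedicated fast path when c == 1 and dkLen is a multiple of 32, and falls back to the generic per-block loop otherwise. A minimal usage sketch deriving a 32-byte key (illustrative only; the password and salt values are not from the source, and the prototype is copied from the project's SHA-256 header):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Prototype as declared in the project's SHA-256 header. */
void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t,
    uint64_t, uint8_t *, size_t);

int main(void)
{
	const uint8_t passwd[] = "password";
	const uint8_t salt[] = "salt";
	uint8_t dk[32];
	size_t i;

	/* One iteration, 32-byte derived key (exercises the fast path above). */
	PBKDF2_SHA256(passwd, sizeof(passwd) - 1, salt, sizeof(salt) - 1,
	    1, dk, sizeof(dk));

	for (i = 0; i < sizeof(dk); i++)
		printf("%02x", dk[i]);
	printf("\n");
	return 0;
}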
|
|
@@ -1,129 +0,0 @@
|
|||||||
/*-
|
|
||||||
* Copyright 2005-2016 Colin Percival
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _SHA256_H_
|
|
||||||
#define _SHA256_H_
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Use #defines in order to avoid namespace collisions with anyone else's
|
|
||||||
* SHA256 code (e.g., the code in OpenSSL).
|
|
||||||
*/
|
|
||||||
#define SHA256_Init libcperciva_SHA256_Init
|
|
||||||
#define SHA256_Update libcperciva_SHA256_Update
|
|
||||||
#define SHA256_Final libcperciva_SHA256_Final
|
|
||||||
#define SHA256_Buf libcperciva_SHA256_Buf
|
|
||||||
#define SHA256_CTX libcperciva_SHA256_CTX
|
|
||||||
#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init
|
|
||||||
#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update
|
|
||||||
#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final
|
|
||||||
#define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf
|
|
||||||
#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX
|
|
||||||
|
|
||||||
/* Context structure for SHA256 operations. */
|
|
||||||
typedef struct {
|
|
||||||
uint32_t state[8];
|
|
||||||
uint64_t count;
|
|
||||||
uint8_t buf[64];
|
|
||||||
} SHA256_CTX;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Init(ctx):
|
|
||||||
* Initialize the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void SHA256_Init(SHA256_CTX *);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void SHA256_Update(SHA256_CTX *, const void *, size_t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Final(digest, ctx):
|
|
||||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
void SHA256_Final(uint8_t[32], SHA256_CTX *);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Buf(in, len, digest):
|
|
||||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
|
||||||
*/
|
|
||||||
void SHA256_Buf(const void *, size_t, uint8_t[32]);
|
|
||||||
|
|
||||||
/* Context structure for HMAC-SHA256 operations. */
|
|
||||||
typedef struct {
|
|
||||||
SHA256_CTX ictx;
|
|
||||||
SHA256_CTX octx;
|
|
||||||
} HMAC_SHA256_CTX;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
|
||||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
|
||||||
* ${K}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Final(digest, ctx):
|
|
||||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Final(uint8_t[32], HMAC_SHA256_CTX *);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
|
||||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
|
||||||
* length ${Klen}, and write the result to ${digest}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Buf(const void *, size_t, const void *, size_t, uint8_t[32]);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
|
||||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
|
||||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
|
||||||
*/
|
|
||||||
void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t,
|
|
||||||
uint64_t, uint8_t *, size_t);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* !_SHA256_H_ */
|
|
@@ -1,134 +0,0 @@
|
|||||||
/*-
|
|
||||||
* Copyright 2005-2016 Colin Percival
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _SHA256_H_
|
|
||||||
#define _SHA256_H_
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <openssl/sha.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Use #defines in order to avoid namespace collisions with anyone else's
|
|
||||||
* SHA256 code (e.g., the code in OpenSSL).
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
#define SHA256_Init libcperciva_SHA256_Init
|
|
||||||
#define SHA256_Update libcperciva_SHA256_Update
|
|
||||||
#define SHA256_Final libcperciva_SHA256_Final
|
|
||||||
#define SHA256_CTX libcperciva_SHA256_CTX
|
|
||||||
*/
|
|
||||||
#define SHA256_Buf libcperciva_SHA256_Buf
|
|
||||||
#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init
|
|
||||||
#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update
|
|
||||||
#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final
|
|
||||||
#define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf
|
|
||||||
#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* Context structure for SHA256 operations. */
|
|
||||||
typedef struct {
|
|
||||||
uint32_t state[8];
|
|
||||||
uint64_t count;
|
|
||||||
uint8_t buf[64];
|
|
||||||
} SHA256_CTX;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Init(ctx):
|
|
||||||
* Initialize the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void SHA256_Init(SHA256_CTX *);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void SHA256_Update(SHA256_CTX *, const void *, size_t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Final(digest, ctx):
|
|
||||||
* Output the SHA256 hash of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
void SHA256_Final(uint8_t[32], SHA256_CTX *);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Buf(in, len, digest):
|
|
||||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
|
||||||
*/
|
|
||||||
void SHA256_Buf(const void *, size_t, uint8_t[32]);
|
|
||||||
|
|
||||||
/* Context structure for HMAC-SHA256 operations. */
|
|
||||||
typedef struct {
|
|
||||||
SHA256_CTX ictx;
|
|
||||||
SHA256_CTX octx;
|
|
||||||
} HMAC_SHA256_CTX;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Init(ctx, K, Klen):
|
|
||||||
* Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from
|
|
||||||
* ${K}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Update(ctx, in, len):
|
|
||||||
* Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Final(digest, ctx):
|
|
||||||
* Output the HMAC-SHA256 of the data input to the context ${ctx} into the
|
|
||||||
* buffer ${digest}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Final(uint8_t[32], HMAC_SHA256_CTX *);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
|
||||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
|
||||||
* length ${Klen}, and write the result to ${digest}.
|
|
||||||
*/
|
|
||||||
void HMAC_SHA256_Buf(const void *, size_t, const void *, size_t, uint8_t[32]);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
|
||||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
|
||||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
|
||||||
*/
|
|
||||||
void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t,
|
|
||||||
uint64_t, uint8_t *, size_t);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* !_SHA256_H_ */
|
|
218
algo/yespower/sha256_p.c
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2005,2007,2009 Colin Percival
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "sysendian.h"
|
||||||
|
|
||||||
|
#include "sha256_p.h"
|
||||||
|
#include "compat.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Elementary functions used by SHA256 */
|
||||||
|
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
||||||
|
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
||||||
|
#define SHR(x, n) (x >> n)
|
||||||
|
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
||||||
|
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
||||||
|
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
||||||
|
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
|
||||||
|
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
|
||||||
|
|
||||||
|
/* SHA256 round function */
|
||||||
|
#define RND(a, b, c, d, e, f, g, h, k) \
|
||||||
|
t0 = h + S1(e) + Ch(e, f, g) + k; \
|
||||||
|
t1 = S0(a) + Maj(a, b, c); \
|
||||||
|
d += t0; \
|
||||||
|
h = t0 + t1;
|
||||||
|
|
||||||
|
/* Adjusted round function for rotating state */
|
||||||
|
#define RNDr(S, W, i, k) \
|
||||||
|
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
||||||
|
S[(66 - i) % 8], S[(67 - i) % 8], \
|
||||||
|
S[(68 - i) % 8], S[(69 - i) % 8], \
|
||||||
|
S[(70 - i) % 8], S[(71 - i) % 8], \
|
||||||
|
W[i] + k)
|
||||||
|
|
||||||
|
/*
|
||||||
|
static unsigned char PAD[64] = {
|
||||||
|
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
* SHA256_Buf(in, len, digest):
|
||||||
|
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
SHA256_Buf( const void * in, size_t len, uint8_t digest[32] )
|
||||||
|
{
|
||||||
|
SHA256_CTX ctx;
|
||||||
|
SHA256_Init( &ctx );
|
||||||
|
SHA256_Update( &ctx, in, len );
|
||||||
|
SHA256_Final( digest, &ctx );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
||||||
|
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
||||||
|
* length ${Klen}, and write the result to ${digest}.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len,
|
||||||
|
uint8_t digest[32])
|
||||||
|
{
|
||||||
|
HMAC_SHA256_CTX ctx;
|
||||||
|
|
||||||
|
HMAC_SHA256_Init( &ctx, K, Klen );
|
||||||
|
HMAC_SHA256_Update( &ctx, in, len );
|
||||||
|
HMAC_SHA256_Final( digest, &ctx );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||||
|
void
|
||||||
|
HMAC_SHA256_Init( HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen )
|
||||||
|
{
|
||||||
|
unsigned char pad[64];
|
||||||
|
unsigned char khash[32];
|
||||||
|
const unsigned char * K = _K;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* If Klen > 64, the key is really SHA256(K). */
|
||||||
|
if (Klen > 64) {
|
||||||
|
SHA256_Init( &ctx->ictx );
|
||||||
|
SHA256_Update( &ctx->ictx, K, Klen );
|
||||||
|
SHA256_Final( khash, &ctx->ictx );
|
||||||
|
K = khash;
|
||||||
|
Klen = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||||
|
SHA256_Init( &ctx->ictx );
|
||||||
|
memset( pad, 0x36, 64 );
|
||||||
|
for ( i = 0; i < Klen; i++ )
|
||||||
|
pad[i] ^= K[i];
|
||||||
|
SHA256_Update( &ctx->ictx, pad, 64 );
|
||||||
|
|
||||||
|
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||||
|
SHA256_Init( &ctx->octx );
|
||||||
|
memset(pad, 0x5c, 64);
|
||||||
|
for ( i = 0; i < Klen; i++ )
|
||||||
|
pad[i] ^= K[i];
|
||||||
|
SHA256_Update( &ctx->octx, pad, 64 );
|
||||||
|
|
||||||
|
/* Clean the stack. */
|
||||||
|
//memset(khash, 0, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add bytes to the HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
|
||||||
|
{
|
||||||
|
|
||||||
|
/* Feed data to the inner SHA256 operation. */
|
||||||
|
SHA256_Update( &ctx->ictx, in, len );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finish an HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx )
|
||||||
|
{
|
||||||
|
unsigned char ihash[32];
|
||||||
|
|
||||||
|
/* Finish the inner SHA256 operation. */
|
||||||
|
SHA256_Final( ihash, &ctx->ictx );
|
||||||
|
|
||||||
|
/* Feed the inner hash to the outer SHA256 operation. */
|
||||||
|
SHA256_Update( &ctx->octx, ihash, 32 );
|
||||||
|
|
||||||
|
/* Finish the outer SHA256 operation. */
|
||||||
|
SHA256_Final( digest, &ctx->octx );
|
||||||
|
|
||||||
|
/* Clean the stack. */
|
||||||
|
//memset(ihash, 0, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||||
|
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||||
|
{
|
||||||
|
HMAC_SHA256_CTX PShctx, hctx;
|
||||||
|
uint8_t _ALIGN(128) T[32];
|
||||||
|
uint8_t _ALIGN(128) U[32];
|
||||||
|
uint8_t ivec[4];
|
||||||
|
size_t i, clen;
|
||||||
|
uint64_t j;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
/* Compute HMAC state after processing P and S. */
|
||||||
|
HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
|
||||||
|
HMAC_SHA256_Update(&PShctx, salt, saltlen);
|
||||||
|
|
||||||
|
/* Iterate through the blocks. */
|
||||||
|
for (i = 0; i * 32 < dkLen; i++) {
|
||||||
|
/* Generate INT(i + 1). */
|
||||||
|
be32enc(ivec, (uint32_t)(i + 1));
|
||||||
|
|
||||||
|
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||||
|
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
|
||||||
|
HMAC_SHA256_Update(&hctx, ivec, 4);
|
||||||
|
HMAC_SHA256_Final(U, &hctx);
|
||||||
|
|
||||||
|
/* T_i = U_1 ... */
|
||||||
|
memcpy(T, U, 32);
|
||||||
|
|
||||||
|
for (j = 2; j <= c; j++) {
|
||||||
|
/* Compute U_j. */
|
||||||
|
HMAC_SHA256_Init(&hctx, passwd, passwdlen);
|
||||||
|
HMAC_SHA256_Update(&hctx, U, 32);
|
||||||
|
HMAC_SHA256_Final(U, &hctx);
|
||||||
|
|
||||||
|
/* ... xor U_j ... */
|
||||||
|
for (k = 0; k < 32; k++)
|
||||||
|
T[k] ^= U[k];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy as many bytes as necessary into buf. */
|
||||||
|
clen = dkLen - i * 32;
|
||||||
|
if (clen > 32)
|
||||||
|
clen = 32;
|
||||||
|
memcpy(&buf[i * 32], T, clen);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Clean PShctx, since we never called _Final on it. */
|
||||||
|
//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
|
||||||
|
}
|
@@ -1,496 +0,0 @@
|
|||||||
/*-
|
|
||||||
* Copyright 2005,2007,2009 Colin Percival
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <sys/types.h>
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "sysendian.h"
|
|
||||||
|
|
||||||
#include "sha256_p.h"
|
|
||||||
#include "compat.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Encode a length len/4 vector of (uint32_t) into a length len vector of
|
|
||||||
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < len / 4; i++)
|
|
||||||
be32enc(dst + i * 4, src[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Decode a big-endian length len vector of (unsigned char) into a length
|
|
||||||
* len/4 vector of (uint32_t). Assumes len is a multiple of 4.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < len / 4; i++)
|
|
||||||
dst[i] = be32dec(src + i * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Elementary functions used by SHA256 */
|
|
||||||
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
|
||||||
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
|
||||||
#define SHR(x, n) (x >> n)
|
|
||||||
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
|
||||||
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
|
||||||
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
|
||||||
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
|
|
||||||
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
|
|
||||||
|
|
||||||
/* SHA256 round function */
|
|
||||||
#define RND(a, b, c, d, e, f, g, h, k) \
|
|
||||||
t0 = h + S1(e) + Ch(e, f, g) + k; \
|
|
||||||
t1 = S0(a) + Maj(a, b, c); \
|
|
||||||
d += t0; \
|
|
||||||
h = t0 + t1;
|
|
||||||
|
|
||||||
/* Adjusted round function for rotating state */
|
|
||||||
#define RNDr(S, W, i, k) \
|
|
||||||
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
|
||||||
S[(66 - i) % 8], S[(67 - i) % 8], \
|
|
||||||
S[(68 - i) % 8], S[(69 - i) % 8], \
|
|
||||||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
|
||||||
W[i] + k)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SHA256 block compression function. The 256-bit state is transformed via
|
|
||||||
* the 512-bit input block to produce a new state.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
SHA256_Transform_p(uint32_t * state, const unsigned char block[64])
|
|
||||||
{
|
|
||||||
uint32_t _ALIGN(128) W[64], S[8];
|
|
||||||
uint32_t t0, t1;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* 1. Prepare message schedule W. */
|
|
||||||
be32dec_vect(W, block, 64);
|
|
||||||
for (i = 16; i < 64; i++)
|
|
||||||
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
|
|
||||||
|
|
||||||
/* 2. Initialize working variables. */
|
|
||||||
memcpy(S, state, 32);
|
|
||||||
|
|
||||||
/* 3. Mix. */
|
|
||||||
RNDr(S, W, 0, 0x428a2f98);
|
|
||||||
RNDr(S, W, 1, 0x71374491);
|
|
||||||
RNDr(S, W, 2, 0xb5c0fbcf);
|
|
||||||
RNDr(S, W, 3, 0xe9b5dba5);
|
|
||||||
RNDr(S, W, 4, 0x3956c25b);
|
|
||||||
RNDr(S, W, 5, 0x59f111f1);
|
|
||||||
RNDr(S, W, 6, 0x923f82a4);
|
|
||||||
RNDr(S, W, 7, 0xab1c5ed5);
|
|
||||||
RNDr(S, W, 8, 0xd807aa98);
|
|
||||||
RNDr(S, W, 9, 0x12835b01);
|
|
||||||
RNDr(S, W, 10, 0x243185be);
|
|
||||||
RNDr(S, W, 11, 0x550c7dc3);
|
|
||||||
RNDr(S, W, 12, 0x72be5d74);
|
|
||||||
RNDr(S, W, 13, 0x80deb1fe);
|
|
||||||
RNDr(S, W, 14, 0x9bdc06a7);
|
|
||||||
RNDr(S, W, 15, 0xc19bf174);
|
|
||||||
RNDr(S, W, 16, 0xe49b69c1);
|
|
||||||
RNDr(S, W, 17, 0xefbe4786);
|
|
||||||
RNDr(S, W, 18, 0x0fc19dc6);
|
|
||||||
RNDr(S, W, 19, 0x240ca1cc);
|
|
||||||
RNDr(S, W, 20, 0x2de92c6f);
|
|
||||||
RNDr(S, W, 21, 0x4a7484aa);
|
|
||||||
RNDr(S, W, 22, 0x5cb0a9dc);
|
|
||||||
RNDr(S, W, 23, 0x76f988da);
|
|
||||||
RNDr(S, W, 24, 0x983e5152);
|
|
||||||
RNDr(S, W, 25, 0xa831c66d);
|
|
||||||
RNDr(S, W, 26, 0xb00327c8);
|
|
||||||
RNDr(S, W, 27, 0xbf597fc7);
|
|
||||||
RNDr(S, W, 28, 0xc6e00bf3);
|
|
||||||
RNDr(S, W, 29, 0xd5a79147);
|
|
||||||
RNDr(S, W, 30, 0x06ca6351);
|
|
||||||
RNDr(S, W, 31, 0x14292967);
|
|
||||||
RNDr(S, W, 32, 0x27b70a85);
|
|
||||||
RNDr(S, W, 33, 0x2e1b2138);
|
|
||||||
RNDr(S, W, 34, 0x4d2c6dfc);
|
|
||||||
RNDr(S, W, 35, 0x53380d13);
|
|
||||||
RNDr(S, W, 36, 0x650a7354);
|
|
||||||
RNDr(S, W, 37, 0x766a0abb);
|
|
||||||
RNDr(S, W, 38, 0x81c2c92e);
|
|
||||||
RNDr(S, W, 39, 0x92722c85);
|
|
||||||
RNDr(S, W, 40, 0xa2bfe8a1);
|
|
||||||
RNDr(S, W, 41, 0xa81a664b);
|
|
||||||
RNDr(S, W, 42, 0xc24b8b70);
|
|
||||||
RNDr(S, W, 43, 0xc76c51a3);
|
|
||||||
RNDr(S, W, 44, 0xd192e819);
|
|
||||||
RNDr(S, W, 45, 0xd6990624);
|
|
||||||
RNDr(S, W, 46, 0xf40e3585);
|
|
||||||
RNDr(S, W, 47, 0x106aa070);
|
|
||||||
RNDr(S, W, 48, 0x19a4c116);
|
|
||||||
RNDr(S, W, 49, 0x1e376c08);
|
|
||||||
RNDr(S, W, 50, 0x2748774c);
|
|
||||||
RNDr(S, W, 51, 0x34b0bcb5);
|
|
||||||
RNDr(S, W, 52, 0x391c0cb3);
|
|
||||||
RNDr(S, W, 53, 0x4ed8aa4a);
|
|
||||||
RNDr(S, W, 54, 0x5b9cca4f);
|
|
||||||
RNDr(S, W, 55, 0x682e6ff3);
|
|
||||||
RNDr(S, W, 56, 0x748f82ee);
|
|
||||||
RNDr(S, W, 57, 0x78a5636f);
|
|
||||||
RNDr(S, W, 58, 0x84c87814);
|
|
||||||
RNDr(S, W, 59, 0x8cc70208);
|
|
||||||
RNDr(S, W, 60, 0x90befffa);
|
|
||||||
RNDr(S, W, 61, 0xa4506ceb);
|
|
||||||
RNDr(S, W, 62, 0xbef9a3f7);
|
|
||||||
RNDr(S, W, 63, 0xc67178f2);
|
|
||||||
|
|
||||||
/* 4. Mix local working variables into global state */
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
state[i] += S[i];
|
|
||||||
#if 0
|
|
||||||
/* Clean the stack. */
|
|
||||||
memset(W, 0, 256);
|
|
||||||
memset(S, 0, 32);
|
|
||||||
t0 = t1 = 0;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned char PAD[64] = {
|
|
||||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
||||||
};
|
|
||||||
|
|
||||||
// only called by SHA256_Final_p
|
|
||||||
/* Add padding and terminating bit-count. */
|
|
||||||
static void
|
|
||||||
SHA256_Pad_p(SHA256_CTX_p * ctx)
|
|
||||||
{
|
|
||||||
unsigned char len[8];
|
|
||||||
uint32_t r, plen;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convert length to a vector of bytes -- we do this now rather
|
|
||||||
* than later because the length will change after we pad.
|
|
||||||
*/
|
|
||||||
be32enc_vect(len, ctx->count, 8);
|
|
||||||
|
|
||||||
/* Add 1--64 bytes so that the resulting length is 56 mod 64 */
|
|
||||||
r = (ctx->count[1] >> 3) & 0x3f;
|
|
||||||
plen = (r < 56) ? (56 - r) : (120 - r);
|
|
||||||
SHA256_Update_p(ctx, PAD, (size_t)plen);
|
|
||||||
/* Add the terminating bit-count */
|
|
||||||
SHA256_Update_p(ctx, len, 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* SHA-256 initialization. Begins a SHA-256 operation. */
|
|
||||||
void
|
|
||||||
SHA256_Init_p(SHA256_CTX_p * ctx)
|
|
||||||
{
|
|
||||||
/* Zero bits processed so far */
|
|
||||||
ctx->count[0] = ctx->count[1] = 0;
|
|
||||||
|
|
||||||
/* Magic initialization constants */
|
|
||||||
ctx->state[0] = 0x6A09E667;
|
|
||||||
ctx->state[1] = 0xBB67AE85;
|
|
||||||
ctx->state[2] = 0x3C6EF372;
|
|
||||||
ctx->state[3] = 0xA54FF53A;
|
|
||||||
ctx->state[4] = 0x510E527F;
|
|
||||||
ctx->state[5] = 0x9B05688C;
|
|
||||||
ctx->state[6] = 0x1F83D9AB;
|
|
||||||
ctx->state[7] = 0x5BE0CD19;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add bytes into the hash */
|
|
||||||
void
|
|
||||||
SHA256_Update_p(SHA256_CTX_p * ctx, const void *in, size_t len)
|
|
||||||
{
|
|
||||||
uint32_t bitlen[2];
|
|
||||||
uint32_t r;
|
|
||||||
const unsigned char *src = in;
|
|
||||||
|
|
||||||
/* Number of bytes left in the buffer from previous updates */
|
|
||||||
r = (ctx->count[1] >> 3) & 0x3f;
|
|
||||||
|
|
||||||
/* Convert the length into a number of bits */
|
|
||||||
bitlen[1] = ((uint32_t)len) << 3;
|
|
||||||
bitlen[0] = (uint32_t)(len >> 29);
|
|
||||||
|
|
||||||
/* Update number of bits */
|
|
||||||
if ((ctx->count[1] += bitlen[1]) < bitlen[1])
|
|
||||||
ctx->count[0]++;
|
|
||||||
ctx->count[0] += bitlen[0];
|
|
||||||
|
|
||||||
/* Handle the case where we don't need to perform any transforms */
|
|
||||||
if (len < 64 - r) {
|
|
||||||
memcpy(&ctx->buf[r], src, len);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finish the current block */
|
|
||||||
memcpy(&ctx->buf[r], src, 64 - r);
|
|
||||||
SHA256_Transform_p(ctx->state, ctx->buf);
|
|
||||||
src += 64 - r;
|
|
||||||
len -= 64 - r;
|
|
||||||
|
|
||||||
/* Perform complete blocks */
|
|
||||||
while (len >= 64) {
|
|
||||||
SHA256_Transform_p(ctx->state, src);
|
|
||||||
src += 64;
|
|
||||||
len -= 64;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy left over data into buffer */
|
|
||||||
memcpy(ctx->buf, src, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SHA-256 finalization. Pads the input data, exports the hash value,
|
|
||||||
* and clears the context state.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Final_p(unsigned char digest[32], SHA256_CTX_p * ctx)
|
|
||||||
{
|
|
||||||
/* Add padding */
|
|
||||||
SHA256_Pad_p(ctx);
|
|
||||||
|
|
||||||
/* Write the hash */
|
|
||||||
be32enc_vect(digest, ctx->state, 32);
|
|
||||||
|
|
||||||
/* Clear the context state */
|
|
||||||
memset((void *)ctx, 0, sizeof(*ctx));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SHA256_Buf(in, len, digest):
|
|
||||||
* Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
SHA256_Buf_p(const void * in, size_t len, uint8_t digest[32])
|
|
||||||
{
|
|
||||||
// SHA256_CTX_p ctx;
|
|
||||||
// uint32_t tmp32[72];
|
|
||||||
|
|
||||||
#if defined(__SHA__)
|
|
||||||
SHA256_CTX ctx;
|
|
||||||
SHA256_Init(&ctx);
|
|
||||||
SHA256_Update(&ctx, in, len);
|
|
||||||
SHA256_Final(digest, &ctx);
|
|
||||||
#else
|
|
||||||
SHA256_CTX_p ctx;
|
|
||||||
SHA256_Init_p(&ctx);
|
|
||||||
SHA256_Update_p(&ctx, in, len);
|
|
||||||
SHA256_Final_p(digest, &ctx);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
// insecure_memzero(&ctx, sizeof(SHA256_CTX));
|
|
||||||
// insecure_memzero(tmp32, 288);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
|
||||||
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
|
||||||
* length ${Klen}, and write the result to ${digest}.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Buf_p(const void * K, size_t Klen, const void * in, size_t len,
|
|
||||||
uint8_t digest[32])
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX_p ctx;
|
|
||||||
// uint32_t tmp32[72];
|
|
||||||
// uint8_t tmp8[96];
|
|
||||||
|
|
||||||
HMAC_SHA256_Init_p(&ctx, K, Klen);
|
|
||||||
HMAC_SHA256_Update_p(&ctx, in, len);
|
|
||||||
HMAC_SHA256_Final_p(digest, &ctx);
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
// insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX));
|
|
||||||
// insecure_memzero(tmp32, 288);
|
|
||||||
// insecure_memzero(tmp8, 96);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize an HMAC-SHA256 operation with the given key. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Init_p(HMAC_SHA256_CTX_p * ctx, const void * _K, size_t Klen)
|
|
||||||
{
|
|
||||||
unsigned char pad[64];
|
|
||||||
unsigned char khash[32];
|
|
||||||
const unsigned char * K = _K;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
/* If Klen > 64, the key is really SHA256(K). */
|
|
||||||
if (Klen > 64) {
|
|
||||||
#if defined(__SHA__)
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
SHA256_Update(&ctx->ictx, K, Klen);
|
|
||||||
SHA256_Final(khash, &ctx->ictx);
|
|
||||||
#else
|
|
||||||
SHA256_Init_p(&ctx->ictx);
|
|
||||||
SHA256_Update_p(&ctx->ictx, K, Klen);
|
|
||||||
SHA256_Final_p(khash, &ctx->ictx);
|
|
||||||
#endif
|
|
||||||
K = khash;
|
|
||||||
Klen = 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
|
||||||
#if defined(__SHA__)
|
|
||||||
SHA256_Init(&ctx->ictx);
|
|
||||||
#else
|
|
||||||
SHA256_Init_p(&ctx->ictx);
|
|
||||||
#endif
|
|
||||||
memset(pad, 0x36, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
#if defined(__SHA__)
|
|
||||||
SHA256_Update(&ctx->ictx, pad, 64);
|
|
||||||
#else
|
|
||||||
SHA256_Update_p(&ctx->ictx, pad, 64);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
|
||||||
#if defined(__SHA__)
|
|
||||||
SHA256_Init(&ctx->octx);
|
|
||||||
#else
|
|
||||||
SHA256_Init_p(&ctx->octx);
|
|
||||||
#endif
|
|
||||||
memset(pad, 0x5c, 64);
|
|
||||||
for (i = 0; i < Klen; i++)
|
|
||||||
pad[i] ^= K[i];
|
|
||||||
#if defined(__SHA__)
|
|
||||||
SHA256_Update(&ctx->octx, pad, 64);
|
|
||||||
#else
|
|
||||||
SHA256_Update_p(&ctx->octx, pad, 64);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
//memset(khash, 0, 32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add bytes to the HMAC-SHA256 operation. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Update_p(HMAC_SHA256_CTX_p * ctx, const void *in, size_t len)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Feed data to the inner SHA256 operation. */
|
|
||||||
#if defined(__SHA__)
|
|
||||||
SHA256_Update(&ctx->ictx, in, len);
|
|
||||||
#else
|
|
||||||
SHA256_Update_p(&ctx->ictx, in, len);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finish an HMAC-SHA256 operation. */
|
|
||||||
void
|
|
||||||
HMAC_SHA256_Final_p(unsigned char digest[32], HMAC_SHA256_CTX_p * ctx)
|
|
||||||
{
|
|
||||||
unsigned char ihash[32];
|
|
||||||
|
|
||||||
#if defined(__SHA__)
|
|
||||||
/* Finish the inner SHA256 operation. */
|
|
||||||
SHA256_Final(ihash, &ctx->ictx);
|
|
||||||
|
|
||||||
/* Feed the inner hash to the outer SHA256 operation. */
|
|
||||||
SHA256_Update(&ctx->octx, ihash, 32);
|
|
||||||
|
|
||||||
/* Finish the outer SHA256 operation. */
|
|
||||||
SHA256_Final(digest, &ctx->octx);
|
|
||||||
#else
|
|
||||||
/* Finish the inner SHA256 operation. */
|
|
||||||
SHA256_Final_p(ihash, &ctx->ictx);
|
|
||||||
|
|
||||||
/* Feed the inner hash to the outer SHA256 operation. */
|
|
||||||
SHA256_Update_p(&ctx->octx, ihash, 32);
|
|
||||||
|
|
||||||
/* Finish the outer SHA256 operation. */
|
|
||||||
SHA256_Final_p(digest, &ctx->octx);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Clean the stack. */
|
|
||||||
//memset(ihash, 0, 32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
|
||||||
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
|
||||||
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PBKDF2_SHA256_p(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
|
||||||
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
|
||||||
{
|
|
||||||
HMAC_SHA256_CTX_p PShctx, hctx;
|
|
||||||
uint8_t _ALIGN(128) T[32];
|
|
||||||
uint8_t _ALIGN(128) U[32];
|
|
||||||
uint8_t ivec[4];
|
|
||||||
size_t i, clen;
|
|
||||||
uint64_t j;
|
|
||||||
int k;
|
|
||||||
|
|
||||||
/* Compute HMAC state after processing P and S. */
|
|
||||||
HMAC_SHA256_Init_p(&PShctx, passwd, passwdlen);
|
|
||||||
HMAC_SHA256_Update_p(&PShctx, salt, saltlen);
|
|
||||||
|
|
||||||
/* Iterate through the blocks. */
|
|
||||||
for (i = 0; i * 32 < dkLen; i++) {
|
|
||||||
/* Generate INT(i + 1). */
|
|
||||||
be32enc(ivec, (uint32_t)(i + 1));
|
|
||||||
|
|
||||||
/* Compute U_1 = PRF(P, S || INT(i)). */
|
|
||||||
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_p));
|
|
||||||
HMAC_SHA256_Update_p(&hctx, ivec, 4);
|
|
||||||
HMAC_SHA256_Final_p(U, &hctx);
|
|
||||||
|
|
||||||
/* T_i = U_1 ... */
|
|
||||||
memcpy(T, U, 32);
|
|
||||||
|
|
||||||
for (j = 2; j <= c; j++) {
|
|
||||||
/* Compute U_j. */
|
|
||||||
HMAC_SHA256_Init_p(&hctx, passwd, passwdlen);
|
|
||||||
HMAC_SHA256_Update_p(&hctx, U, 32);
|
|
||||||
HMAC_SHA256_Final_p(U, &hctx);
|
|
||||||
|
|
||||||
/* ... xor U_j ... */
|
|
||||||
for (k = 0; k < 32; k++)
|
|
||||||
T[k] ^= U[k];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy as many bytes as necessary into buf. */
|
|
||||||
clen = dkLen - i * 32;
|
|
||||||
if (clen > 32)
|
|
||||||
clen = 32;
|
|
||||||
memcpy(&buf[i * 32], T, clen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Clean PShctx, since we never called _Final on it. */
|
|
||||||
//memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
|
|
||||||
}
|
|
@@ -33,45 +33,24 @@
 #include <stdint.h>
 #include <openssl/sha.h>

-typedef struct SHA256Context {
-	uint32_t state[8];
-	uint32_t count[2];
-	unsigned char buf[64];
-} SHA256_CTX_p;

-/*
 typedef struct HMAC_SHA256Context {
-	SHA256_CTX_Y ictx;
-	SHA256_CTX_Y octx;
-} HMAC_SHA256_CTX_Y;
-*/

-typedef struct HMAC_SHA256Context {
-#if defined(__SHA__)
 	SHA256_CTX ictx;
 	SHA256_CTX octx;
-#else
+} HMAC_SHA256_CTX;
-	SHA256_CTX_p ictx;
-	SHA256_CTX_p octx;
-#endif
-} HMAC_SHA256_CTX_p;

-void SHA256_Init_p(SHA256_CTX_p *);
+void SHA256_Buf( const void * in, size_t len, uint8_t digest[32] );
-void SHA256_Update_p(SHA256_CTX_p *, const void *, size_t);
+void HMAC_SHA256_Init( HMAC_SHA256_CTX *, const void *, size_t );
-void SHA256_Final_p(unsigned char [32], SHA256_CTX_p *);
+void HMAC_SHA256_Update( HMAC_SHA256_CTX *, const void *, size_t );
-void SHA256_Buf_p(const void * in, size_t len, uint8_t digest[32]);
+void HMAC_SHA256_Final( unsigned char [32], HMAC_SHA256_CTX * );
-void HMAC_SHA256_Init_p(HMAC_SHA256_CTX_p *, const void *, size_t);
+void HMAC_SHA256_Buf( const void * K, size_t Klen, const void * in,
-void HMAC_SHA256_Update_p(HMAC_SHA256_CTX_p *, const void *, size_t);
+                      size_t len, uint8_t digest[32] );
-void HMAC_SHA256_Final_p(unsigned char [32], HMAC_SHA256_CTX_p *);
-void HMAC_SHA256_Buf_p(const void * K, size_t Klen, const void * in,
-	size_t len, uint8_t digest[32]);

 /**
  * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
  * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
  * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
  */
-void PBKDF2_SHA256_p(const uint8_t *, size_t, const uint8_t *, size_t,
+void PBKDF2_SHA256( const uint8_t *, size_t, const uint8_t *, size_t,
 	uint64_t, uint8_t *, size_t);

 #endif /* !_SHA256_H_ */
@@ -62,6 +62,7 @@
 #warning "Note: building generic code for non-x86. That's OK."
 #endif
 */

 /*
  * The SSE4 code version has fewer instructions than the generic SSE2 version,
  * but all of the instructions are SIMD, thereby wasting the scalar execution
@@ -96,7 +97,7 @@
 #include <string.h>

 #include "insecure_memzero.h"
-#include "sha256.h"
+#include "sha256_p.h"
 #include "sysendian.h"

 #include "yespower.h"

[One file's diff is suppressed because it is too large.]
@@ -51,7 +51,7 @@
 #include <stdlib.h>
 #include <string.h>

-#include "sha256.h"
+#include "sha256_p.h"
 #include "sysendian.h"

 #include "yespower.h"
@@ -534,11 +534,12 @@ int yespower(yespower_local_t *local,

 	if (pers) {
 		HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen,
+			return true;
 		    (uint8_t *)sha256);
 		SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst);
 	}
 } else {
-	HMAC_SHA256_Buf((uint8_t *)B + B_size - 64, 64,
+	HMAC_SHA256_Buf_P((uint8_t *)B + B_size - 64, 64,
 	    sha256, sizeof(sha256), (uint8_t *)dst);
 }

@@ -38,7 +38,7 @@ void yespower_hash( const char *input, char *output, uint32_t len )
 }

 int scanhash_yespower( int thr_id, struct work *work, uint32_t max_nonce,
-                       uint64_t *hashes_done )
+                       uint64_t *hashes_done, struct thr_info *mythr )
 {
    uint32_t _ALIGN(64) vhash[8];
    uint32_t _ALIGN(64) endiandata[20];
@@ -48,6 +48,7 @@ int scanhash_yespower( int thr_id, struct work *work, uint32_t max_nonce,
    const uint32_t Htarg = ptarget[7];
    const uint32_t first_nonce = pdata[19];
    uint32_t n = first_nonce;
+   /* int */ thr_id = mythr->id;  // thr_id arg is deprecated

    for (int k = 0; k < 19; k++)
       be32enc(&endiandata[k], pdata[k]);

avxdefs.h (125 lines changed):
@@ -99,20 +99,73 @@
 #include <memory.h>
 #include <stdbool.h>

-// 64 bit seems completely useless
+// First some integer stuff that mirrors the SIMD utilities
+
+#define ror_64( x, c ) \
+   (uint64_t)( ( (uint64_t)(x) >> (c) ) | ( (uint64_t)(x) << (64-(c)) ) )
+#define rol_64( x, c ) \
+   (uint64_t)( ( (uint64_t)(x) << (c) ) | ( (uint64_t)(x) >> (64-(c)) ) )
+#define ror_32( x, c ) \
+   (uint32_t)( ( (uint32_t)(x) >> (c) ) | ( (uint32_t)(x) << (32-(c)) ) )
+#define rol_32( x, c ) \
+   (uint32_t)( ( (uint32_t)(x) << (c) ) | ( (uint32_t)(x) >> (32-(c)) ) )
+#define ror_16( x, c ) \
+   (uint16_t)( ( (uint16_t)(x) >> (c) ) | ( (uint16_t)(x) << (16-(c)) ) )
+#define rol_16( x, c ) \
+   (uint16_t)( ( (uint16_t)(x) << (c) ) | ( (uint16_t)(x) >> (16-(c)) ) )
+#define ror_8( x, c ) \
+   (uint8_t) ( ( (uint8_t) (x) >> (c) ) | ( (uint8_t) (x) << ( 8-(c)) ) )
+#define rol_8( x, c ) \
+   (uint8_t) ( ( (uint8_t) (x) << (c) ) | ( (uint8_t) (x) >> ( 8-(c)) ) )
+
+#define bswap_64( x ) __builtin_bswap64(x)
+#define bswap_32( x ) __builtin_bswap32(x)
+
+// 128 bit integer
+//
+// Int128 uses two 64 bit GPRs to hold the data. The main benefits are
+// for 128 bit arithmetic. Vectors are preferred when 128 bit arith
+// is not required. int128 also works better with other integer sizes.
+// Vectors benefit from wider registers.
+//
+// Use typecasting for conversion to/from 128 bit vector:
+// __m128i v128 = (__m128i)my_int128l
+// __m256i v256 = _mm256_set_m128i( (__m128i)my_int128, (__m128i)my_int128 );
+// my_int128 = (uint128_t)_mm256_extracti128_si256( v256, 1 );
+
+#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
+
+// Test this before using int128.
+#define GCC_INT128 1
+
+// Familiar looking type names
+typedef __int128 int128_t;
+typedef unsigned __int128 uint128_t;
+
+// No real need or use.
+#define i128_neg1 (uint128_t)(-1LL)
+
+// Extract selected 64 bit half of 128 bit integer.
+// A generic macro with a selector argument can't be encoded as a statement
+// function and would require a branch.
+#define i128_hi64( x ) (uint64_t)( (uint128_t)(x) >> 64 )
+#define i128_lo64( x ) (uint64_t)( (uint128_t)(x) << 64 >> 64 )
+
+// Not much need for this but it fills a gap.
+#define ror_128( x, c ) \
+   ( ( (uint128_t)(x) >> (c) ) | ( (uint128_t)(x) << (128-(c)) ) )
+#define rol_128( x, c ) \
+   ( ( (uint128_t)(x) << (c) ) | ( (uint128_t)(x) >> (128-(c)) ) )
+
+#endif // INT128

 ////////////////////////////////////////////////////////////////
 //
 // 64 bit MMX vectors.
 //
 // There are rumours MMX wil be removed. Although casting with int64
 // works there is likely some overhead to move the data to An MMX register
 // and back.
-// Byte swap and rotation may be more efficient using an MMX shuffle
-// except that it won't compile due to a "target specific option mismatch"
-// with "inlining failed in call to always inline". MMX was designed for
-// 32 bit CPUs and might not work on 64 bit CPUs where the CPU has full
-// support for 64 bit operations without vectoring.
+// Byte swap and rotation may be more efficient using an MMX shuffle.
 //
 // Universal 64 bit overlay
 union _m64v
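As an illustration of the scalar rotate and int128 helpers added above, here is a small self-contained sketch (the definitions are repeated so it compiles on its own with GCC; the values are made up for the example):

    #include <stdint.h>
    #include <stdio.h>

    #define ror_64( x, c ) \
       (uint64_t)( ( (uint64_t)(x) >> (c) ) | ( (uint64_t)(x) << (64-(c)) ) )
    typedef unsigned __int128 uint128_t;
    #define i128_hi64( x ) (uint64_t)( (uint128_t)(x) >> 64 )
    #define i128_lo64( x ) (uint64_t)( (uint128_t)(x) << 64 >> 64 )

    int main()
    {
       uint64_t  r = ror_64( 0x0123456789abcdefULL, 8 );   // rotate right by 8 bits
       uint128_t m = ( (uint128_t)1 << 100 ) | 0xffULL;    // a 128 bit mask
       printf( "ror: %016llx  hi: %016llx  lo: %016llx\n",
               (unsigned long long)r,
               (unsigned long long)i128_hi64( m ),
               (unsigned long long)i128_lo64( m ) );
       return 0;
    }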
@@ -165,6 +218,7 @@ typedef union _m64_v16 m64_v16;
 #define casti_m64(p,i) (((__m64*)(p))[(i)])

 // cast all arguments as the're likely uint64_t

 // Bitwise not: ~(a)
@@ -173,6 +227,7 @@ typedef union _m64_v16 m64_v16;
 // Unary negate elements
 #define mm64_negate_32( v ) _mm_sub_pi32( m64_zero, (__m64)v )
 #define mm64_negate_16( v ) _mm_sub_pi16( m64_zero, (__m64)v )
+#define mm64_negate_8( v )  _mm_sub_pi8( m64_zero, (__m64)v )

 // Rotate bits in packed elements of 64 bit vector
 #define mm64_rol_32( a, n ) \
@@ -206,15 +261,32 @@ typedef union _m64_v16 m64_v16;
 #if defined(__SSSE3__)

 // Endian byte swap packed elements
+// A vectorized version of the u64 bswap, use when data already in MMX reg.
+#define mm64_bswap_64( v ) \
+   _mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 0,1,2,3,4,5,6,7 ) )
+
 #define mm64_bswap_32( v ) \
    _mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 4,5,6,7, 0,1,2,3 ) )

 #define mm64_bswap_16( v ) \
    _mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 6,7, 4,5, 2,3, 0,1 ) );

+#else
+
+#define mm64_bswap_64( v ) \
+   (__m64)__builtin_bswap64( (uint64_t)v )
+
+// Looks clumsy but hopefully it works.
+#define mm64_bswap_32( v ) \
+   _mm_set_pi32( __builtin_bswap32( ((uint32_t*)v)[1] ), \
+                 __builtin_bswap32( ((uint32_t*)v)[0] ) )
+
 #endif

 // Invert vector: {3,2,1,0} -> {0,1,2,3}
+// Invert_64 is the same as bswap64
+// Invert_32 is the same as swap32

 #define mm64_invert_16( v ) _mm_shuffle_pi16( (__m64)v, 0x1b )

 #if defined(__SSSE3__)
@@ -237,6 +309,12 @@ static inline void memset_zero_64( __m64 *src, int n )
 static inline void memset_64( __m64 *dst, const __m64 a, int n )
 {   for ( int i = 0; i < n; i++ ) dst[i] = a; }

+// The b is for broadcast, don't use in hybrid hash, interleave.
+static inline void mem_bcpy_32( __m64 *dst, const uint32_t src, int n )
+{
+   for ( int i = 0; i < n; i++ ) dst[i] = _mm_set1_pi32( src );
+}

 //////////////////////////////////////////////////////////////////
 //
@@ -644,57 +722,57 @@ do { \

 #define mm128_ror1x64_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_srli_si128( v1, 8 ) | _mm_slli_si128( v2, 24 ); \
-   v2 = _mm_srli_si128( v2, 8 ) | _mm_slli_si128( v1, 24 ); \
+   __m128i t = _mm_srli_si128( v1, 8 ) | _mm_slli_si128( v2, 8 ); \
+   v2 = _mm_srli_si128( v2, 8 ) | _mm_slli_si128( v1, 8 ); \
    v1 = t; \
 } while(0)

 #define mm128_rol1x64_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_slli_si128( v1, 8 ) | _mm_srli_si128( v2, 24 ); \
-   v2 = _mm_slli_si128( v2, 8 ) | _mm_srli_si128( v1, 24 ); \
+   __m128i t = _mm_slli_si128( v1, 8 ) | _mm_srli_si128( v2, 8 ); \
+   v2 = _mm_slli_si128( v2, 8 ) | _mm_srli_si128( v1, 8 ); \
    v1 = t; \
 } while(0)

 #define mm128_ror1x32_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_srli_si128( v1, 4 ) | _mm_slli_si128( v2, 28 ); \
-   v2 = _mm_srli_si128( v2, 4 ) | _mm_slli_si128( v1, 28 ); \
+   __m128i t = _mm_srli_si128( v1, 4 ) | _mm_slli_si128( v2, 12 ); \
+   v2 = _mm_srli_si128( v2, 4 ) | _mm_slli_si128( v1, 12 ); \
    v1 = t; \
 } while(0)

 #define mm128_rol1x32_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_slli_si128( v1, 4 ) | _mm_srli_si128( v2, 28 ); \
-   v2 = _mm_slli_si128( v2, 4 ) | _mm_srli_si128( v1, 28 ); \
+   __m128i t = _mm_slli_si128( v1, 4 ) | _mm_srli_si128( v2, 12 ); \
+   v2 = _mm_slli_si128( v2, 4 ) | _mm_srli_si128( v1, 12 ); \
    v1 = t; \
 } while(0)

 #define mm128_ror1x16_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_srli_si128( v1, 2 ) | _mm_slli_si128( v2, 30 ); \
-   v2 = _mm_srli_si128( v2, 2 ) | _mm_slli_si128( v1, 30 ); \
+   __m128i t = _mm_srli_si128( v1, 2 ) | _mm_slli_si128( v2, 14 ); \
+   v2 = _mm_srli_si128( v2, 2 ) | _mm_slli_si128( v1, 14 ); \
    v1 = t; \
 } while(0)

 #define mm128_rol1x16_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_slli_si128( v1, 2 ) | _mm_srli_si128( v2, 30 ); \
-   v2 = _mm_slli_si128( v2, 2 ) | _mm_srli_si128( v1, 30 ); \
+   __m128i t = _mm_slli_si128( v1, 2 ) | _mm_srli_si128( v2, 14 ); \
+   v2 = _mm_slli_si128( v2, 2 ) | _mm_srli_si128( v1, 14 ); \
    v1 = t; \
 } while(0)

 #define mm128_ror1x8_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_srli_si128( v1, 1 ) | _mm_slli_si128( v2, 31 ); \
-   v2 = _mm_srli_si128( v2, 1 ) | _mm_slli_si128( v1, 31 ); \
+   __m128i t = _mm_srli_si128( v1, 1 ) | _mm_slli_si128( v2, 15 ); \
+   v2 = _mm_srli_si128( v2, 1 ) | _mm_slli_si128( v1, 15 ); \
    v1 = t; \
 } while(0)

 #define mm128_rol1x8_256( v1, v2 ) \
 do { \
-   __m128i t = _mm_slli_si128( v1, 1 ) | _mm_srli_si128( v2, 31 ); \
-   v2 = _mm_slli_si128( v2, 1 ) | _mm_srli_si128( v1, 31 ); \
+   __m128i t = _mm_slli_si128( v1, 1 ) | _mm_srli_si128( v2, 15 ); \
+   v2 = _mm_slli_si128( v2, 1 ) | _mm_srli_si128( v1, 15 ); \
    v1 = t; \
 } while(0)
@@ -1919,6 +1997,7 @@ static inline __m64 mmx_compile_test( __m64 a )
 m = _mm_shuffle_pi8( m, (__m64)0x0102030405060708 );
 i = (uint64_t) mm64_ror_32( (__m64)i, 7 );
 casti_m64( n, 2 ) = m;
+m = (__m64)__builtin_bswap64( (uint64_t)m );
 return a;
 }

configure (vendored, 22 lines changed):
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.1.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.2.4.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.9.1'
-PACKAGE_STRING='cpuminer-opt 3.9.1'
+PACKAGE_VERSION='3.9.2.4'
+PACKAGE_STRING='cpuminer-opt 3.9.2.4'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
 # Omit some internal or obsolete options to make the list less imposing.
 # This message is too long to be a string in the A/UX 3.1 sh.
 cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.9.1 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.9.2.4 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1404,7 +1404,7 @@ fi

 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.9.1:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.9.2.4:";;
   esac
   cat <<\_ACEOF

@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-cpuminer-opt configure 3.9.1
+cpuminer-opt configure 3.9.2.4
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by cpuminer-opt $as_me 3.9.1, which was
+It was created by cpuminer-opt $as_me 3.9.2.4, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

   $ $0 $@
@@ -2993,7 +2993,7 @@ fi

 # Define the identity of the package.
 PACKAGE='cpuminer-opt'
-VERSION='3.9.1'
+VERSION='3.9.2.4'


 cat >>confdefs.h <<_ACEOF
@@ -5884,7 +5884,7 @@ fi


 # GC2 for GNU static
-if test "x$OS" = "xWindows_NT" ; then
+if test "x$have_win32" = "xtrue" ; then
   # MinGW
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
 $as_echo_n "checking for pthread_create in -lpthread... " >&6; }
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.9.1, which was
+This file was extended by cpuminer-opt $as_me 3.9.2.4, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

   CONFIG_FILES    = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.9.1
+cpuminer-opt config.status 3.9.2.4
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.9.1])
+AC_INIT([cpuminer-opt], [3.9.2.4])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
@@ -106,7 +106,7 @@ fi
 AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)

 # GC2 for GNU static
-if test "x$OS" = "xWindows_NT" ; then
+if test "x$have_win32" = "xtrue" ; then
   # MinGW
   AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",[])
 else

cpu-miner.c (115 lines changed):
@@ -105,10 +105,12 @@ enum algos opt_algo = ALGO_NULL;
 int opt_scrypt_n = 0;
 int opt_pluck_n = 128;
 int opt_n_threads = 0;
-#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
-__int128_t opt_affinity = -1LL;
+// Windows doesn't support 128 bit affinity mask.
+#if defined(__linux) && defined(GCC_INT128)
+#define AFFINITY_USES_UINT128 1
+uint128_t opt_affinity = -1LL;
 #else
-int64_t opt_affinity = -1LL;
+uint64_t opt_affinity = -1LL;
 #endif
 int opt_priority = 0;
 int num_cpus = 1;
@@ -203,7 +205,8 @@ static inline void drop_policy(void)
 #define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
 #endif

-#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
+// Linux affinity can use int128.
+#if AFFINITY_USES_UINT128
 static void affine_to_cpu_mask( int id, unsigned __int128 mask )
 #else
 static void affine_to_cpu_mask( int id, unsigned long long mask )
@@ -216,7 +219,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
    for ( uint8_t i = 0; i < ncpus; i++ )
    {
       // cpu mask
-#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
+#if AFFINITY_USES_UINT128
       if( ( mask & ( (unsigned __int128)1ULL << i ) ) ) CPU_SET( i, &set );
 #else
       if( (ncpus > 64) || ( mask & (1ULL << i) ) ) CPU_SET( i, &set );
@@ -237,6 +240,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
 #elif defined(WIN32) /* Windows */
 static inline void drop_policy(void) { }

+// Windows CPU groups to manage more than 64 CPUs.
 static void affine_to_cpu_mask( int id, unsigned long mask )
 {
    bool success;
@@ -263,7 +267,7 @@ static void affine_to_cpu_mask( int id, unsigned long mask )
          break;

       cpu -= cpus;
    }

    if (opt_debug)
       applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)", id, cpu, group, (1ULL << cpu));
@@ -847,7 +851,8 @@ static int share_result( int result, struct work *work, const char *reason )
    float rate;
    char rate_s[8] = {0};
    double sharediff = work ? work->sharediff : stratum.sharediff;
-   bool solved = result && (net_diff > 0.0 ) && ( sharediff >= net_diff );
+   bool solved = result && accepted_share_count && (net_diff > 0.0 )
+                 && ( sharediff >= net_diff );
    char sol[32] = {0};
    int i;

@@ -857,15 +862,17 @@ static int share_result( int result, struct work *work, const char *reason )
       hashcount += thr_hashcount[i];
       hashrate += thr_hashrates[i];
    }
+   solved = result && ( (uint64_t)hashcount > 0 ) && (net_diff > 0.0 )
+            && ( sharediff >= net_diff );
    result ? accepted_share_count++ : rejected_share_count++;

    if ( solved )
    {
       solved_block_count++;
       if ( use_colors )
-         sprintf( sol, CL_GRN " Solved" CL_WHT " %d", solved_block_count );
+         sprintf( sol, CL_GRN " Solved: %d" CL_WHT, solved_block_count );
       else
-         sprintf( sol, " Solved %d", solved_block_count );
+         sprintf( sol, ", Solved: %d", solved_block_count );
    }

    pthread_mutex_unlock(&stats_lock);
@@ -1839,26 +1846,42 @@ static void *miner_thread( void *userdata )
    }
    else
 */

    if ( num_cpus > 1 )
    {
-      if ( (opt_affinity == -1LL) && (opt_n_threads) > 1 )
-      {
-         if (opt_debug)
-            applog( LOG_DEBUG, "Binding thread %d to cpu %d (mask %x)",
-                    thr_id, thr_id % num_cpus, ( 1ULL << (thr_id % num_cpus) ) );
-#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )
-         affine_to_cpu_mask( thr_id,
-                             (unsigned __int128)1LL << (thr_id % num_cpus) );
-#else
-         affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
-#endif
-      }
-      else if (opt_affinity != -1)
-      {
-         if (opt_debug)
-            applog( LOG_DEBUG, "Binding thread %d to cpu mask %x",
-                    thr_id, opt_affinity);
-         affine_to_cpu_mask( thr_id, opt_affinity );
-      }
+#if AFFINITY_USES_UINT128
+      // Default affinity
+      if ( (opt_affinity == i128_neg1 ) && opt_n_threads > 1 )
+      {
+         if ( opt_debug )
+            applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
+                    thr_id, thr_id % num_cpus,
+                    i128_hi64( (uint128_t)1ULL << (thr_id % num_cpus) ),
+                    i128_lo64( (uint128_t)1ULL << (thr_id % num_cpus) ) );
+         affine_to_cpu_mask( thr_id, (uint128_t)1ULL << (thr_id % num_cpus) );
+      }
+#else
+      if ( (opt_affinity == -1LL) && opt_n_threads > 1 )
+      {
+         if (opt_debug)
+            applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
+                    thr_id, thr_id % num_cpus, 1LL << (thr_id % num_cpus)) ;
+         affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
+      }
+#endif
+      else  // Custom affinity
+      {
+#if AFFINITY_USES_UINT128
+         if (opt_debug)
+            applog( LOG_DEBUG, "Binding thread %d to mask %016llx %016llx",
+                    thr_id, i128_hi64( opt_affinity ),
+                    i128_lo64( opt_affinity ) );
+#else
+         if (opt_debug)
+            applog( LOG_DEBUG, "Binding thread %d to mask %016llx",
+                    thr_id, opt_affinity );
+#endif
+         affine_to_cpu_mask( thr_id, opt_affinity );
+      }
    }
 }

@@ -2894,13 +2917,21 @@ void parse_arg(int key, char *arg )
       break;
    case 1020:
       p = strstr(arg, "0x");
-      if (p)
-         ul = strtoul(p, NULL, 16);
+      if ( p )
+         ul = strtoull( p, NULL, 16 );
       else
-         ul = atol(arg);
-      if (ul > (1UL<<num_cpus)-1)
-         ul = -1;
-      opt_affinity = ul;
+         ul = atoll( arg );
+//    if ( ul > ( 1ULL << num_cpus ) - 1ULL )
+//       ul = -1LL;
+#if AFFINITY_USES_UINT128
+      // replicate the low 64 bits to make a full 128 bit mask if there are more
+      // than 64 CPUs, otherwise zero extend the upper half.
+      opt_affinity = (uint128_t)ul;
+      if ( num_cpus > 64 )
+         opt_affinity = (opt_affinity << 64 ) | (uint128_t)ul;
+#else
+      opt_affinity = ul;
+#endif
       break;
    case 1021:
       v = atoi(arg);
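As an aside, here is a standalone sketch of the mask handling added above: a 64 bit affinity word typed on the command line is replicated into both halves of a 128 bit mask when more than 64 CPUs are present. The mask value and CPU count below are hypothetical.

    #include <stdint.h>
    #include <stdio.h>

    typedef unsigned __int128 uint128_t;   // as defined in avxdefs.h when GCC_INT128 is set

    int main()
    {
       unsigned long long ul = 0x5555555555555555ULL;  // example user mask
       int num_cpus = 96;                              // hypothetical CPU count

       uint128_t affinity = (uint128_t)ul;
       if ( num_cpus > 64 )
          affinity = ( affinity << 64 ) | (uint128_t)ul; // same mask for CPUs 64..127

       printf( "hi %016llx  lo %016llx\n",
               (unsigned long long)( affinity >> 64 ),
               (unsigned long long)( affinity & 0xffffffffffffffffULL ) );
       return 0;
    }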
@@ -3299,20 +3330,18 @@ int main(int argc, char *argv[])
    }

    if (!rpc_userpass)
    {
       rpc_userpass = (char*) malloc(strlen(rpc_user) + strlen(rpc_pass) + 2);
       if (rpc_userpass)
          sprintf(rpc_userpass, "%s:%s", rpc_user, rpc_pass);
       else
          return 1;
    }

    // All options must be set before starting the gate
-   if ( !register_algo_gate( opt_algo, &algo_gate ) )
-      exit(1);
+   if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);

-   if ( !check_cpu_capability() )
-      exit(1);
+   if ( !check_cpu_capability() ) exit(1);

    pthread_mutex_init(&stats_lock, NULL);
    pthread_mutex_init(&g_work_lock, NULL);
@@ -3325,7 +3354,7 @@ int main(int argc, char *argv[])
               ? (CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL)
               : CURL_GLOBAL_ALL;
    if (curl_global_init(flags))
    {
       applog(LOG_ERR, "CURL initialization failed");
       return 1;
    }
@@ -3384,6 +3413,8 @@ int main(int argc, char *argv[])
    if ( num_cpus != opt_n_threads )
       applog( LOG_INFO,"%u CPU cores available, %u miner threads selected.",
               num_cpus, opt_n_threads );

+   // To be reviewed
    if ( opt_affinity != -1 )
    {
       if ( num_cpus > 64 )

interleave.h (404 lines changed):
@@ -43,8 +43,127 @@
 //
 // AVX512: 4x128, 8x64, 16x32
 //
-// Interleaving and deinterleaving is done in blocks of 16*16, 32*32,
-// or 64*64 bytes for SSE2, AVX2 and AVX512 vectors respectively.
+// Interleaving and deinterleaving is done in blocks of 8*8, 16*16, 32*32,
+// or 64*64 bytes for MMX, SSE2, AVX2 and AVX512 vectors respectively.
+
+//////////////////////////////////////////////////////
+//
+//          MMX 64 bit vectors
+
+#define mm64_put_32( s0, s1 ) \
+  _mm_set_pi32( *((const uint32_t*)(s1)), *((const uint32_t*)(s0)) )
+
+#define mm64_get_32( s, i0, i1 ) \
+  _mm_set_pi32( ((const uint32_t*)(s))[i1], ((const uint32_t*)(s))[i0] )
+
+// 1 MMX block, 8 bytes * 2 lanes
+static inline void mm64_interleave_2x32( void *d, const void *s0,
+                                         const void *s1, int len )
+{
+   casti_m64( d, 0 ) = mm64_put_32( s0     , s1      );
+   casti_m64( d, 1 ) = mm64_put_32( s0+  4, s1+  4 );
+   casti_m64( d, 2 ) = mm64_put_32( s0+  8, s1+  8 );
+   casti_m64( d, 3 ) = mm64_put_32( s0+ 12, s1+ 12 );
+   casti_m64( d, 4 ) = mm64_put_32( s0+ 16, s1+ 16 );
+   casti_m64( d, 5 ) = mm64_put_32( s0+ 20, s1+ 20 );
+   casti_m64( d, 6 ) = mm64_put_32( s0+ 24, s1+ 24 );
+   casti_m64( d, 7 ) = mm64_put_32( s0+ 28, s1+ 28 );
+
+   if ( len <= 256 ) return;
+
+   casti_m64( d, 8 ) = mm64_put_32( s0+ 32, s1+ 32 );
+   casti_m64( d, 9 ) = mm64_put_32( s0+ 36, s1+ 36 );
+   casti_m64( d,10 ) = mm64_put_32( s0+ 40, s1+ 40 );
+   casti_m64( d,11 ) = mm64_put_32( s0+ 44, s1+ 44 );
+   casti_m64( d,12 ) = mm64_put_32( s0+ 48, s1+ 48 );
+   casti_m64( d,13 ) = mm64_put_32( s0+ 52, s1+ 52 );
+   casti_m64( d,14 ) = mm64_put_32( s0+ 56, s1+ 56 );
+   casti_m64( d,15 ) = mm64_put_32( s0+ 60, s1+ 60 );
+
+   if ( len <= 512 ) return;
+
+   casti_m64( d,16 ) = mm64_put_32( s0+ 64, s1+ 64 );
+   casti_m64( d,17 ) = mm64_put_32( s0+ 68, s1+ 68 );
+   casti_m64( d,18 ) = mm64_put_32( s0+ 72, s1+ 72 );
+   casti_m64( d,19 ) = mm64_put_32( s0+ 76, s1+ 76 );
+
+   if ( len <= 640 ) return;
+
+   casti_m64( d,20 ) = mm64_put_32( s0+ 80, s1+ 80 );
+   casti_m64( d,21 ) = mm64_put_32( s0+ 84, s1+ 84 );
+   casti_m64( d,22 ) = mm64_put_32( s0+ 88, s1+ 88 );
+   casti_m64( d,23 ) = mm64_put_32( s0+ 92, s1+ 92 );
+   casti_m64( d,24 ) = mm64_put_32( s0+ 96, s1+ 96 );
+   casti_m64( d,25 ) = mm64_put_32( s0+100, s1+100 );
+   casti_m64( d,26 ) = mm64_put_32( s0+104, s1+104 );
+   casti_m64( d,27 ) = mm64_put_32( s0+108, s1+108 );
+   casti_m64( d,28 ) = mm64_put_32( s0+112, s1+112 );
+   casti_m64( d,29 ) = mm64_put_32( s0+116, s1+116 );
+   casti_m64( d,30 ) = mm64_put_32( s0+120, s1+120 );
+   casti_m64( d,31 ) = mm64_put_32( s0+124, s1+124 );
+}
+
+static inline void mm64_deinterleave_2x32( void *d00, void *d01,
+                                  const int n, const void *s, int len )
+{
+   casti_m64( d00,0 ) = mm64_get_32( s,  0,  2 );
+   casti_m64( d01,0 ) = mm64_get_32( s,  1,  3 );
+   casti_m64( d00,1 ) = mm64_get_32( s,  4,  6 );
+   casti_m64( d01,1 ) = mm64_get_32( s,  5,  7 );
+   casti_m64( d00,2 ) = mm64_get_32( s,  8, 10 );
+   casti_m64( d01,2 ) = mm64_get_32( s,  9, 11 );
+   casti_m64( d00,3 ) = mm64_get_32( s, 12, 14 );
+   casti_m64( d01,3 ) = mm64_get_32( s, 13, 15 );
+
+   if ( len <= 256 ) return;
+
+   casti_m64( d00,4 ) = mm64_get_32( s, 16, 18 );
+   casti_m64( d01,4 ) = mm64_get_32( s, 17, 19 );
+   casti_m64( d00,5 ) = mm64_get_32( s, 20, 22 );
+   casti_m64( d01,5 ) = mm64_get_32( s, 21, 23 );
+   casti_m64( d00,6 ) = mm64_get_32( s, 24, 26 );
+   casti_m64( d01,6 ) = mm64_get_32( s, 25, 27 );
+   casti_m64( d00,7 ) = mm64_get_32( s, 28, 30 );
+   casti_m64( d01,7 ) = mm64_get_32( s, 29, 31 );
+
+   if ( len <= 512 ) return;
+
+   casti_m64( d00,8 ) = mm64_get_32( s, 32, 34 );
+   casti_m64( d01,8 ) = mm64_get_32( s, 33, 35 );
+   casti_m64( d00,9 ) = mm64_get_32( s, 36, 38 );
+   casti_m64( d01,9 ) = mm64_get_32( s, 37, 39 );
+
+   if ( len <= 640 ) return;
+
+   casti_m64( d00,10 ) = mm64_get_32( s, 40, 42 );
+   casti_m64( d01,10 ) = mm64_get_32( s, 41, 43 );
+   casti_m64( d00,11 ) = mm64_get_32( s, 44, 46 );
+   casti_m64( d01,11 ) = mm64_get_32( s, 45, 47 );
+   casti_m64( d00,12 ) = mm64_get_32( s, 48, 50 );
+   casti_m64( d01,12 ) = mm64_get_32( s, 49, 51 );
+   casti_m64( d00,13 ) = mm64_get_32( s, 52, 54 );
+   casti_m64( d01,13 ) = mm64_get_32( s, 53, 55 );
+   casti_m64( d00,14 ) = mm64_get_32( s, 56, 58 );
+   casti_m64( d01,14 ) = mm64_get_32( s, 57, 59 );
+   casti_m64( d00,15 ) = mm64_get_32( s, 60, 62 );
+   casti_m64( d01,15 ) = mm64_get_32( s, 61, 63 );
+}
+
+static inline void mm64_extract_lane_2x32( void *d, const void *s,
+                                  const int lane, const int bit_len )
+{
+   casti_m64( d, 0 ) = mm64_get_32( s, lane    , lane+ 4 );
+   casti_m64( d, 1 ) = mm64_get_32( s, lane+  8, lane+12 );
+   casti_m64( d, 2 ) = mm64_get_32( s, lane+16, lane+20 );
+   casti_m64( d, 3 ) = mm64_get_32( s, lane+24, lane+28 );
+
+   if ( bit_len <= 256 ) return;
+   casti_m64( d, 4 ) = mm64_get_32( s, lane+32, lane+36 );
+   casti_m64( d, 5 ) = mm64_get_32( s, lane+40, lane+44 );
+   casti_m64( d, 6 ) = mm64_get_32( s, lane+48, lane+52 );
+   casti_m64( d, 7 ) = mm64_get_32( s, lane+56, lane+60 );
+   // bit_len == 512
+}

 ///////////////////////////////////////////////////////////////
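To make the lane layout concrete, here is a small plain-C illustration (not the code above, just an equivalent scalar sketch) of 2-way 32 bit interleaving: words from lane 0 and lane 1 simply alternate in the combined buffer.

    #include <stdint.h>
    #include <stdio.h>

    // Scalar equivalent of 2x32 interleaving: d[2*i] = s0[i], d[2*i+1] = s1[i].
    static void interleave_2x32( uint32_t *d, const uint32_t *s0,
                                 const uint32_t *s1, int words )
    {
       for ( int i = 0; i < words; i++ )
       {
          d[ 2*i     ] = s0[i];
          d[ 2*i + 1 ] = s1[i];
       }
    }

    int main()
    {
       uint32_t a[4] = { 0xA0, 0xA1, 0xA2, 0xA3 };
       uint32_t b[4] = { 0xB0, 0xB1, 0xB2, 0xB3 };
       uint32_t d[8];
       interleave_2x32( d, a, b, 4 );
       for ( int i = 0; i < 8; i++ ) printf( "%x ", d[i] );  // a0 b0 a1 b1 ...
       printf( "\n" );
       return 0;
    }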
@@ -356,6 +475,36 @@ static inline void mm256_interleave_8x32x256( void *d, const void *s00,
                       s04+28, s05+28, s06+28, s07+28 );
 }

+static inline void mm256_be_interleave_8x32x256( void *d, const void *s00,
+       const void *s01, const void *s02, const void *s03, const void *s04,
+       const void *s05, const void *s06, const void *s07 )
+{
+  casti_m256i( d, 0 ) = mm256_bswap_32(
+                   mm256_put_32( s00,    s01,    s02,    s03,
+                                 s04,    s05,    s06,    s07 ) );
+  casti_m256i( d, 1 ) = mm256_bswap_32(
+                   mm256_put_32( s00+ 4, s01+ 4, s02+ 4, s03+ 4,
+                                 s04+ 4, s05+ 4, s06+ 4, s07+ 4 ) );
+  casti_m256i( d, 2 ) = mm256_bswap_32(
+                   mm256_put_32( s00+ 8, s01+ 8, s02+ 8, s03+ 8,
+                                 s04+ 8, s05+ 8, s06+ 8, s07+ 8 ) );
+  casti_m256i( d, 3 ) = mm256_bswap_32(
+                   mm256_put_32( s00+12, s01+12, s02+12, s03+12,
+                                 s04+12, s05+12, s06+12, s07+12 ) );
+  casti_m256i( d, 4 ) = mm256_bswap_32(
+                   mm256_put_32( s00+16, s01+16, s02+16, s03+16,
+                                 s04+16, s05+16, s06+16, s07+16 ) );
+  casti_m256i( d, 5 ) = mm256_bswap_32(
+                   mm256_put_32( s00+20, s01+20, s02+20, s03+20,
+                                 s04+20, s05+20, s06+20, s07+20 ) );
+  casti_m256i( d, 6 ) = mm256_bswap_32(
+                   mm256_put_32( s00+24, s01+24, s02+24, s03+24,
+                                 s04+24, s05+24, s06+24, s07+24 ) );
+  casti_m256i( d, 7 ) = mm256_bswap_32(
+                   mm256_put_32( s00+28, s01+28, s02+28, s03+28,
+                                 s04+28, s05+28, s06+28, s07+28 ) );
+}
+
 static inline void mm256_interleave_8x32x128( void *d, const void *s00,
        const void *s01, const void *s02, const void *s03, const void *s04,
        const void *s05, const void *s06, const void *s07 )
@@ -370,6 +519,24 @@ static inline void mm256_interleave_8x32x128( void *d, const void *s00,
                       s04+12, s05+12, s06+12, s07+12 );
 }

+static inline void mm256_be_interleave_8x32x128( void *d, const void *s00,
+       const void *s01, const void *s02, const void *s03, const void *s04,
+       const void *s05, const void *s06, const void *s07 )
+{
+  casti_m256i( d, 0 ) = mm256_bswap_32(
+                   mm256_put_32( s00,    s01,    s02,    s03,
+                                 s04,    s05,    s06,    s07 ) );
+  casti_m256i( d, 1 ) = mm256_bswap_32(
+                   mm256_put_32( s00+ 4, s01+ 4, s02+ 4, s03+ 4,
+                                 s04+ 4, s05+ 4, s06+ 4, s07+ 4 ) );
+  casti_m256i( d, 2 ) = mm256_bswap_32(
+                   mm256_put_32( s00+ 8, s01+ 8, s02+ 8, s03+ 8,
+                                 s04+ 8, s05+ 8, s06+ 8, s07+ 8 ) );
+  casti_m256i( d, 3 ) = mm256_bswap_32(
+                   mm256_put_32( s00+12, s01+12, s02+12, s03+12,
+                                 s04+12, s05+12, s06+12, s07+12 ) );
+}
+
 // can be called directly for 32 byte hash using AVX2
 static inline void mm256_deinterleave_8x32x256( void *d00, void *d01,
        void *d02, void *d03, void *d04, void *d05, void *d06,
@@ -394,6 +561,21 @@ static inline void mm256_interleave_4x64x256( void *d, const void *s0,
   casti_m256i( d,3 ) = mm256_put_64( s0+24, s1+24, s2+24, s3+24 );
 }

+// bswap the data as it's interleaved.
+// A bit of a missnomer, but be is nice and short.
+static inline void mm256_be_interleave_4x64x256( void *d, const void *s0,
+                     const void *s1, const void *s2, const void *s3 )
+{
+  casti_m256i( d,0 ) = mm256_bswap_32(
+                        mm256_put_64( s0,    s1,    s2,    s3 ) );
+  casti_m256i( d,1 ) = mm256_bswap_32(
+                        mm256_put_64( s0+ 8, s1+ 8, s2+ 8, s3+ 8 ) );
+  casti_m256i( d,2 ) = mm256_bswap_32(
+                        mm256_put_64( s0+16, s1+16, s2+16, s3+16 ) );
+  casti_m256i( d,3 ) = mm256_bswap_32(
+                        mm256_put_64( s0+24, s1+24, s2+24, s3+24 ) );
+}
+
 static inline void mm256_interleave_4x64x128( void *d, const void *s0,
                      const void *s1, const void *s2, const void *s3 )
 {
@@ -401,6 +583,14 @@ static inline void mm256_interleave_4x64x128( void *d, const void *s0,
   casti_m256i( d,1 ) = mm256_put_64( s0+ 8, s1+ 8, s2+ 8, s3+ 8 );
 }

+static inline void mm256_be_interleave_4x64x128( void *d, const void *s0,
+                     const void *s1, const void *s2, const void *s3 )
+{
+  casti_m256i( d,0 ) = mm256_bswap_32(
+                        mm256_put_64( s0,    s1,    s2,    s3 ) );
+  casti_m256i( d,1 ) = mm256_bswap_32(
+                        mm256_put_64( s0+ 8, s1+ 8, s2+ 8, s3+ 8 ) );
+}
+
 // 4 lanes of 256 bits using 64 bit interleaving (standard final hash size)
 static inline void mm256_deinterleave_4x64x256( void *d0, void *d1, void *d2,
@@ -496,6 +686,28 @@ static inline void mm256_interleave_8x32( void *d, const void *s0,
   // bit_len == 1024
 }

+static inline void mm256_be_interleave_8x32( void *d, const void *s0,
+      const void *s1, const void *s2, const void *s3, const void *s4,
+      const void *s5, const void *s6, const void *s7, int bit_len )
+{
+   mm256_be_interleave_8x32x256( d, s0, s1, s2, s3, s4, s5, s6, s7 );
+   if ( bit_len <= 256 ) return;
+   mm256_be_interleave_8x32x256( d+256, s0+32, s1+32, s2+32, s3+32,
+                                        s4+32, s5+32, s6+32, s7+32 );
+   if ( bit_len <= 512 ) return;
+   if ( bit_len <= 640 )
+   {
+      mm256_be_interleave_8x32x128( d+512, s0+64, s1+64, s2+64, s3+64,
+                                           s4+64, s5+64, s6+64, s7+64 );
+      return;
+   }
+   mm256_be_interleave_8x32x256( d+512, s0+64, s1+64, s2+64, s3+64,
+                                        s4+64, s5+64, s6+64, s7+64 );
+   mm256_be_interleave_8x32x256( d+768, s0+96, s1+96, s2+96, s3+96,
+                                        s4+96, s5+96, s6+96, s7+96 );
+   // bit_len == 1024
+}
+
 /*
 // Slower but it works with 32 bit data
 // bit_len must be multiple of 32
@@ -595,6 +807,23 @@ static inline void mm256_interleave_4x64( void *d, const void *s0,
   mm256_interleave_4x64x256( d+384, s0+96, s1+96, s2+96, s3+96 );
 }

+static inline void mm256_be_interleave_4x64( void *d, const void *s0,
+                  const void *s1, const void *s2, const void *s3, int bit_len )
+{
+   mm256_be_interleave_4x64x256( d, s0, s1, s2, s3 );
+   if ( bit_len <= 256 ) return;
+   mm256_be_interleave_4x64x256( d+128, s0+32, s1+32, s2+32, s3+32 );
+   if ( bit_len <= 512 ) return;
+   if ( bit_len <= 640 )
+   {
+      mm256_be_interleave_4x64x128( d+256, s0+64, s1+64, s2+64, s3+64 );
+      return;
+   }
+   // bit_len == 1024
+   mm256_be_interleave_4x64x256( d+256, s0+64, s1+64, s2+64, s3+64 );
+   mm256_be_interleave_4x64x256( d+384, s0+96, s1+96, s2+96, s3+96 );
+}
+
 /*
 // Slower version
 // bit_len must be multiple of 64
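The point of the "be" variants above is to byte swap the 32 bit words to big endian while the data is being spread into lanes, so each lane already holds big endian input for the hash. A scalar sketch of the same idea for a 4x64 layout (this is illustrative only; the array sizes and the assumption of an 80 byte block header are mine, not the library's):

    #include <stdint.h>

    // Sketch: spread one 80 byte block header into 4 lanes of 64 bit words,
    // byte swapping each 32 bit word on the way in.  d must hold 40 uint64_t
    // (10 64-bit words per lane * 4 lanes).
    static void be_interleave_4x64_sketch( uint64_t *d, const uint8_t *header )
    {
       for ( int w = 0; w < 20; w += 2 )              // 20 32-bit words per header
       {
          uint32_t lo = __builtin_bswap32( ((const uint32_t*)header)[w]   );
          uint32_t hi = __builtin_bswap32( ((const uint32_t*)header)[w+1] );
          uint64_t v  = ( (uint64_t)hi << 32 ) | lo;
          for ( int lane = 0; lane < 4; lane++ )
             d[ (w/2)*4 + lane ] = v;                 // same header in every lane
       }
    }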
@@ -676,7 +905,9 @@ static inline void mm256_extract_lane_4x64( void *d, const void *s,

 // Convert from 4x32 SSE2 interleaving to 4x64 AVX2.
 // Can't do it in place
-static inline void mm256_reinterleave_4x64( void *dst, void *src, int bit_len )
+#define mm256_reinterleave_4x64 mm256_reinterleave_4x32_4x64
+static inline void mm256_reinterleave_4x32_4x64( void *dst, void *src,
+                                                 int bit_len )
 {
    __m256i* d = (__m256i*)dst;
    uint32_t *s = (uint32_t*)src;
@@ -736,7 +967,9 @@ static inline void mm256_reinterleave_4x64x( uint64_t *dst, uint32_t *src,

 // Convert 4x64 byte (256 bit) vectors to 4x32 (128 bit) vectors for AVX
 // bit_len must be multiple of 64
-static inline void mm256_reinterleave_4x32( void *dst, void *src, int bit_len )
+#define mm256_reinterleave_4x32 mm256_reinterleave_4x64_4x32
+static inline void mm256_reinterleave_4x64_4x32( void *dst, void *src,
+                                                 int bit_len )
 {
    __m256i *d = (__m256i*)dst;
    uint32_t *s = (uint32_t*)src;
@@ -862,7 +1095,8 @@ static inline void mm_reinterleave_4x32( void *dst, void *src, int bit_len )
 }
 */

-static inline void mm256_interleave_2x128( const void *d, const void *s0,
+#define mm256_interleave_2x128 mm256_interleave_1x128
+static inline void mm256_interleave_1x128( const void *d, const void *s0,
                      void *s1, const int bit_len )
 {
   casti_m256i( d, 0 ) = mm256_put_64( s0    , s0+ 8, s1    , s1+ 8 );
@@ -879,7 +1113,8 @@ static inline void mm256_interleave_2x128( const void *d, const void *s0,
   // bit_len == 1024
 }

-static inline void mm256_deinterleave_2x128( void *d0, void *d1, void *s,
+#define mm256_deinterleave_2x128 mm256_deinterleave_1x128
+static inline void mm256_deinterleave_1x128( void *d0, void *d1, void *s,
                      int bit_len )
 {
   mm256_deinterleave_2x128x256( d0, d1, 0, s );
@@ -1078,38 +1313,38 @@ static inline void mm512_deinterleave_16x32x512( void *d00, void *d01,
        void *d12, void *d13, void *d14, void *d15, const int n,
        const void *s )
 {
  (this hunk only adjusts whitespace in the 16 statements below; new form shown)
   casti_m512i(d00,n) = mm512_get_32( s,   0, 16, 32, 48, 64, 80, 96,112,
                                      128,144,160,176,192,208,224,240 );
   casti_m512i(d01,n) = mm512_get_32( s,   1, 17, 33, 49, 65, 81, 97,113,
                                      129,145,161,177,193,209,225,241 );
   casti_m512i(d02,n) = mm512_get_32( s,   2, 18, 34, 50, 66, 82, 98,114,
                                      130,146,162,178,194,210,226,242 );
   casti_m512i(d03,n) = mm512_get_32( s,   3, 19, 35, 51, 67, 83, 99,115,
                                      131,147,163,179,195,211,227,243 );
   casti_m512i(d04,n) = mm512_get_32( s,   4, 20, 36, 52, 68, 84,100,116,
                                      132,148,164,180,196,212,228,244 );
   casti_m512i(d05,n) = mm512_get_32( s,   5, 21, 37, 53, 69, 85,101,117,
                                      133,149,165,181,197,213,229,245 );
   casti_m512i(d06,n) = mm512_get_32( s,   6, 22, 38, 54, 70, 86,102,118,
                                      134,150,166,182,198,214,230,246 );
   casti_m512i(d07,n) = mm512_get_32( s,   7, 23, 39, 55, 71, 87,103,119,
                                      135,151,167,183,199,215,231,247 );
   casti_m512i(d08,n) = mm512_get_32( s,   8, 24, 40, 56, 72, 88,104,120,
                                      136,152,168,184,200,216,232,248 );
   casti_m512i(d09,n) = mm512_get_32( s,   9, 25, 41, 57, 73, 89,105,121,
                                      137,153,169,185,201,217,233,249 );
   casti_m512i(d10,n) = mm512_get_32( s,  10, 26, 42, 58, 74, 90,106,122,
                                      138,154,170,186,202,218,234,250 );
   casti_m512i(d11,n) = mm512_get_32( s,  11, 27, 43, 59, 75, 91,107,123,
                                      139,155,171,187,203,219,235,251 );
   casti_m512i(d12,n) = mm512_get_32( s,  12, 28, 44, 60, 76, 92,108,124,
                                      140,156,172,188,204,220,236,252 );
   casti_m512i(d13,n) = mm512_get_32( s,  13, 29, 45, 61, 77, 93,109,125,
                                      141,157,173,189,205,221,237,253 );
   casti_m512i(d14,n) = mm512_get_32( s,  14, 30, 46, 62, 78, 94,110,126,
                                      142,158,174,190,206,222,238,254 );
   casti_m512i(d15,n) = mm512_get_32( s,  15, 31, 47, 63, 79, 95,111,127,
                                      143,159,175,191,207,223,239,255 );
 }

 static inline void mm512_interleave_8x64x512( void *d, const void *s0,
@@ -1363,6 +1598,99 @@ static inline void mm512_deinterleave_4x128( void *d0, void *d1, void *d2,
|
|||||||
mm512_deinterleave_4x128x512( d0, d1, d2, d3, 1, s+256 );
|
mm512_deinterleave_4x128x512( d0, d1, d2, d3, 1, s+256 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// input one 8x64 buffer and return 2*4*128
|
||||||
|
static inline void mm512_reinterleave_8x64_4x128( void *dst0, void *dst1,
|
||||||
|
const void *src, int bit_len )
|
||||||
|
{
|
||||||
|
__m512i* d0 = (__m512i*)dst0;
|
||||||
|
__m512i* d1 = (__m512i*)dst1;
|
||||||
|
uint64_t *s = (uint64_t*)src;
|
||||||
|
|
||||||
|
d0[0] = _mm512_set_epi64( s[ 11], s[ 3], s[ 10], s[ 2],
|
||||||
|
s[ 9], s[ 1], s[ 8], s[ 0] );
|
||||||
|
d0[1] = _mm512_set_epi64( s[ 27], s[ 19], s[ 26], s[ 18],
|
||||||
|
s[ 25], s[ 17], s[ 24], s[ 16] );
|
||||||
|
d0[2] = _mm512_set_epi64( s[ 15], s[ 7], s[ 14], s[ 6],
|
||||||
|
s[ 13], s[ 5], s[ 12], s[ 4] );
|
||||||
|
d0[3] = _mm512_set_epi64( s[ 31], s[ 23], s[ 30], s[ 22],
|
||||||
|
s[ 29], s[ 21], s[ 28], s[ 20] );
|
||||||
|
d1[0] = _mm512_set_epi64( s[ 43], s[ 35], s[ 42], s[ 34],
|
||||||
|
s[ 41], s[ 33], s[ 40], s[ 32] );
|
||||||
|
d1[1] = _mm512_set_epi64( s[ 59], s[ 51], s[ 58], s[ 50],
|
||||||
|
s[ 57], s[ 49], s[ 56], s[ 48] );
|
||||||
|
d1[2] = _mm512_set_epi64( s[ 47], s[ 39], s[ 46], s[ 38],
|
||||||
|
s[ 45], s[ 37], s[ 44], s[ 36] );
|
||||||
|
d1[3] = _mm512_set_epi64( s[ 63], s[ 55], s[ 62], s[ 54],
|
||||||
|
s[ 61], s[ 53], s[ 60], s[ 52] );
|
||||||
|
|
||||||
|
if ( bit_len <= 512 ) return;
|
||||||
|
|
||||||
|
d0[4] = _mm512_set_epi64( s[ 75], s[ 67], s[ 74], s[ 66],
|
||||||
|
s[ 73], s[ 65], s[ 72], s[ 64] );
|
||||||
|
d0[5] = _mm512_set_epi64( s[ 91], s[ 83], s[ 90], s[ 82],
|
||||||
|
s[ 89], s[ 81], s[ 88], s[ 80] );
|
||||||
|
d0[6] = _mm512_set_epi64( s[ 79], s[ 71], s[ 78], s[ 70],
|
||||||
|
s[ 77], s[ 69], s[ 76], s[ 68] );
|
||||||
|
d0[7] = _mm512_set_epi64( s[ 95], s[ 87], s[ 94], s[ 86],
|
||||||
|
s[ 93], s[ 85], s[ 92], s[ 84] );
|
||||||
|
d1[4] = _mm512_set_epi64( s[107], s[ 99], s[106], s[ 98],
|
||||||
|
s[105], s[ 97], s[104], s[ 96] );
|
||||||
|
d1[5] = _mm512_set_epi64( s[123], s[115], s[122], s[114],
|
||||||
|
s[121], s[113], s[120], s[112] );
|
||||||
|
d1[6] = _mm512_set_epi64( s[111], s[103], s[110], s[102],
|
||||||
|
s[109], s[101], s[108], s[100] );
|
||||||
|
d1[7] = _mm512_set_epi64( s[127], s[119], s[126], s[118],
|
||||||
|
s[125], s[117], s[124], s[116] );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
+// input 2 4x128 return 8x64
+static inline void mm512_reinterleave_4x128_8x64( void *dst, const void *src0,
+                                           const void *src1, int bit_len )
+{
+   __m512i* d = (__m512i*)dst;
+   uint64_t *s0 = (uint64_t*)src0;
+   uint64_t *s1 = (uint64_t*)src1;
+
+   d[ 0] = _mm512_set_epi64( s1[ 6], s1[ 4], s1[ 2], s1[ 0],
+                             s0[ 6], s0[ 4], s0[ 2], s0[ 0] );
+   d[ 1] = _mm512_set_epi64( s1[ 7], s1[ 5], s1[ 3], s1[ 1],
+                             s0[ 7], s0[ 5], s0[ 3], s0[ 1] );
+   d[ 2] = _mm512_set_epi64( s1[14], s1[12], s1[10], s1[ 8],
+                             s0[14], s0[12], s0[10], s0[ 8] );
+   d[ 3] = _mm512_set_epi64( s1[15], s1[13], s1[11], s1[ 9],
+                             s0[15], s0[13], s0[11], s0[ 9] );
+   d[ 4] = _mm512_set_epi64( s1[22], s1[20], s1[18], s1[16],
+                             s0[22], s0[20], s0[18], s0[16] );
+   d[ 5] = _mm512_set_epi64( s1[23], s1[21], s1[19], s1[17],
+                             s0[23], s0[21], s0[19], s0[17] );
+   d[ 6] = _mm512_set_epi64( s1[30], s1[28], s1[26], s1[24],
+                             s0[30], s0[28], s0[26], s0[24] );
+   d[ 7] = _mm512_set_epi64( s1[31], s1[29], s1[27], s1[25],
+                             s0[31], s0[29], s0[27], s0[25] );
+
+   if ( bit_len <= 512 ) return;
+
+   d[ 8] = _mm512_set_epi64( s1[38], s1[36], s1[34], s1[32],
+                             s0[38], s0[36], s0[34], s0[32] );
+   d[ 9] = _mm512_set_epi64( s1[39], s1[37], s1[35], s1[33],
+                             s0[39], s0[37], s0[35], s0[33] );
+   d[10] = _mm512_set_epi64( s1[46], s1[44], s1[42], s1[40],
+                             s0[46], s0[44], s0[42], s0[40] );
+   d[11] = _mm512_set_epi64( s1[47], s1[45], s1[43], s1[41],
+                             s0[47], s0[45], s0[43], s0[41] );
+   d[12] = _mm512_set_epi64( s1[54], s1[52], s1[50], s1[48],
+                             s0[54], s0[52], s0[50], s0[48] );
+   d[13] = _mm512_set_epi64( s1[55], s1[53], s1[51], s1[49],
+                             s0[55], s0[53], s0[51], s0[49] );
+   d[14] = _mm512_set_epi64( s1[62], s1[60], s1[58], s1[56],
+                             s0[62], s0[60], s0[58], s0[56] );
+   d[15] = _mm512_set_epi64( s1[63], s1[61], s1[59], s1[57],
+                             s0[63], s0[61], s0[59], s0[57] );
+}
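For orientation, a hypothetical call site showing how the two helpers above would round-trip a chained hash between an 8-way 64-bit stage and a pair of 4-way 128-bit stages. Only the two mm512_reinterleave_* names come from the code above; the buffer names, sizes and stage comments are illustrative assumptions, not code from this commit:

// Hypothetical usage sketch (assumes 1024-bit lanes).
__attribute__((aligned(64))) uint64_t hash8 [128];   // 8 lanes x 1024 bits, 8x64 interleaved
__attribute__((aligned(64))) uint64_t half_a[ 64];   // first 4x128-interleaved buffer
__attribute__((aligned(64))) uint64_t half_b[ 64];   // second 4x128-interleaved buffer

// ... produce hash8 with an 8-way 64-bit stage ...
mm512_reinterleave_8x64_4x128( half_a, half_b, hash8, 1024 );
// ... run 4-way 128-bit stages over half_a and half_b ...
mm512_reinterleave_4x128_8x64( hash8, half_a, half_b, 1024 );
// ... continue with 8-way 64-bit stages on hash8 ...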
 static inline void mm512_extract_lane_4x128( void *d, const void *s,
                                     const int lane, const int bit_len )
 {

miner.h
@@ -538,6 +538,7 @@ enum algos {
         ALGO_SCRYPTJANE,
         ALGO_SHA256D,
         ALGO_SHA256T,
+        ALGO_SHA256Q,
         ALGO_SHAVITE3,
         ALGO_SKEIN,
         ALGO_SKEIN2,
@@ -625,6 +626,7 @@ static const char* const algo_names[] = {
         "scryptjane",
         "sha256d",
         "sha256t",
+        "sha256q",
         "shavite3",
         "skein",
         "skein2",
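The enum entry and the name string are parallel tables: the miner maps an algorithm name to its enum value by index, so ALGO_SHA256Q must sit at the same position in enum algos as "sha256q" does in algo_names[]. A trimmed, hypothetical excerpt showing only the required pairing (the real lists are much longer):

// Hypothetical trimmed excerpt; both tables must keep the same relative order.
enum algos { ALGO_SHA256D, ALGO_SHA256T, ALGO_SHA256Q, ALGO_SHAVITE3 };
static const char* const algo_names[] =
                      { "sha256d", "sha256t", "sha256q", "shavite3" };
// so algo_names[ ALGO_SHA256Q ] is "sha256q"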
@@ -774,7 +776,8 @@ Options:\n\
                           scryptjane:nf\n\
                           sha256d       Double SHA-256\n\
                           sha256t       Triple SHA-256, Onecoin (OC)\n\
+                          sha256q       Quad SHA-256, Pyrite (PYE)\n\
                           shavite3      Shavite3\n\
                           skein         Skein+Sha (Skeincoin)\n\
                           skein2        Double Skein (Woodcoin)\n\
                           skunk         Signatum (SIGT)\n\
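The new sha256q option is advertised as quad SHA-256. As a rough reference only, assuming by analogy with the triple-hash sha256t that sha256q is four chained SHA-256 passes, a plain OpenSSL sketch; this is not the project's optimized implementation and the function name is a placeholder:

#include <openssl/sha.h>
#include <stdint.h>
#include <stddef.h>

// Hedged reference: four chained SHA-256 passes over the input.
static void sha256q_ref( uint8_t out[32], const uint8_t *data, size_t len )
{
   uint8_t a[32], b[32];
   SHA256( data, len, a );   // pass 1
   SHA256( a, 32, b );       // pass 2
   SHA256( b, 32, a );       // pass 3
   SHA256( a, 32, out );     // pass 4
}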
@@ -19,7 +19,7 @@ export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --with-crypto=$LOCAL_LIB/open
 ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h

 # edit configure to fix pthread lib name for Windows.
-sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
+#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac

 # make release directory and copy selected DLLs.
 mkdir release