mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
3 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
d6e8d7a46e | ||
![]() |
71d6b97ee8 | ||
![]() |
b2331375a3 |
@@ -42,9 +42,6 @@ openssl 1.1.0e or higher. Add one of the following, depending on the
|
|||||||
compiler version, to CFLAGS:
|
compiler version, to CFLAGS:
|
||||||
"-march=native" or "-march=znver1" or "-msha".
|
"-march=native" or "-march=znver1" or "-msha".
|
||||||
|
|
||||||
Due to poor AVX2 performance on Ryzen users should add -DRYZEN_ to CFLAGS
|
|
||||||
to override multiway AVX2 on algos with sha256, and use SHA instead.
|
|
||||||
|
|
||||||
Additional instructions for static compilalation can be found here:
|
Additional instructions for static compilalation can be found here:
|
||||||
https://lxadm.com/Static_compilation_of_cpuminer
|
https://lxadm.com/Static_compilation_of_cpuminer
|
||||||
Static builds should only considered in a homogeneous HW and SW environment.
|
Static builds should only considered in a homogeneous HW and SW environment.
|
||||||
|
@@ -131,6 +131,7 @@ cpuminer_SOURCES = \
|
|||||||
algo/lyra2/lyra2h-4way.c \
|
algo/lyra2/lyra2h-4way.c \
|
||||||
algo/lyra2/allium-4way.c \
|
algo/lyra2/allium-4way.c \
|
||||||
algo/lyra2/allium.c \
|
algo/lyra2/allium.c \
|
||||||
|
algo/lyra2/phi2-4way.c \
|
||||||
algo/lyra2/phi2.c \
|
algo/lyra2/phi2.c \
|
||||||
algo/m7m.c \
|
algo/m7m.c \
|
||||||
algo/neoscrypt/neoscrypt.c \
|
algo/neoscrypt/neoscrypt.c \
|
||||||
@@ -147,6 +148,9 @@ cpuminer_SOURCES = \
|
|||||||
algo/quark/anime-gate.c \
|
algo/quark/anime-gate.c \
|
||||||
algo/quark/anime.c \
|
algo/quark/anime.c \
|
||||||
algo/quark/anime-4way.c \
|
algo/quark/anime-4way.c \
|
||||||
|
algo/quark/hmq1725-gate.c \
|
||||||
|
algo/quark/hmq1725-4way.c \
|
||||||
|
algo/quark/hmq1725.c \
|
||||||
algo/qubit/qubit-gate.c \
|
algo/qubit/qubit-gate.c \
|
||||||
algo/qubit/qubit.c \
|
algo/qubit/qubit.c \
|
||||||
algo/qubit/qubit-2way.c \
|
algo/qubit/qubit-2way.c \
|
||||||
@@ -257,7 +261,6 @@ cpuminer_SOURCES = \
|
|||||||
algo/x17/xevan-gate.c \
|
algo/x17/xevan-gate.c \
|
||||||
algo/x17/xevan.c \
|
algo/x17/xevan.c \
|
||||||
algo/x17/xevan-4way.c \
|
algo/x17/xevan-4way.c \
|
||||||
algo/x17/hmq1725.c \
|
|
||||||
algo/x17/sonoa-gate.c \
|
algo/x17/sonoa-gate.c \
|
||||||
algo/x17/sonoa-4way.c \
|
algo/x17/sonoa-4way.c \
|
||||||
algo/x17/sonoa.c \
|
algo/x17/sonoa.c \
|
||||||
|
@@ -29,7 +29,7 @@ cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
|||||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
||||||
cpuminer-avx.exe "-march=corei7-avx" Sandy-Ivybridge
|
cpuminer-avx.exe "-march=corei7-avx" Sandy-Ivybridge
|
||||||
cpuminer-avx2.exe "-march=core-avx2" Haswell, Sky-Kaby-Coffeelake
|
cpuminer-avx2.exe "-march=core-avx2" Haswell, Sky-Kaby-Coffeelake
|
||||||
cpuminer-zen "-march=znver1 -DRYZEN_" Ryzen
|
cpuminer-zen "-march=znver1" AMD Ryzen, Threadripper
|
||||||
|
|
||||||
If you like this software feel free to donate:
|
If you like this software feel free to donate:
|
||||||
|
|
||||||
|
@@ -38,6 +38,25 @@ supported.
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v3.9.4
|
||||||
|
|
||||||
|
Faster AVX2 for lyra2v3, quark, anime.
|
||||||
|
Fixed skein AVX2 regression (invalid shares since v3.9.0) and faster.
|
||||||
|
Faster skein2 with 4way AVX2 enabled.
|
||||||
|
Automatic SHA override on Ryzen CPUs, no need for -DRYZEN compile flag.
|
||||||
|
Ongoing restructuring.
|
||||||
|
|
||||||
|
v3.9.3.1
|
||||||
|
|
||||||
|
Skipped v3.9.3 due to misidentification of v3.9.2.5 as v3.9.3.
|
||||||
|
Fixed x16r algo 25% invalid share reject rate. The bug may have also
|
||||||
|
affected other algos.
|
||||||
|
|
||||||
|
v3.9.2.5
|
||||||
|
|
||||||
|
Fixed 2 regressions: hodl AES detection, x16r invalid shares with AVX2.
|
||||||
|
More restructuring.
|
||||||
|
|
||||||
v3.9.2.4
|
v3.9.2.4
|
||||||
|
|
||||||
Yet another affinity fix. Hopefully the last one.
|
Yet another affinity fix. Hopefully the last one.
|
||||||
|
@@ -2,8 +2,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
#include "interleave.h"
|
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
////
|
////
|
||||||
|
@@ -112,7 +112,7 @@ int allocate_memory(const argon2_context *context, uint8_t **memory,
|
|||||||
void free_memory(const argon2_context *context, uint8_t *memory,
|
void free_memory(const argon2_context *context, uint8_t *memory,
|
||||||
size_t num, size_t size) {
|
size_t num, size_t size) {
|
||||||
size_t memory_size = num*size;
|
size_t memory_size = num*size;
|
||||||
clear_internal_memory(memory, memory_size);
|
// clear_internal_memory(memory, memory_size);
|
||||||
if (context->free_cbk) {
|
if (context->free_cbk) {
|
||||||
(context->free_cbk)(memory, memory_size);
|
(context->free_cbk)(memory, memory_size);
|
||||||
} else {
|
} else {
|
||||||
@@ -137,7 +137,7 @@ void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
|
|||||||
int FLAG_clear_internal_memory = 0;
|
int FLAG_clear_internal_memory = 0;
|
||||||
void clear_internal_memory(void *v, size_t n) {
|
void clear_internal_memory(void *v, size_t n) {
|
||||||
if (FLAG_clear_internal_memory && v) {
|
if (FLAG_clear_internal_memory && v) {
|
||||||
secure_wipe_memory(v, n);
|
// secure_wipe_memory(v, n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -559,7 +559,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
|||||||
context->pwdlen);
|
context->pwdlen);
|
||||||
|
|
||||||
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
||||||
secure_wipe_memory(context->pwd, context->pwdlen);
|
// secure_wipe_memory(context->pwd, context->pwdlen);
|
||||||
context->pwdlen = 0;
|
context->pwdlen = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -580,7 +580,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
|||||||
context->secretlen);
|
context->secretlen);
|
||||||
|
|
||||||
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
||||||
secure_wipe_memory(context->secret, context->secretlen);
|
// secure_wipe_memory(context->secret, context->secretlen);
|
||||||
context->secretlen = 0;
|
context->secretlen = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -45,7 +45,7 @@ extern "C"{
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#define SPH_SIZE_blake256 256
|
#define SPH_SIZE_blake256 256
|
||||||
|
|
||||||
|
@@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
#if defined(__SSE4_2__)
|
#if defined(__SSE4_2__)
|
||||||
|
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
@@ -43,7 +43,7 @@ extern "C"{
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#define SPH_SIZE_bmw256 256
|
#define SPH_SIZE_bmw256 256
|
||||||
|
|
||||||
|
@@ -537,6 +537,8 @@ bmw32_4way(bmw_4way_small_context *sc, const void *data, size_t len)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
sc->ptr = ptr;
|
sc->ptr = ptr;
|
||||||
|
|
||||||
|
|
||||||
if ( h1 != sc->H )
|
if ( h1 != sc->H )
|
||||||
memcpy_128( sc->H, h1, 16 );
|
memcpy_128( sc->H, h1, 16 );
|
||||||
}
|
}
|
||||||
@@ -571,6 +573,7 @@ bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,
|
|||||||
|
|
||||||
for ( u = 0; u < 16; u ++ )
|
for ( u = 0; u < 16; u ++ )
|
||||||
buf[u] = h2[u];
|
buf[u] = h2[u];
|
||||||
|
|
||||||
compress_small( buf, (__m128i*)final_s, h1 );
|
compress_small( buf, (__m128i*)final_s, h1 );
|
||||||
|
|
||||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
||||||
@@ -1041,22 +1044,22 @@ static const __m256i final_s8[16] =
|
|||||||
|
|
||||||
void bmw256_8way_init( bmw256_8way_context *ctx )
|
void bmw256_8way_init( bmw256_8way_context *ctx )
|
||||||
{
|
{
|
||||||
ctx->H[ 0] = _mm256_set1_epi64x( IV256[ 0] );
|
ctx->H[ 0] = _mm256_set1_epi32( IV256[ 0] );
|
||||||
ctx->H[ 1] = _mm256_set1_epi64x( IV256[ 1] );
|
ctx->H[ 1] = _mm256_set1_epi32( IV256[ 1] );
|
||||||
ctx->H[ 2] = _mm256_set1_epi64x( IV256[ 2] );
|
ctx->H[ 2] = _mm256_set1_epi32( IV256[ 2] );
|
||||||
ctx->H[ 3] = _mm256_set1_epi64x( IV256[ 3] );
|
ctx->H[ 3] = _mm256_set1_epi32( IV256[ 3] );
|
||||||
ctx->H[ 4] = _mm256_set1_epi64x( IV256[ 4] );
|
ctx->H[ 4] = _mm256_set1_epi32( IV256[ 4] );
|
||||||
ctx->H[ 5] = _mm256_set1_epi64x( IV256[ 5] );
|
ctx->H[ 5] = _mm256_set1_epi32( IV256[ 5] );
|
||||||
ctx->H[ 6] = _mm256_set1_epi64x( IV256[ 6] );
|
ctx->H[ 6] = _mm256_set1_epi32( IV256[ 6] );
|
||||||
ctx->H[ 7] = _mm256_set1_epi64x( IV256[ 7] );
|
ctx->H[ 7] = _mm256_set1_epi32( IV256[ 7] );
|
||||||
ctx->H[ 8] = _mm256_set1_epi64x( IV256[ 8] );
|
ctx->H[ 8] = _mm256_set1_epi32( IV256[ 8] );
|
||||||
ctx->H[ 9] = _mm256_set1_epi64x( IV256[ 9] );
|
ctx->H[ 9] = _mm256_set1_epi32( IV256[ 9] );
|
||||||
ctx->H[10] = _mm256_set1_epi64x( IV256[10] );
|
ctx->H[10] = _mm256_set1_epi32( IV256[10] );
|
||||||
ctx->H[11] = _mm256_set1_epi64x( IV256[11] );
|
ctx->H[11] = _mm256_set1_epi32( IV256[11] );
|
||||||
ctx->H[12] = _mm256_set1_epi64x( IV256[12] );
|
ctx->H[12] = _mm256_set1_epi32( IV256[12] );
|
||||||
ctx->H[13] = _mm256_set1_epi64x( IV256[13] );
|
ctx->H[13] = _mm256_set1_epi32( IV256[13] );
|
||||||
ctx->H[14] = _mm256_set1_epi64x( IV256[14] );
|
ctx->H[14] = _mm256_set1_epi32( IV256[14] );
|
||||||
ctx->H[15] = _mm256_set1_epi64x( IV256[15] );
|
ctx->H[15] = _mm256_set1_epi32( IV256[15] );
|
||||||
ctx->ptr = 0;
|
ctx->ptr = 0;
|
||||||
ctx->bit_count = 0;
|
ctx->bit_count = 0;
|
||||||
|
|
||||||
@@ -1076,14 +1079,15 @@ void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len )
|
|||||||
ptr = ctx->ptr;
|
ptr = ctx->ptr;
|
||||||
h1 = ctx->H;
|
h1 = ctx->H;
|
||||||
h2 = htmp;
|
h2 = htmp;
|
||||||
|
|
||||||
while ( len > 0 )
|
while ( len > 0 )
|
||||||
{
|
{
|
||||||
size_t clen;
|
size_t clen;
|
||||||
clen = buf_size - ptr;
|
clen = buf_size - ptr;
|
||||||
if ( clen > len )
|
if ( clen > len )
|
||||||
clen = len;
|
clen = len;
|
||||||
memcpy_256( buf + (ptr>>3), vdata, clen >> 3 );
|
memcpy_256( buf + (ptr>>2), vdata, clen >> 2 );
|
||||||
vdata = vdata + (clen>>3);
|
vdata = vdata + (clen>>2);
|
||||||
len -= clen;
|
len -= clen;
|
||||||
ptr += clen;
|
ptr += clen;
|
||||||
if ( ptr == buf_size )
|
if ( ptr == buf_size )
|
||||||
@@ -1097,6 +1101,7 @@ void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
ctx->ptr = ptr;
|
ctx->ptr = ptr;
|
||||||
|
|
||||||
if ( h1 != ctx->H )
|
if ( h1 != ctx->H )
|
||||||
memcpy_256( ctx->H, h1, 16 );
|
memcpy_256( ctx->H, h1, 16 );
|
||||||
}
|
}
|
||||||
@@ -1106,24 +1111,26 @@ void bmw256_8way_close( bmw256_8way_context *ctx, void *dst )
|
|||||||
__m256i *buf;
|
__m256i *buf;
|
||||||
__m256i h1[16], h2[16], *h;
|
__m256i h1[16], h2[16], *h;
|
||||||
size_t ptr, u, v;
|
size_t ptr, u, v;
|
||||||
// unsigned z;
|
|
||||||
const int buf_size = 64; // bytes of one lane, compatible with len
|
const int buf_size = 64; // bytes of one lane, compatible with len
|
||||||
|
|
||||||
buf = ctx->buf;
|
buf = ctx->buf;
|
||||||
ptr = ctx->ptr;
|
ptr = ctx->ptr;
|
||||||
buf[ ptr>>3 ] = _mm256_set1_epi32( 0x80 );
|
buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
|
||||||
ptr += 8;
|
ptr += 4;
|
||||||
h = ctx->H;
|
h = ctx->H;
|
||||||
|
|
||||||
if ( ptr > (buf_size - 8) )
|
if ( ptr > (buf_size - 4) )
|
||||||
{
|
{
|
||||||
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
memset_zero_256( buf + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||||
compress_small_8way( buf, h, h1 );
|
compress_small_8way( buf, h, h1 );
|
||||||
ptr = 0;
|
ptr = 0;
|
||||||
h = h1;
|
h = h1;
|
||||||
}
|
}
|
||||||
memset_zero_256( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
|
memset_zero_256( buf + (ptr>>2), (buf_size - 8 - ptr) >> 2 );
|
||||||
buf[ (buf_size - 8) >> 3 ] = _mm256_set1_epi64x( ctx->bit_count );
|
buf[ (buf_size - 8) >> 2 ] = _mm256_set1_epi32( ctx->bit_count );
|
||||||
|
buf[ (buf_size - 4) >> 2 ] = m256_zero;
|
||||||
|
|
||||||
|
|
||||||
compress_small_8way( buf, h, h2 );
|
compress_small_8way( buf, h, h2 );
|
||||||
|
|
||||||
for ( u = 0; u < 16; u ++ )
|
for ( u = 0; u < 16; u ++ )
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
// 2x128, 2 way parallel SSE2
|
// 2x128, 2 way parallel SSE2
|
||||||
|
|
||||||
|
@@ -13,7 +13,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
// The result of hashing 10 rounds of initial data which is params and
|
// The result of hashing 10 rounds of initial data which is params and
|
||||||
|
@@ -12,7 +12,7 @@
|
|||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include "hash-groestl.h"
|
#include "hash-groestl.h"
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#ifndef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
|
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include "hash-groestl256.h"
|
#include "hash-groestl256.h"
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#ifndef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
|
|
||||||
|
@@ -40,7 +40,7 @@
|
|||||||
|
|
||||||
#if defined (__AVX2__)
|
#if defined (__AVX2__)
|
||||||
|
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
|
@@ -69,7 +69,7 @@ extern "C"{
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#define SPH_SIZE_haval256_5 256
|
#define SPH_SIZE_haval256_5 256
|
||||||
|
|
||||||
|
@@ -156,7 +156,7 @@ int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
|
|
||||||
bool register_hodl_algo( algo_gate_t* gate )
|
bool register_hodl_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
#if !defined(__AES__)
|
||||||
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
|
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
|
@@ -44,7 +44,7 @@ extern "C"{
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#define SPH_SIZE_jh256 256
|
#define SPH_SIZE_jh256 256
|
||||||
|
|
||||||
|
@@ -44,7 +44,7 @@ extern "C"{
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#define SPH_SIZE_keccak256 256
|
#define SPH_SIZE_keccak256 256
|
||||||
|
|
||||||
|
@@ -24,7 +24,7 @@
|
|||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#define MASK _mm256_set_epi32( 0UL, 0UL, 0UL, 0xffffffffUL, \
|
#define MASK _mm256_set_epi32( 0UL, 0UL, 0UL, 0xffffffffUL, \
|
||||||
0UL, 0UL, 0UL, 0xffffffffUL )
|
0UL, 0UL, 0UL, 0xffffffffUL )
|
||||||
|
@@ -24,7 +24,7 @@
|
|||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "algo/sha/sha3-defs.h"
|
#include "algo/sha/sha3-defs.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
/* The length of digests*/
|
/* The length of digests*/
|
||||||
#define DIGEST_BIT_LEN_224 224
|
#define DIGEST_BIT_LEN_224 224
|
||||||
|
@@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
#include "luffa_for_sse2.h"
|
#include "luffa_for_sse2.h"
|
||||||
|
|
||||||
#define MULT2(a0,a1) do \
|
#define MULT2(a0,a1) do \
|
||||||
|
@@ -44,10 +44,11 @@ void allium_4way_hash( void *state, const void *input )
|
|||||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||||
blake256_4way_close( &ctx.blake, vhash32 );
|
blake256_4way_close( &ctx.blake, vhash32 );
|
||||||
|
|
||||||
mm256_reinterleave_4x64( vhash64, vhash32, 256 );
|
mm256_rintrlv_4x32_4x64( vhash64, vhash32, 256 );
|
||||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||||
|
|
||||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||||
@@ -67,26 +68,23 @@ void allium_4way_hash( void *state, const void *input )
|
|||||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
mm256_intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||||
|
|
||||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||||
skein256_4way_close( &ctx.skein, vhash64 );
|
skein256_4way_close( &ctx.skein, vhash64 );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
|
||||||
|
|
||||||
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
|
||||||
sizeof(hashState_groestl256) );
|
|
||||||
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
|
|
||||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
|
||||||
sizeof(hashState_groestl256) );
|
|
||||||
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
|
|
||||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
|
||||||
sizeof(hashState_groestl256) );
|
|
||||||
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
|
|
||||||
|
|
||||||
memcpy( state, hash0, 32 );
|
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||||
memcpy( state+32, hash1, 32 );
|
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||||
memcpy( state+64, hash2, 32 );
|
sizeof(hashState_groestl256) );
|
||||||
memcpy( state+96, hash3, 32 );
|
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
|
||||||
|
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||||
|
sizeof(hashState_groestl256) );
|
||||||
|
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
|
||||||
|
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||||
|
sizeof(hashState_groestl256) );
|
||||||
|
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
@@ -94,7 +92,6 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t _ALIGN(64) edata[20];
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@@ -106,13 +103,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
|
|
||||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
|
||||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
blake256_4way_init( &allium_4way_ctx.blake );
|
blake256_4way_init( &allium_4way_ctx.blake );
|
||||||
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
||||||
|
|
||||||
@@ -124,7 +115,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
for ( int lane = 0; lane < 4; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||||
{
|
{
|
||||||
if ( fulltest( hash+(lane<<3), ptarget ) )
|
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_solution( work, hash+(lane<<3), mythr, lane );
|
submit_solution( work, hash+(lane<<3), mythr, lane );
|
||||||
|
@@ -47,7 +47,9 @@ bool lyra2rev3_thread_init()
|
|||||||
|
|
||||||
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||||
l2v3_wholeMatrix = _mm_malloc( size, 64 );
|
l2v3_wholeMatrix = _mm_malloc( size, 64 );
|
||||||
#if defined (LYRA2REV3_4WAY)
|
#if defined (LYRA2REV3_8WAY)
|
||||||
|
init_lyra2rev3_8way_ctx();;
|
||||||
|
#elif defined (LYRA2REV3_4WAY)
|
||||||
init_lyra2rev3_4way_ctx();;
|
init_lyra2rev3_4way_ctx();;
|
||||||
#else
|
#else
|
||||||
init_lyra2rev3_ctx();
|
init_lyra2rev3_ctx();
|
||||||
@@ -57,7 +59,10 @@ bool lyra2rev3_thread_init()
|
|||||||
|
|
||||||
bool register_lyra2rev3_algo( algo_gate_t* gate )
|
bool register_lyra2rev3_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (LYRA2REV3_4WAY)
|
#if defined (LYRA2REV3_8WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_lyra2rev3_8way;
|
||||||
|
gate->hash = (void*)&lyra2rev3_8way_hash;
|
||||||
|
#elif defined (LYRA2REV3_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_lyra2rev3_4way;
|
gate->scanhash = (void*)&scanhash_lyra2rev3_4way;
|
||||||
gate->hash = (void*)&lyra2rev3_4way_hash;
|
gate->hash = (void*)&lyra2rev3_4way_hash;
|
||||||
#else
|
#else
|
||||||
@@ -203,13 +208,18 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
|||||||
|
|
||||||
bool register_phi2_algo( algo_gate_t* gate )
|
bool register_phi2_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
init_phi2_ctx();
|
// init_phi2_ctx();
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||||
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
|
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
|
||||||
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
|
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
|
||||||
gate->build_extraheader = (void*)&phi2_build_extraheader;
|
gate->build_extraheader = (void*)&phi2_build_extraheader;
|
||||||
gate->set_target = (void*)&alt_set_target;
|
gate->set_target = (void*)&alt_set_target;
|
||||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||||
|
#if defined(PHI2_4WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_phi2_4way;
|
||||||
|
#else
|
||||||
|
init_phi2_ctx();
|
||||||
gate->scanhash = (void*)&scanhash_phi2;
|
gate->scanhash = (void*)&scanhash_phi2;
|
||||||
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@@ -6,14 +6,24 @@
|
|||||||
#include "lyra2.h"
|
#include "lyra2.h"
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
#define LYRA2REV3_8WAY
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE2__)
|
||||||
#define LYRA2REV3_4WAY
|
#define LYRA2REV3_4WAY
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern __thread uint64_t* l2v3_wholeMatrix;
|
extern __thread uint64_t* l2v3_wholeMatrix;
|
||||||
|
|
||||||
bool register_lyra2rev3_algo( algo_gate_t* gate );
|
bool register_lyra2rev3_algo( algo_gate_t* gate );
|
||||||
|
#if defined(LYRA2REV3_8WAY)
|
||||||
|
|
||||||
#if defined(LYRA2REV3_4WAY)
|
void lyra2rev3_8way_hash( void *state, const void *input );
|
||||||
|
int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
bool init_lyra2rev3_8way_ctx();
|
||||||
|
|
||||||
|
#elif defined(LYRA2REV3_4WAY)
|
||||||
|
|
||||||
void lyra2rev3_4way_hash( void *state, const void *input );
|
void lyra2rev3_4way_hash( void *state, const void *input );
|
||||||
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
@@ -140,15 +150,29 @@ bool init_allium_ctx();
|
|||||||
|
|
||||||
/////////////////////////////////////////
|
/////////////////////////////////////////
|
||||||
|
|
||||||
|
#if defined(__AVX2__) && defined(__AES__)
|
||||||
|
// #define PHI2_4WAY
|
||||||
|
#endif
|
||||||
|
|
||||||
bool phi2_has_roots;
|
bool phi2_has_roots;
|
||||||
|
|
||||||
bool register_phi2_algo( algo_gate_t* gate );
|
bool register_phi2_algo( algo_gate_t* gate );
|
||||||
|
#if defined(PHI2_4WAY)
|
||||||
|
|
||||||
|
void phi2_hash_4way( void *state, const void *input );
|
||||||
|
int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
//void init_phi2_ctx();
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
void phi2_hash( void *state, const void *input );
|
void phi2_hash( void *state, const void *input );
|
||||||
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
void init_phi2_ctx();
|
void init_phi2_ctx();
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // LYRA2_GATE_H__
|
#endif // LYRA2_GATE_H__
|
||||||
|
|
||||||
|
|
||||||
|
@@ -566,7 +566,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
|||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
||||||
#elif defined(__SSE4_2__)
|
#elif defined(__SSE2__)
|
||||||
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
||||||
#else
|
#else
|
||||||
memset( wholeMatrix, 0, i );
|
memset( wholeMatrix, 0, i );
|
||||||
|
@@ -36,17 +36,16 @@ void lyra2h_4way_hash( void *state, const void *input )
|
|||||||
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||||
blake256_4way_close( &ctx_blake, vhash );
|
blake256_4way_close( &ctx_blake, vhash );
|
||||||
|
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||||
|
|
||||||
LYRA2Z( lyra2h_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 16, 16, 16 );
|
LYRA2Z( lyra2h_4way_matrix, state, 32, hash0, 32, hash0, 32,
|
||||||
LYRA2Z( lyra2h_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 16, 16, 16 );
|
16, 16, 16 );
|
||||||
LYRA2Z( lyra2h_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 16, 16, 16 );
|
LYRA2Z( lyra2h_4way_matrix, state+32, 32, hash1, 32, hash1,
|
||||||
LYRA2Z( lyra2h_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 16, 16, 16 );
|
32, 16, 16, 16 );
|
||||||
|
LYRA2Z( lyra2h_4way_matrix, state+64, 32, hash2, 32, hash2,
|
||||||
memcpy( state, hash0, 32 );
|
32, 16, 16, 16 );
|
||||||
memcpy( state+32, hash1, 32 );
|
LYRA2Z( lyra2h_4way_matrix, state+96, 32, hash3, 32, hash3,
|
||||||
memcpy( state+64, hash2, 32 );
|
32, 16, 16, 16 );
|
||||||
memcpy( state+96, hash3, 32 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
@@ -54,49 +53,36 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t _ALIGN(64) edata[20];
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
uint32_t *nonces = work->nonces;
|
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||||
int num_found = 0;
|
|
||||||
uint32_t *noncep= vdata + 76; // 19*4
|
|
||||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
for ( int i=0; i < 20; i++ )
|
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||||
be32enc( &edata[i], pdata[i] );
|
|
||||||
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
lyra2h_4way_midstate( vdata );
|
lyra2h_4way_midstate( vdata );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
be32enc( noncep, n );
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
be32enc( noncep+1, n+1 );
|
|
||||||
be32enc( noncep+2, n+2 );
|
|
||||||
be32enc( noncep+3, n+3 );
|
|
||||||
|
|
||||||
be32enc( &edata[19], n );
|
|
||||||
lyra2h_4way_hash( hash, vdata );
|
lyra2h_4way_hash( hash, vdata );
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||||
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
nonces[ num_found++ ] = n+i;
|
submit_solution( work, hash+(i<<3), mythr, i );
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||||
&& !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -6,7 +6,7 @@
|
|||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
#include "lyra2.h"
|
#include "lyra2.h"
|
||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
#if defined(__AES__)
|
#if defined(__AES__)
|
||||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||||
#endif
|
#endif
|
||||||
|
@@ -42,10 +42,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
|||||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||||
blake256_4way_close( &ctx.blake, vhash );
|
blake256_4way_close( &ctx.blake, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64( vhash64, vhash, 256 );
|
mm256_rintrlv_4x32_4x64( vhash64, vhash, 256 );
|
||||||
|
|
||||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||||
|
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
@@ -60,10 +62,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
|||||||
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||||
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
mm256_intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||||
|
|
||||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||||
skein256_4way_close( &ctx.skein, vhash64 );
|
skein256_4way_close( &ctx.skein, vhash64 );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||||
|
|
||||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||||
@@ -74,11 +78,10 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
|||||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||||
|
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||||
bmw256_4way( &ctx.bmw, vhash, 32 );
|
|
||||||
bmw256_4way_close( &ctx.bmw, vhash );
|
|
||||||
|
|
||||||
mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
bmw256_4way( &ctx.bmw, vhash, 32 );
|
||||||
|
bmw256_4way_close( &ctx.bmw, state );
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
@@ -86,49 +89,44 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t _ALIGN(64) edata[20];
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
|
uint32_t lane_hash[8];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint32_t *nonces = work->nonces;
|
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||||
int num_found = 0;
|
|
||||||
uint32_t *noncep = vdata + 76; // 19*4
|
|
||||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
|
|
||||||
swab32_array( edata, pdata, 20 );
|
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||||
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
blake256_4way_init( &l2v2_4way_ctx.blake );
|
blake256_4way_init( &l2v2_4way_ctx.blake );
|
||||||
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );
|
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );
|
||||||
|
|
||||||
do {
|
do
|
||||||
be32enc( noncep, n );
|
{
|
||||||
be32enc( noncep+1, n+1 );
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
be32enc( noncep+2, n+2 );
|
|
||||||
be32enc( noncep+3, n+3 );
|
|
||||||
|
|
||||||
lyra2rev2_4way_hash( hash, vdata );
|
lyra2rev2_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
nonces[ num_found++ ] = n+i;
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||||
&& !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1,12 +1,138 @@
|
|||||||
#include "lyra2-gate.h"
|
#include "lyra2-gate.h"
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
|
|
||||||
#if defined (LYRA2REV3_4WAY)
|
|
||||||
|
|
||||||
#include "algo/blake/blake-hash-4way.h"
|
#include "algo/blake/blake-hash-4way.h"
|
||||||
#include "algo/bmw/bmw-hash-4way.h"
|
#include "algo/bmw/bmw-hash-4way.h"
|
||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
|
|
||||||
|
|
||||||
|
#if defined (LYRA2REV3_8WAY)
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
blake256_8way_context blake;
|
||||||
|
cubehashParam cube;
|
||||||
|
bmw256_8way_context bmw;
|
||||||
|
} lyra2v3_8way_ctx_holder;
|
||||||
|
|
||||||
|
static lyra2v3_8way_ctx_holder l2v3_8way_ctx;
|
||||||
|
|
||||||
|
bool init_lyra2rev3_8way_ctx()
|
||||||
|
{
|
||||||
|
blake256_8way_init( &l2v3_8way_ctx.blake );
|
||||||
|
cubehashInit( &l2v3_8way_ctx.cube, 256, 16, 32 );
|
||||||
|
bmw256_8way_init( &l2v3_8way_ctx.bmw );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void lyra2rev3_8way_hash( void *state, const void *input )
|
||||||
|
{
|
||||||
|
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash4[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash5[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash6[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t hash7[8] __attribute__ ((aligned (32)));
|
||||||
|
lyra2v3_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
|
memcpy( &ctx, &l2v3_8way_ctx, sizeof(l2v3_8way_ctx) );
|
||||||
|
|
||||||
|
blake256_8way( &ctx.blake, input, 80 );
|
||||||
|
blake256_8way_close( &ctx.blake, vhash );
|
||||||
|
|
||||||
|
mm256_dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||||
|
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||||
|
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||||
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
|
||||||
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
|
||||||
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||||
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash4, (const byte*) hash4, 32 );
|
||||||
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash5, (const byte*) hash5, 32 );
|
||||||
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash6, (const byte*) hash6, 32 );
|
||||||
|
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash7, (const byte*) hash7, 32 );
|
||||||
|
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV3( l2v3_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||||
|
|
||||||
|
mm256_intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, 256 );
|
||||||
|
|
||||||
|
bmw256_8way( &ctx.bmw, vhash, 32 );
|
||||||
|
bmw256_8way_close( &ctx.bmw, state );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *hash7 = &(hash[7<<3]);
|
||||||
|
uint32_t lane_hash[8];
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
|
if ( opt_benchmark )
|
||||||
|
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
|
|
||||||
|
mm256_bswap_intrlv80_8x32( vdata, pdata );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||||
|
n+3, n+2, n+1, n ) );
|
||||||
|
|
||||||
|
lyra2rev3_8way_hash( hash, vdata );
|
||||||
|
pdata[19] = n;
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
||||||
|
{
|
||||||
|
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n += 8;
|
||||||
|
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined (LYRA2REV3_4WAY)
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
blake256_4way_context blake;
|
blake256_4way_context blake;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
@@ -35,7 +161,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
blake256_4way( &ctx.blake, input, 80 );
|
blake256_4way( &ctx.blake, input, 80 );
|
||||||
blake256_4way_close( &ctx.blake, vhash );
|
blake256_4way_close( &ctx.blake, vhash );
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||||
|
|
||||||
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||||
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||||
@@ -55,10 +181,9 @@ void lyra2rev3_4way_hash( void *state, const void *input )
|
|||||||
LYRA2REV3( l2v3_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
LYRA2REV3( l2v3_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||||
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||||
|
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||||
bmw256_4way( &ctx.bmw, vhash, 32 );
|
bmw256_4way( &ctx.bmw, vhash, 32 );
|
||||||
bmw256_4way_close( &ctx.bmw, state );
|
bmw256_4way_close( &ctx.bmw, state );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
@@ -66,7 +191,6 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
uint32_t lane_hash[8];
|
uint32_t lane_hash[8];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
@@ -80,15 +204,7 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
|
|
||||||
// Need big endian data
|
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
|
||||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
|
||||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
@@ -99,16 +215,14 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||||
{
|
{
|
||||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -36,17 +36,12 @@ void lyra2z_4way_hash( void *state, const void *input )
|
|||||||
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||||
blake256_4way_close( &ctx_blake, vhash );
|
blake256_4way_close( &ctx_blake, vhash );
|
||||||
|
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||||
|
|
||||||
LYRA2Z( lyra2z_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
LYRA2Z( lyra2z_4way_matrix, state , 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||||
LYRA2Z( lyra2z_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
LYRA2Z( lyra2z_4way_matrix, state+32, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||||
LYRA2Z( lyra2z_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
LYRA2Z( lyra2z_4way_matrix, state+64, 32, hash2, 32, hash2, 32, 8, 8, 8 );
|
||||||
LYRA2Z( lyra2z_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
LYRA2Z( lyra2z_4way_matrix, state+96, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||||
|
|
||||||
memcpy( state, hash0, 32 );
|
|
||||||
memcpy( state+32, hash1, 32 );
|
|
||||||
memcpy( state+64, hash2, 32 );
|
|
||||||
memcpy( state+96, hash3, 32 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
@@ -54,7 +49,6 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t _ALIGN(64) edata[20];
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -66,13 +60,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
|
||||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
lyra2z_4way_midstate( vdata );
|
lyra2z_4way_midstate( vdata );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@@ -82,16 +70,11 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||||
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
submit_solution( work, hash+(i<<3), mythr, i );
|
||||||
if ( submit_work( mythr, work ) )
|
|
||||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
|
||||||
accepted_share_count + rejected_share_count + 1,
|
|
||||||
thr_id, i );
|
|
||||||
else
|
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||||
@@ -136,8 +119,8 @@ void lyra2z_8way_hash( void *state, const void *input )
|
|||||||
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
||||||
blake256_8way_close( &ctx_blake, vhash );
|
blake256_8way_close( &ctx_blake, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_8x32( hash0, hash1, hash2, hash3,
|
mm256_dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||||
|
|
||||||
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||||
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||||
@@ -163,7 +146,6 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t _ALIGN(64) edata[20];
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -175,13 +157,7 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
mm256_bswap_intrlv80_8x32( vdata, pdata );
|
||||||
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
|
||||||
edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
lyra2z_8way_midstate( vdata );
|
lyra2z_8way_midstate( vdata );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@@ -191,7 +167,8 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ )
|
||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||||
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr, i );
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
#include "lyra2-gate.h"
|
#include "lyra2-gate.h"
|
||||||
#include "lyra2.h"
|
#include "lyra2.h"
|
||||||
#include "algo/blake/sph_blake.h"
|
#include "algo/blake/sph_blake.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
__thread uint64_t* lyra2z_matrix;
|
__thread uint64_t* lyra2z_matrix;
|
||||||
|
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
#include "lyra2.h"
|
#include "lyra2.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
__thread uint64_t* lyra2z330_wholeMatrix;
|
__thread uint64_t* lyra2z330_wholeMatrix;
|
||||||
|
|
||||||
@@ -30,14 +30,17 @@ int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
for (int i=0; i < 19; i++)
|
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
be32enc(&endiandata[i], pdata[i]);
|
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
|
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
|
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
|
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc(&endiandata[19], nonce);
|
be32enc(&endiandata[19], nonce);
|
||||||
lyra2z330_hash( hash, endiandata, work->height );
|
lyra2z330_hash( hash, endiandata, work->height );
|
||||||
if ( hash[7] <= Htarg && fulltest(hash, ptarget) )
|
if ( hash[7] <= Htarg && fulltest(hash, ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
work_set_target_ratio(work, hash);
|
work_set_target_ratio(work, hash);
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
|
233
algo/lyra2/phi2-4way.c
Normal file
233
algo/lyra2/phi2-4way.c
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
/**
|
||||||
|
* Phi-2 algo Implementation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "lyra2-gate.h"
|
||||||
|
|
||||||
|
#if defined(PHI2_4WAY)
|
||||||
|
|
||||||
|
#include "algo/skein/skein-hash-4way.h"
|
||||||
|
#include "algo/jh/jh-hash-4way.h"
|
||||||
|
#include "algo/gost/sph_gost.h"
|
||||||
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
cubehashParam cube;
|
||||||
|
jh512_4way_context jh;
|
||||||
|
hashState_echo echo;
|
||||||
|
// hashState_echo echo2;
|
||||||
|
sph_gost512_context gost;
|
||||||
|
skein512_4way_context skein;
|
||||||
|
} phi2_ctx_holder;
|
||||||
|
/*
|
||||||
|
phi2_ctx_holder phi2_ctx;
|
||||||
|
|
||||||
|
void init_phi2_ctx()
|
||||||
|
{
|
||||||
|
cubehashInit( &phi2_ctx.cube, 512, 16, 32 );
|
||||||
|
sph_jh512_init(&phi2_ctx.jh);
|
||||||
|
init_echo( &phi2_ctx.echo1, 512 );
|
||||||
|
init_echo( &phi2_ctx.echo2, 512 );
|
||||||
|
sph_gost512_init(&phi2_ctx.gost);
|
||||||
|
sph_skein512_init(&phi2_ctx.skein);
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
void phi2_hash_4way( void *state, const void *input )
|
||||||
|
{
|
||||||
|
uint32_t hash[4][16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hashA[4][16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hashB[4][16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vhash[4*16] __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
// unsigned char _ALIGN(128) hash[64];
|
||||||
|
// unsigned char _ALIGN(128) hashA[64];
|
||||||
|
// unsigned char _ALIGN(128) hashB[64];
|
||||||
|
|
||||||
|
phi2_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
|
// memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
|
||||||
|
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[0], (const byte*)input,
|
||||||
|
phi2_has_roots ? 144 : 80 );
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[1], (const byte*)input+144,
|
||||||
|
phi2_has_roots ? 144 : 80 );
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[2], (const byte*)input+288,
|
||||||
|
phi2_has_roots ? 144 : 80 );
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[3], (const byte*)input+432,
|
||||||
|
phi2_has_roots ? 144 : 80 );
|
||||||
|
|
||||||
|
LYRA2RE( &hashA[0][0], 32, &hashB[0][0], 32, &hashB[0][0], 32, 1, 8, 8 );
|
||||||
|
LYRA2RE( &hashA[0][8], 32, &hashB[0][8], 32, &hashB[0][8], 32, 1, 8, 8 );
|
||||||
|
LYRA2RE( &hashA[1][0], 32, &hashB[1][0], 32, &hashB[1][0], 32, 1, 8, 8 );
|
||||||
|
LYRA2RE( &hashA[1][8], 32, &hashB[1][8], 32, &hashB[1][8], 32, 1, 8, 8 );
|
||||||
|
LYRA2RE( &hashA[2][0], 32, &hashB[2][0], 32, &hashB[2][0], 32, 1, 8, 8 );
|
||||||
|
LYRA2RE( &hashA[2][8], 32, &hashB[2][8], 32, &hashB[2][8], 32, 1, 8, 8 );
|
||||||
|
LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
|
||||||
|
LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
|
||||||
|
|
||||||
|
jh512_4way_init( &ctx.jh );
|
||||||
|
jh512_4way( &ctx.jh, vhash, 64 );
|
||||||
|
jh512_4way_close( &ctx.jh, vhash );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
|
||||||
|
|
||||||
|
if ( hash[0][0] & 1 )
|
||||||
|
{
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512( &ctx.gost, (const void*)hash[0], 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, (void*)hash[0] );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||||
|
(const BitSequence *)hash[0], 512 );
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||||
|
(const BitSequence *)hash[0], 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash[1][0] & 1 )
|
||||||
|
{
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512( &ctx.gost, (const void*)hash[1], 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, (void*)hash[1] );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||||
|
(const BitSequence *)hash[1], 512 );
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||||
|
(const BitSequence *)hash[1], 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash[2][0] & 1 )
|
||||||
|
{
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512( &ctx.gost, (const void*)hash[2], 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, (void*)hash[2] );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||||
|
(const BitSequence *)hash[2], 512 );
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||||
|
(const BitSequence *)hash[2], 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash[3][0] & 1 )
|
||||||
|
{
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
|
sph_gost512( &ctx.gost, (const void*)hash[3], 64 );
|
||||||
|
sph_gost512_close( &ctx.gost, (void*)hash[3] );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||||
|
(const BitSequence *)hash[3], 512 );
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||||
|
(const BitSequence *)hash[3], 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
|
||||||
|
|
||||||
|
skein512_4way_init( &ctx.skein );
|
||||||
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
|
skein512_4way_close( &ctx.skein, vhash );
|
||||||
|
|
||||||
|
for (int i=0; i<4; i++)
|
||||||
|
{
|
||||||
|
( (uint64_t*)vhash )[i] ^= ( (uint64_t*)vhash )[i+4];
|
||||||
|
( (uint64_t*)vhash+ 8 )[i] ^= ( (uint64_t*)vhash+ 8 )[i+4];
|
||||||
|
( (uint64_t*)vhash+16 )[i] ^= ( (uint64_t*)vhash+16 )[i+4];
|
||||||
|
( (uint64_t*)vhash+24 )[i] ^= ( (uint64_t*)vhash+24 )[i+4];
|
||||||
|
}
|
||||||
|
// for ( int i = 0; i < 4; i++ )
|
||||||
|
// casti_m256i( vhash, i ) = _mm256_xor_si256( casti_m256i( vhash, i ),
|
||||||
|
// casti_m256i( vhash, i+4 ) );
|
||||||
|
|
||||||
|
memcpy( state, vhash, 128 );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t _ALIGN(128) hash[8];
|
||||||
|
uint32_t _ALIGN(128) edata[36];
|
||||||
|
uint32_t vdata[4][36] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *hash7 = &(hash[25]);
|
||||||
|
uint32_t lane_hash[8];
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
|
if(opt_benchmark){
|
||||||
|
ptarget[7] = 0x00ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Data is not interleaved, but hash is.
|
||||||
|
// any non-zero data at index 20 or above sets roots true.
|
||||||
|
// Split up the operations, bswap first, then set roots.
|
||||||
|
|
||||||
|
phi2_has_roots = false;
|
||||||
|
for ( int i=0; i < 36; i++ )
|
||||||
|
{
|
||||||
|
be32enc(&edata[i], pdata[i]);
|
||||||
|
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
casti_m256i( vdata[0], 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
|
casti_m256i( vdata[0], 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
|
casti_m256i( vdata[0], 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
|
||||||
|
casti_m256i( vdata[0], 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
|
||||||
|
casti_m128i( vdata[0], 8 ) = mm128_bswap_32( casti_m128i( pdata, 8 ) );
|
||||||
|
phi2_has_roots = mm128_anybits1( casti_m128i( vdata[0], 5 ) ) ||
|
||||||
|
mm128_anybits1( casti_m128i( vdata[0], 6 ) ) ||
|
||||||
|
mm128_anybits1( casti_m128i( vdata[0], 7 ) ) ||
|
||||||
|
mm128_anybits1( casti_m128i( vdata[0], 8 ) );
|
||||||
|
*/
|
||||||
|
|
||||||
|
memcpy( vdata[0], edata, 144 );
|
||||||
|
memcpy( vdata[1], edata, 144 );
|
||||||
|
memcpy( vdata[2], edata, 144 );
|
||||||
|
memcpy( vdata[3], edata, 144 );
|
||||||
|
|
||||||
|
do {
|
||||||
|
be32enc( &vdata[0][19], n );
|
||||||
|
be32enc( &vdata[1][19], n+1 );
|
||||||
|
be32enc( &vdata[2][19], n+2 );
|
||||||
|
be32enc( &vdata[3][19], n+3 );
|
||||||
|
|
||||||
|
phi2_hash_4way( hash, vdata );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
|
||||||
|
{
|
||||||
|
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
|
||||||
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n += 4;
|
||||||
|
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // PHI2_4WAY
|
@@ -50,11 +50,11 @@ void phi2_hash(void *state, const void *input)
|
|||||||
unsigned char _ALIGN(128) hashA[64];
|
unsigned char _ALIGN(128) hashA[64];
|
||||||
unsigned char _ALIGN(128) hashB[64];
|
unsigned char _ALIGN(128) hashB[64];
|
||||||
|
|
||||||
phi2_ctx_holder ctx __attribute__ ((aligned (64)));
|
phi2_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
|
memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
|
||||||
|
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB, (const byte*)input,
|
cubehashUpdateDigest( &ctx.cube, (byte*)hashB, (const byte*)input,
|
||||||
phi2_has_roots ? 144 : 80 );
|
phi2_has_roots ? 144 : 80 );
|
||||||
|
|
||||||
LYRA2RE( &hashA[ 0], 32, &hashB[ 0], 32, &hashB[ 0], 32, 1, 8, 8 );
|
LYRA2RE( &hashA[ 0], 32, &hashB[ 0], 32, &hashB[ 0], 32, 1, 8, 8 );
|
||||||
LYRA2RE( &hashA[32], 32, &hashB[32], 32, &hashB[32], 32, 1, 8, 8 );
|
LYRA2RE( &hashA[32], 32, &hashB[32], 32, &hashB[32], 32, 1, 8, 8 );
|
||||||
@@ -63,17 +63,17 @@ void phi2_hash(void *state, const void *input)
|
|||||||
sph_jh512_close( &ctx.jh, (void*)hash );
|
sph_jh512_close( &ctx.jh, (void*)hash );
|
||||||
|
|
||||||
if ( hash[0] & 1 )
|
if ( hash[0] & 1 )
|
||||||
{
|
{
|
||||||
sph_gost512( &ctx.gost, (const void*)hash, 64 );
|
sph_gost512( &ctx.gost, (const void*)hash, 64 );
|
||||||
sph_gost512_close( &ctx.gost, (void*)hash );
|
sph_gost512_close( &ctx.gost, (void*)hash );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
#if defined(__AES__)
|
||||||
update_final_echo ( &ctx.echo1, (BitSequence *)hash,
|
update_final_echo ( &ctx.echo1, (BitSequence *)hash,
|
||||||
(const BitSequence *)hash, 512 );
|
(const BitSequence *)hash, 512 );
|
||||||
update_final_echo ( &ctx.echo2, (BitSequence *)hash,
|
update_final_echo ( &ctx.echo2, (BitSequence *)hash,
|
||||||
(const BitSequence *)hash, 512 );
|
(const BitSequence *)hash, 512 );
|
||||||
#else
|
#else
|
||||||
sph_echo512( &ctx.echo1, (const void*)hash, 64 );
|
sph_echo512( &ctx.echo1, (const void*)hash, 64 );
|
||||||
sph_echo512_close( &ctx.echo1, (void*)hash );
|
sph_echo512_close( &ctx.echo1, (void*)hash );
|
||||||
|
@@ -51,7 +51,7 @@ inline void initState( uint64_t State[/*16*/] )
|
|||||||
state[3] = _mm256_set_epi64x( blake2b_IV[7], blake2b_IV[6],
|
state[3] = _mm256_set_epi64x( blake2b_IV[7], blake2b_IV[6],
|
||||||
blake2b_IV[5], blake2b_IV[4] );
|
blake2b_IV[5], blake2b_IV[4] );
|
||||||
|
|
||||||
#elif defined (__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
__m128i* state = (__m128i*)State;
|
__m128i* state = (__m128i*)State;
|
||||||
|
|
||||||
@@ -137,7 +137,7 @@ inline void squeeze( uint64_t *State, byte *Out, unsigned int len )
|
|||||||
//Squeezes remaining bytes
|
//Squeezes remaining bytes
|
||||||
memcpy_256( out, state, ( len_m256i % BLOCK_LEN_M256I ) );
|
memcpy_256( out, state, ( len_m256i % BLOCK_LEN_M256I ) );
|
||||||
|
|
||||||
#elif defined (__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
const int len_m128i = len / 16;
|
const int len_m128i = len / 16;
|
||||||
const int fullBlocks = len_m128i / BLOCK_LEN_M128I;
|
const int fullBlocks = len_m128i / BLOCK_LEN_M128I;
|
||||||
@@ -205,7 +205,7 @@ inline void absorbBlock( uint64_t *State, const uint64_t *In )
|
|||||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||||
|
|
||||||
#elif defined (__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
__m128i* state = (__m128i*)State;
|
__m128i* state = (__m128i*)State;
|
||||||
__m128i* in = (__m128i*)In;
|
__m128i* in = (__m128i*)In;
|
||||||
@@ -273,7 +273,7 @@ inline void absorbBlockBlake2Safe( uint64_t *State, const uint64_t *In )
|
|||||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||||
|
|
||||||
#elif defined (__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
__m128i* state = (__m128i*)State;
|
__m128i* state = (__m128i*)State;
|
||||||
__m128i* in = (__m128i*)In;
|
__m128i* in = (__m128i*)In;
|
||||||
@@ -355,7 +355,7 @@ inline void reducedSqueezeRow0( uint64_t* State, uint64_t* rowOut,
|
|||||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||||
|
|
||||||
#elif defined (__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
__m128i* state = (__m128i*)State;
|
__m128i* state = (__m128i*)State;
|
||||||
__m128i state0 = _mm_load_si128( state );
|
__m128i state0 = _mm_load_si128( state );
|
||||||
@@ -494,7 +494,7 @@ inline void reducedDuplexRow1( uint64_t *State, uint64_t *rowIn,
|
|||||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||||
|
|
||||||
#elif defined (__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
__m128i* state = (__m128i*)State;
|
__m128i* state = (__m128i*)State;
|
||||||
__m128i state0 = _mm_load_si128( state );
|
__m128i state0 = _mm_load_si128( state );
|
||||||
@@ -694,7 +694,7 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
|||||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||||
|
|
||||||
#elif defined (__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
__m128i* in = (__m128i*)rowIn;
|
__m128i* in = (__m128i*)rowIn;
|
||||||
__m128i* inout = (__m128i*)rowInOut;
|
__m128i* inout = (__m128i*)rowInOut;
|
||||||
@@ -713,9 +713,9 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
|||||||
__m128i* state = (__m128i*)State;
|
__m128i* state = (__m128i*)State;
|
||||||
|
|
||||||
// For the last round in this function not optimized for AVX
|
// For the last round in this function not optimized for AVX
|
||||||
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
// uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
// uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
||||||
uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
// uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
||||||
|
|
||||||
for ( i = 0; i < nCols; i++ )
|
for ( i = 0; i < nCols; i++ )
|
||||||
{
|
{
|
||||||
@@ -750,6 +750,28 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
|||||||
out[4] = _mm_xor_si128( state[4], in[4] );
|
out[4] = _mm_xor_si128( state[4], in[4] );
|
||||||
out[5] = _mm_xor_si128( state[5], in[5] );
|
out[5] = _mm_xor_si128( state[5], in[5] );
|
||||||
|
|
||||||
|
|
||||||
|
__m128i t0, t1;
|
||||||
|
t0 = _mm_srli_si128( state[0], 8 );
|
||||||
|
t1 = _mm_srli_si128( state[1], 8 );
|
||||||
|
inout[0] = _mm_xor_si128( inout[0],
|
||||||
|
_mm_or_si128( _mm_slli_si128( state[0], 8 ),
|
||||||
|
_mm_srli_si128( state[5], 8 ) ) );
|
||||||
|
inout[1] = _mm_xor_si128( inout[1],
|
||||||
|
_mm_or_si128( _mm_slli_si128( state[1], 8 ), t0 ) );
|
||||||
|
t0 = _mm_srli_si128( state[2], 8 );
|
||||||
|
inout[2] = _mm_xor_si128( inout[2],
|
||||||
|
_mm_or_si128( _mm_slli_si128( state[2], 8 ), t1 ) );
|
||||||
|
t1 = _mm_srli_si128( state[3], 8 );
|
||||||
|
inout[3] = _mm_xor_si128( inout[3],
|
||||||
|
_mm_or_si128( _mm_slli_si128( state[3], 8 ), t0 ) );
|
||||||
|
t0 = _mm_srli_si128( state[4], 8 );
|
||||||
|
inout[4] = _mm_xor_si128( inout[4],
|
||||||
|
_mm_or_si128( _mm_slli_si128( state[4], 8 ), t1 ) );
|
||||||
|
inout[5] = _mm_xor_si128( inout[5],
|
||||||
|
_mm_or_si128( _mm_slli_si128( state[5], 8 ), t0 ) );
|
||||||
|
|
||||||
|
/*
|
||||||
ptrWordInOut[0] ^= State[11];
|
ptrWordInOut[0] ^= State[11];
|
||||||
ptrWordInOut[1] ^= State[0];
|
ptrWordInOut[1] ^= State[0];
|
||||||
ptrWordInOut[2] ^= State[1];
|
ptrWordInOut[2] ^= State[1];
|
||||||
@@ -768,7 +790,7 @@ inline void reducedDuplexRowSetup( uint64_t *State, uint64_t *rowIn,
|
|||||||
ptrWordIn += BLOCK_LEN_INT64;
|
ptrWordIn += BLOCK_LEN_INT64;
|
||||||
//Output: goes to previous column
|
//Output: goes to previous column
|
||||||
ptrWordOut -= BLOCK_LEN_INT64;
|
ptrWordOut -= BLOCK_LEN_INT64;
|
||||||
|
*/
|
||||||
inout += BLOCK_LEN_M128I;
|
inout += BLOCK_LEN_M128I;
|
||||||
in += BLOCK_LEN_M128I;
|
in += BLOCK_LEN_M128I;
|
||||||
out -= BLOCK_LEN_M128I;
|
out -= BLOCK_LEN_M128I;
|
||||||
@@ -930,7 +952,7 @@ inline void reducedDuplexRow( uint64_t *State, uint64_t *rowIn,
|
|||||||
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
_mm256_store_si256( (__m256i*)State + 2, state2 );
|
||||||
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
_mm256_store_si256( (__m256i*)State + 3, state3 );
|
||||||
|
|
||||||
#elif defined(__SSE4_2__)
|
#elif defined (__SSE2__)
|
||||||
|
|
||||||
__m128i* state = (__m128i*)State;
|
__m128i* state = (__m128i*)State;
|
||||||
__m128i* in = (__m128i*)rowIn;
|
__m128i* in = (__m128i*)rowIn;
|
||||||
|
@@ -23,7 +23,7 @@
|
|||||||
#define SPONGE_H_
|
#define SPONGE_H_
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
#define ALIGN __attribute__ ((aligned(32)))
|
#define ALIGN __attribute__ ((aligned(32)))
|
||||||
@@ -59,7 +59,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
|||||||
// returns void, updates all args
|
// returns void, updates all args
|
||||||
#define G_4X64(a,b,c,d) \
|
#define G_4X64(a,b,c,d) \
|
||||||
a = _mm256_add_epi64( a, b ); \
|
a = _mm256_add_epi64( a, b ); \
|
||||||
d = mm256_ror_64( _mm256_xor_si256( d, a), 32 ); \
|
d = mm256_ror_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||||
c = _mm256_add_epi64( c, d ); \
|
c = _mm256_add_epi64( c, d ); \
|
||||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 24 ); \
|
b = mm256_ror_64( _mm256_xor_si256( b, c ), 24 ); \
|
||||||
a = _mm256_add_epi64( a, b ); \
|
a = _mm256_add_epi64( a, b ); \
|
||||||
|
@@ -144,8 +144,8 @@ void init_m7m_ctx()
|
|||||||
#define NM7M 5
|
#define NM7M 5
|
||||||
#define SW_DIVS 5
|
#define SW_DIVS 5
|
||||||
#define M7_MIDSTATE_LEN 76
|
#define M7_MIDSTATE_LEN 76
|
||||||
int scanhash_m7m_hash( int thr_id, struct work* work,
|
int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||||
uint64_t max_nonce, unsigned long *hashes_done )
|
unsigned long *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -154,6 +154,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
|
|||||||
uint32_t hash[8] __attribute__((aligned(64)));
|
uint32_t hash[8] __attribute__((aligned(64)));
|
||||||
uint8_t bhash[7][64] __attribute__((aligned(64)));
|
uint8_t bhash[7][64] __attribute__((aligned(64)));
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint32_t usw_, mpzscale;
|
uint32_t usw_, mpzscale;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
char data_str[161], hash_str[65], target_str[65];
|
char data_str[161], hash_str[65], target_str[65];
|
||||||
|
@@ -445,7 +445,7 @@ void pluck_hash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_pluck(int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_pluck(int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -454,6 +454,8 @@ int scanhash_pluck(int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0ffff;
|
((uint32_t*)ptarget)[7] = 0x0ffff;
|
||||||
|
@@ -48,8 +48,8 @@ void anime_4way_hash( void *state, const void *input )
|
|||||||
__m256i* vhA = (__m256i*)vhashA;
|
__m256i* vhA = (__m256i*)vhashA;
|
||||||
__m256i* vhB = (__m256i*)vhashB;
|
__m256i* vhB = (__m256i*)vhashB;
|
||||||
__m256i vh_mask;
|
__m256i vh_mask;
|
||||||
|
const uint32_t mask = 8;
|
||||||
const __m256i bit3_mask = _mm256_set1_epi64x( 8 );
|
const __m256i bit3_mask = _mm256_set1_epi64x( 8 );
|
||||||
int i;
|
|
||||||
anime_4way_ctx_holder ctx;
|
anime_4way_ctx_holder ctx;
|
||||||
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
|
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
|
||||||
|
|
||||||
@@ -62,27 +62,44 @@ void anime_4way_hash( void *state, const void *input )
|
|||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||||
m256_zero );
|
m256_zero );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
|
||||||
(char*)hash0, 512 );
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
|
||||||
(char*)hash1, 512 );
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
|
||||||
(char*)hash2, 512 );
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
|
||||||
(char*)hash3, 512 );
|
|
||||||
mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
|
||||||
|
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
if ( hash0[0] & mask )
|
||||||
skein512_4way_close( &ctx.skein, vhashB );
|
{
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||||
|
(char*)hash0, 512 );
|
||||||
|
}
|
||||||
|
if ( hash1[0] & mask )
|
||||||
|
{
|
||||||
|
reinit_groestl( &ctx.groestl );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||||
|
(char*)hash1, 512 );
|
||||||
|
}
|
||||||
|
if ( hash2[0] & mask )
|
||||||
|
{
|
||||||
|
reinit_groestl( &ctx.groestl );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||||
|
(char*)hash2, 512 );
|
||||||
|
}
|
||||||
|
if ( hash3[0] & mask )
|
||||||
|
{
|
||||||
|
reinit_groestl( &ctx.groestl );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||||
|
(char*)hash3, 512 );
|
||||||
|
}
|
||||||
|
|
||||||
for ( i = 0; i < 8; i++ )
|
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
|
||||||
|
if ( mm256_anybits0( vh_mask ) )
|
||||||
|
{
|
||||||
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
|
skein512_4way_close( &ctx.skein, vhashB );
|
||||||
|
}
|
||||||
|
|
||||||
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
@@ -91,7 +108,8 @@ void anime_4way_hash( void *state, const void *input )
|
|||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
jh512_4way( &ctx.jh, vhash, 64 );
|
jh512_4way( &ctx.jh, vhash, 64 );
|
||||||
jh512_4way_close( &ctx.jh, vhash );
|
jh512_4way_close( &ctx.jh, vhash );
|
||||||
@@ -99,16 +117,20 @@ void anime_4way_hash( void *state, const void *input )
|
|||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||||
m256_zero );
|
m256_zero );
|
||||||
|
|
||||||
|
if ( mm256_anybits1( vh_mask ) )
|
||||||
|
{
|
||||||
blake512_4way_init( &ctx.blake );
|
blake512_4way_init( &ctx.blake );
|
||||||
blake512_4way( &ctx.blake, vhash, 64 );
|
blake512_4way( &ctx.blake, vhash, 64 );
|
||||||
blake512_4way_close( &ctx.blake, vhashA );
|
blake512_4way_close( &ctx.blake, vhashA );
|
||||||
|
}
|
||||||
|
if ( mm256_anybits0( vh_mask ) )
|
||||||
|
{
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||||
|
}
|
||||||
|
|
||||||
for ( i = 0; i < 8; i++ )
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
|
||||||
|
|
||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
@@ -120,33 +142,35 @@ void anime_4way_hash( void *state, const void *input )
|
|||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||||
m256_zero );
|
m256_zero );
|
||||||
|
|
||||||
keccak512_4way_init( &ctx.keccak );
|
if ( mm256_anybits1( vh_mask ) )
|
||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
{
|
||||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
keccak512_4way_init( &ctx.keccak );
|
||||||
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
|
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||||
|
}
|
||||||
|
if ( mm256_anybits0( vh_mask ) )
|
||||||
|
{
|
||||||
|
jh512_4way_init( &ctx.jh );
|
||||||
|
jh512_4way( &ctx.jh, vhash, 64 );
|
||||||
|
jh512_4way_close( &ctx.jh, vhashB );
|
||||||
|
}
|
||||||
|
|
||||||
jh512_4way_init( &ctx.jh );
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
jh512_4way( &ctx.jh, vhash, 64 );
|
|
||||||
jh512_4way_close( &ctx.jh, vhashB );
|
|
||||||
|
|
||||||
for ( i = 0; i < 8; i++ )
|
mm256_dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
|
||||||
|
|
||||||
mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t *nonces = work->nonces;
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
int num_found = 0;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint64_t htmax[] = {
|
uint64_t htmax[] = {
|
||||||
0,
|
0,
|
||||||
@@ -165,10 +189,7 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
swab32_array( endiandata, pdata, 20 );
|
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
for (int m=0; m < 6; m++)
|
for (int m=0; m < 6; m++)
|
||||||
if (Htarg <= htmax[m])
|
if (Htarg <= htmax[m])
|
||||||
@@ -177,30 +198,26 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
be32enc( noncep+2, n+1 );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||||
be32enc( noncep+4, n+2 );
|
|
||||||
be32enc( noncep+6, n+3 );
|
|
||||||
|
|
||||||
anime_4way_hash( hash, vdata );
|
anime_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||||
&& fulltest( hash+(i<<3), ptarget ) )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
nonces[ num_found++ ] = n+i;
|
submit_solution( work, hash+(i<<3), mythr, i );
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -13,19 +13,15 @@ bool register_anime_algo( algo_gate_t* gate );
|
|||||||
#if defined(ANIME_4WAY)
|
#if defined(ANIME_4WAY)
|
||||||
|
|
||||||
void anime_4way_hash( void *state, const void *input );
|
void anime_4way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_anime_4way_ctx();
|
void init_anime_4way_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void anime_hash( void *state, const void *input );
|
void anime_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_anime_ctx();
|
void init_anime_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -46,20 +46,6 @@ void init_anime_ctx()
|
|||||||
void anime_hash( void *state, const void *input )
|
void anime_hash( void *state, const void *input )
|
||||||
{
|
{
|
||||||
unsigned char hash[128] __attribute__ ((aligned (32)));
|
unsigned char hash[128] __attribute__ ((aligned (32)));
|
||||||
/*
|
|
||||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
|
||||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
|
||||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
|
||||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
|
||||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
|
||||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
|
||||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
|
||||||
__m256i* vh = (__m256i*)vhash;
|
|
||||||
__m256i* vhA = (__m256i*)vhashA;
|
|
||||||
__m256i* vhB = (__m256i*)vhashB;
|
|
||||||
__m256i vh_mask;
|
|
||||||
__m256i bit3_mask; bit3_mask = _mm256_set1_epi64x( 8 );
|
|
||||||
*/
|
|
||||||
uint32_t mask = 8;
|
uint32_t mask = 8;
|
||||||
anime_ctx_holder ctx;
|
anime_ctx_holder ctx;
|
||||||
memcpy( &ctx, &anime_ctx, sizeof(anime_ctx) );
|
memcpy( &ctx, &anime_ctx, sizeof(anime_ctx) );
|
||||||
@@ -134,7 +120,7 @@ void anime_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
{
|
{
|
||||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||||
@@ -142,6 +128,7 @@ int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint64_t htmax[] = {
|
uint64_t htmax[] = {
|
||||||
0,
|
0,
|
||||||
|
618
algo/quark/hmq1725-4way.c
Normal file
618
algo/quark/hmq1725-4way.c
Normal file
@@ -0,0 +1,618 @@
|
|||||||
|
#include "hmq1725-gate.h"
|
||||||
|
|
||||||
|
#if defined(HMQ1725_4WAY)
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "algo/blake/blake-hash-4way.h"
|
||||||
|
#include "algo/bmw/bmw-hash-4way.h"
|
||||||
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#include "algo/skein/skein-hash-4way.h"
|
||||||
|
#include "algo/jh/jh-hash-4way.h"
|
||||||
|
#include "algo/keccak/keccak-hash-4way.h"
|
||||||
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
|
#include "algo/simd/nist.h"
|
||||||
|
#include "algo/shavite/sph_shavite.h"
|
||||||
|
#include "algo/simd/simd-hash-2way.h"
|
||||||
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
|
#include "algo/hamsi/hamsi-hash-4way.h"
|
||||||
|
#include "algo/fugue/sph_fugue.h"
|
||||||
|
#include "algo/shabal/shabal-hash-4way.h"
|
||||||
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
|
#include "algo/haval/haval-hash-4way.h"
|
||||||
|
#include "algo/sha/sha2-hash-4way.h"
|
||||||
|
|
||||||
|
union _hmq1725_4way_context_overlay
|
||||||
|
{
|
||||||
|
blake512_4way_context blake;
|
||||||
|
bmw512_4way_context bmw;
|
||||||
|
hashState_groestl groestl;
|
||||||
|
skein512_4way_context skein;
|
||||||
|
jh512_4way_context jh;
|
||||||
|
keccak512_4way_context keccak;
|
||||||
|
hashState_luffa luffa;
|
||||||
|
cubehashParam cube;
|
||||||
|
sph_shavite512_context shavite;
|
||||||
|
hashState_sd simd;
|
||||||
|
hashState_echo echo;
|
||||||
|
hamsi512_4way_context hamsi;
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
shabal512_4way_context shabal;
|
||||||
|
sph_whirlpool_context whirlpool;
|
||||||
|
sha512_4way_context sha512;
|
||||||
|
haval256_5_4way_context haval;
|
||||||
|
};
|
||||||
|
typedef union _hmq1725_4way_context_overlay hmq1725_4way_context_overlay;
|
||||||
|
|
||||||
|
extern void hmq1725_4way_hash(void *state, const void *input)
|
||||||
|
{
|
||||||
|
// why so big? only really need 8, haval thing uses 16.
|
||||||
|
uint32_t hash0 [32] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash1 [32] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash2 [32] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash3 [32] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vhash [32<<2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vhashA[32<<2] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vhashB[32<<2] __attribute__ ((aligned (64)));
|
||||||
|
hmq1725_4way_context_overlay ctx __attribute__ ((aligned (64)));
|
||||||
|
__m256i vh_mask;
|
||||||
|
const __m256i vmask = _mm256_set1_epi64x( 24 );
|
||||||
|
const uint32_t mask = 24;
|
||||||
|
__m256i* vh = (__m256i*)vhash;
|
||||||
|
__m256i* vhA = (__m256i*)vhashA;
|
||||||
|
__m256i* vhB = (__m256i*)vhashB;
|
||||||
|
|
||||||
|
bmw512_4way_init( &ctx.bmw );
|
||||||
|
bmw512_4way( &ctx.bmw, input, 80 );
|
||||||
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
|
|
||||||
|
// first fork, A is groestl serial, B is skein parallel.
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||||
|
m256_zero );
|
||||||
|
|
||||||
|
// A
|
||||||
|
|
||||||
|
// if ( hash0[0] & mask )
|
||||||
|
// {
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||||
|
(char*)hash0, 512 );
|
||||||
|
// }
|
||||||
|
// if ( hash1[0] & mask )
|
||||||
|
// {
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||||
|
(char*)hash1, 512 );
|
||||||
|
// }
|
||||||
|
// if ( hash2[0] & mask )
|
||||||
|
// {
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||||
|
(char*)hash2, 512 );
|
||||||
|
// }
|
||||||
|
// if ( hash3[0] & mask )
|
||||||
|
// {
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||||
|
(char*)hash3, 512 );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
// B
|
||||||
|
|
||||||
|
// if ( mm256_any_clr_256( vh_mask ) )
|
||||||
|
// {
|
||||||
|
skein512_4way_init( &ctx.skein );
|
||||||
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
|
skein512_4way_close( &ctx.skein, vhashB );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
jh512_4way_init( &ctx.jh );
|
||||||
|
jh512_4way( &ctx.jh, vhash, 64 );
|
||||||
|
jh512_4way_close( &ctx.jh, vhash );
|
||||||
|
|
||||||
|
keccak512_4way_init( &ctx.keccak );
|
||||||
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
|
// second fork, A = blake parallel, B= bmw parallel.
|
||||||
|
|
||||||
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||||
|
m256_zero );
|
||||||
|
|
||||||
|
// if ( mm256_any_set_256( vh_mask ) )
|
||||||
|
// {
|
||||||
|
blake512_4way_init( &ctx.blake );
|
||||||
|
blake512_4way( &ctx.blake, vhash, 64 );
|
||||||
|
blake512_4way_close( &ctx.blake, vhashA );
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if ( mm256_any_clr_256( vh_mask ) )
|
||||||
|
// {
|
||||||
|
bmw512_4way_init( &ctx.bmw );
|
||||||
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
|
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0,
|
||||||
|
(const BitSequence*)hash0, 64 );
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1,
|
||||||
|
(const BitSequence*)hash1, 64 );
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2,
|
||||||
|
(const BitSequence*)hash2, 64 );
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3,
|
||||||
|
(const BitSequence*)hash3, 64 );
|
||||||
|
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *)hash0, 64 );
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (BitSequence *)hash1,
|
||||||
|
(const BitSequence *)hash1, 64 );
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (BitSequence *)hash2,
|
||||||
|
(const BitSequence *)hash2, 64 );
|
||||||
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &ctx.cube, (BitSequence *)hash3,
|
||||||
|
(const BitSequence *)hash3, 64 );
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
// A= keccak parallel, B= jh parallel
|
||||||
|
|
||||||
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||||
|
m256_zero );
|
||||||
|
|
||||||
|
// if ( mm256_any_set_256( vh_mask ) )
|
||||||
|
// {
|
||||||
|
keccak512_4way_init( &ctx.keccak );
|
||||||
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
|
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if ( mm256_any_clr_256( vh_mask ) )
|
||||||
|
// {
|
||||||
|
jh512_4way_init( &ctx.jh );
|
||||||
|
jh512_4way( &ctx.jh, vhash, 64 );
|
||||||
|
jh512_4way_close( &ctx.jh, vhashB );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512 ( &ctx.shavite, hash0, 64 );
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512 ( &ctx.shavite, hash1, 64 );
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512 ( &ctx.shavite, hash2, 64 );
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash2 );
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512 ( &ctx.shavite, hash3, 64 );
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||||
|
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *)hash0, 512 );
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash1,
|
||||||
|
(const BitSequence *)hash1, 512 );
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash2,
|
||||||
|
(const BitSequence *)hash2, 512 );
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash3,
|
||||||
|
(const BitSequence *)hash3, 512 );
|
||||||
|
|
||||||
|
// A is whirlpool serial, B is haval parallel.
|
||||||
|
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||||
|
m256_zero );
|
||||||
|
// A
|
||||||
|
|
||||||
|
// if ( hash0[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||||
|
// }
|
||||||
|
// if ( hash1[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||||
|
// }
|
||||||
|
// if ( hash2[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||||
|
// }
|
||||||
|
// if ( hash3[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
// B
|
||||||
|
|
||||||
|
// if ( mm256_any_clr_256( vh_mask ) )
|
||||||
|
// {
|
||||||
|
haval256_5_4way_init( &ctx.haval );
|
||||||
|
haval256_5_4way( &ctx.haval, vhash, 64 );
|
||||||
|
haval256_5_4way_close( &ctx.haval, vhashB );
|
||||||
|
memset( &vhashB[8<<2], 0, 32<<2);
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *)hash0, 512 );
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||||
|
(const BitSequence *)hash1, 512 );
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||||
|
(const BitSequence *)hash2, 512 );
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
|
(const BitSequence *)hash3, 512 );
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
blake512_4way_init( &ctx.blake );
|
||||||
|
blake512_4way( &ctx.blake, vhash, 64 );
|
||||||
|
blake512_4way_close( &ctx.blake, vhash );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
// shavite & luffa, both serial, select individually.
|
||||||
|
|
||||||
|
if ( hash0[0] & mask )
|
||||||
|
{
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512( &ctx.shavite, hash0, 64 ); //
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash0 ); //8
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *)hash0, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash1[0] & mask )
|
||||||
|
{
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512( &ctx.shavite, hash1, 64 ); //
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash1 ); //8
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence *)hash1,
|
||||||
|
(const BitSequence *)hash1, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash2[0] & mask )
|
||||||
|
{
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512( &ctx.shavite, hash2, 64 ); //
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash2 ); //8
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence *)hash2,
|
||||||
|
(const BitSequence *)hash2, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash3[0] & mask )
|
||||||
|
{
|
||||||
|
sph_shavite512_init( &ctx.shavite );
|
||||||
|
sph_shavite512( &ctx.shavite, hash3, 64 ); //
|
||||||
|
sph_shavite512_close( &ctx.shavite, hash3 ); //8
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_luffa( &ctx.luffa, 512 );
|
||||||
|
update_and_final_luffa( &ctx.luffa, (BitSequence *)hash3,
|
||||||
|
(const BitSequence *)hash3, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
|
|
||||||
|
// A echo, B sd both serial
|
||||||
|
|
||||||
|
if ( hash0[0] & mask ) //4
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *)hash0, 512 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *)hash0, 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash1[0] & mask ) //4
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||||
|
(const BitSequence *)hash1, 512 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash1,
|
||||||
|
(const BitSequence *)hash1, 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash2[0] & mask ) //4
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||||
|
(const BitSequence *)hash2, 512 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash2,
|
||||||
|
(const BitSequence *)hash2, 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hash3[0] & mask ) //4
|
||||||
|
{
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
|
(const BitSequence *)hash3, 512 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
init_sd( &ctx.simd, 512 );
|
||||||
|
update_final_sd( &ctx.simd, (BitSequence *)hash3,
|
||||||
|
(const BitSequence *)hash3, 512 );
|
||||||
|
}
|
||||||
|
|
||||||
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
shabal512_4way_init( &ctx.shabal );
|
||||||
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
|
|
||||||
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
|
|
||||||
|
// A = fugue serial, B = sha512 prarallel
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||||
|
m256_zero );
|
||||||
|
|
||||||
|
// if ( hash0[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||||
|
// }
|
||||||
|
// if ( hash1[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||||
|
// }
|
||||||
|
// if ( hash2[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||||
|
// }
|
||||||
|
// if ( hash3[0] & mask )
|
||||||
|
// {
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
|
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||||
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
// if ( mm256_any_clr_256( vh_mask ) )
|
||||||
|
// {
|
||||||
|
sha512_4way_init( &ctx.sha512 );
|
||||||
|
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||||
|
sha512_4way_close( &ctx.sha512, vhashB );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
|
init_groestl( &ctx.groestl, 64 );
|
||||||
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
sha512_4way_init( &ctx.sha512 );
|
||||||
|
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||||
|
sha512_4way_close( &ctx.sha512, vhash );
|
||||||
|
|
||||||
|
// A = haval parallel, B = Whirlpool serial
|
||||||
|
|
||||||
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||||
|
m256_zero );
|
||||||
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
// if ( mm256_any_set_256( vh_mask ) ) //4
|
||||||
|
// {
|
||||||
|
haval256_5_4way_init( &ctx.haval );
|
||||||
|
haval256_5_4way( &ctx.haval, vhash, 64 );
|
||||||
|
haval256_5_4way_close( &ctx.haval, vhashA );
|
||||||
|
memset( &vhashA[8<<2], 0, 32<<2 );
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if ( !( hash0[0] & mask ) )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||||
|
// }
|
||||||
|
// if ( !( hash2[0] & mask ) )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash1, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||||
|
// }
|
||||||
|
// if ( !( hash2[0] & mask ) )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash2, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||||
|
// }
|
||||||
|
// if ( !( hash3[0] & mask ) )
|
||||||
|
// {
|
||||||
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
|
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||||
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
|
// }
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
bmw512_4way_init( &ctx.bmw );
|
||||||
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
|
memcpy(state, vhash, 32<<2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
|
// uint32_t *hash7 = &(hash[7<<2]);
|
||||||
|
// uint32_t lane_hash[8];
|
||||||
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
uint32_t n = pdata[19] - 1;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||||
|
0xFFF, 0xFFFF, 0x10000000 };
|
||||||
|
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||||
|
0xFFFFF000, 0xFFFF0000, 0 };
|
||||||
|
|
||||||
|
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||||
|
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||||
|
{
|
||||||
|
uint32_t mask = masks[ m ];
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||||
|
hmq1725_4way_hash( hash, vdata );
|
||||||
|
for ( int i = 0; i < 4; i++ )
|
||||||
|
if ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||||
|
{
|
||||||
|
if ( fulltest( (hash+(i<<3)), ptarget ) && !opt_benchmark )
|
||||||
|
{
|
||||||
|
pdata[19] = n + i;
|
||||||
|
submit_solution( work, (hash+(i<<3)), mythr, i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n += 4;
|
||||||
|
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HMQ1725_4WAY
|
17
algo/quark/hmq1725-gate.c
Normal file
17
algo/quark/hmq1725-gate.c
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#include "hmq1725-gate.h"
|
||||||
|
|
||||||
|
bool register_hmq1725_algo( algo_gate_t* gate )
|
||||||
|
{
|
||||||
|
#if defined(HMQ1725_4WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_hmq1725_4way;
|
||||||
|
gate->hash = (void*)&hmq1725_4way_hash;
|
||||||
|
#else
|
||||||
|
init_hmq1725_ctx();
|
||||||
|
gate->scanhash = (void*)&scanhash_hmq1725;
|
||||||
|
gate->hash = (void*)&hmq1725hash;
|
||||||
|
#endif
|
||||||
|
gate->set_target = (void*)&scrypt_set_target;
|
||||||
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
28
algo/quark/hmq1725-gate.h
Normal file
28
algo/quark/hmq1725-gate.h
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
#ifndef HMQ1725_GATE_H__
|
||||||
|
#define HMQ1725_GATE_H__ 1
|
||||||
|
|
||||||
|
#include "algo-gate-api.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if defined(__AVX2__) && defined(__AES__)
|
||||||
|
// #define HMQ1725_4WAY
|
||||||
|
#endif
|
||||||
|
|
||||||
|
bool register_hmq1725_algo( algo_gate_t* gate );
|
||||||
|
|
||||||
|
#if defined(HMQ1725_4WAY)
|
||||||
|
|
||||||
|
void hmq1725_4way_hash( void *state, const void *input );
|
||||||
|
int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
void hmq1725hash( void *state, const void *input );
|
||||||
|
int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
void init_hmq1725_ctx();
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // HMQ1725_GATE_H__
|
@@ -1,4 +1,4 @@
|
|||||||
#include "algo-gate-api.h"
|
#include "hmq1725-gate.h"
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "algo/blake/sph_blake.h"
|
#include "algo/blake/sph_blake.h"
|
||||||
@@ -298,19 +298,22 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
memcpy(state, hashA, 32);
|
memcpy(state, hashA, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_hmq1725( int thr_id, struct work *work, int32_t max_nonce,
|
int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t endiandata[32] __attribute__((aligned(64)));
|
// uint32_t endiandata[32] __attribute__((aligned(64)));
|
||||||
|
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
//const uint32_t Htarg = ptarget[7];
|
//const uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
//we need bigendian data...
|
//we need bigendian data...
|
||||||
for (int k = 0; k < 32; k++)
|
// for (int k = 0; k < 32; k++)
|
||||||
|
for (int k = 0; k < 20; k++)
|
||||||
be32enc(&endiandata[k], pdata[k]);
|
be32enc(&endiandata[k], pdata[k]);
|
||||||
|
|
||||||
hmq_bmw512_midstate( endiandata );
|
hmq_bmw512_midstate( endiandata );
|
||||||
@@ -406,14 +409,14 @@ int scanhash_hmq1725( int thr_id, struct work *work, int32_t max_nonce,
|
|||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
bool register_hmq1725_algo( algo_gate_t* gate )
|
bool register_hmq1725_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
init_hmq1725_ctx();
|
init_hmq1725_ctx();
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||||
gate->set_target = (void*)&scrypt_set_target;
|
gate->set_target = (void*)&scrypt_set_target;
|
||||||
gate->scanhash = (void*)&scanhash_hmq1725;
|
gate->scanhash = (void*)&scanhash_hmq1725;
|
||||||
gate->hash = (void*)&hmq1725hash;
|
gate->hash = (void*)&hmq1725hash;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
*/
|
@@ -48,9 +48,10 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
__m256i* vhA = (__m256i*)vhashA;
|
__m256i* vhA = (__m256i*)vhashA;
|
||||||
__m256i* vhB = (__m256i*)vhashB;
|
__m256i* vhB = (__m256i*)vhashB;
|
||||||
__m256i vh_mask;
|
__m256i vh_mask;
|
||||||
__m256i bit3_mask; bit3_mask = _mm256_set1_epi64x( 8 );
|
|
||||||
int i;
|
|
||||||
quark_4way_ctx_holder ctx;
|
quark_4way_ctx_holder ctx;
|
||||||
|
const __m256i bit3_mask = _mm256_set1_epi64x( 8 );
|
||||||
|
const uint32_t mask = 8;
|
||||||
|
|
||||||
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
||||||
|
|
||||||
blake512_4way( &ctx.blake, input, 80 );
|
blake512_4way( &ctx.blake, input, 80 );
|
||||||
@@ -62,27 +63,44 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||||
m256_zero );
|
m256_zero );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
if ( hash0[0] & mask )
|
||||||
|
{
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||||
(char*)hash0, 512 );
|
(char*)hash0, 512 );
|
||||||
|
}
|
||||||
|
if ( hash1[0] & mask )
|
||||||
|
{
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||||
(char*)hash1, 512 );
|
(char*)hash1, 512 );
|
||||||
|
}
|
||||||
|
if ( hash2[0] & mask )
|
||||||
|
{
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||||
(char*)hash2, 512 );
|
(char*)hash2, 512 );
|
||||||
|
}
|
||||||
|
if ( hash3[0] & mask )
|
||||||
|
{
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||||
(char*)hash3, 512 );
|
(char*)hash3, 512 );
|
||||||
mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
}
|
||||||
|
|
||||||
|
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
if ( mm256_anybits0( vh_mask ) )
|
||||||
|
{
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
skein512_4way_close( &ctx.skein, vhashB );
|
skein512_4way_close( &ctx.skein, vhashB );
|
||||||
|
}
|
||||||
|
|
||||||
for ( i = 0; i < 8; i++ )
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
@@ -91,7 +109,8 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
reinit_groestl( &ctx.groestl );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
|
||||||
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
jh512_4way( &ctx.jh, vhash, 64 );
|
jh512_4way( &ctx.jh, vhash, 64 );
|
||||||
jh512_4way_close( &ctx.jh, vhash );
|
jh512_4way_close( &ctx.jh, vhash );
|
||||||
@@ -99,16 +118,21 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||||
m256_zero );
|
m256_zero );
|
||||||
|
|
||||||
|
if ( mm256_anybits1( vh_mask ) )
|
||||||
|
{
|
||||||
blake512_4way_init( &ctx.blake );
|
blake512_4way_init( &ctx.blake );
|
||||||
blake512_4way( &ctx.blake, vhash, 64 );
|
blake512_4way( &ctx.blake, vhash, 64 );
|
||||||
blake512_4way_close( &ctx.blake, vhashA );
|
blake512_4way_close( &ctx.blake, vhashA );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( mm256_anybits0( vh_mask ) )
|
||||||
|
{
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhashB );
|
bmw512_4way_close( &ctx.bmw, vhashB );
|
||||||
|
}
|
||||||
|
|
||||||
for ( i = 0; i < 8; i++ )
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
|
||||||
|
|
||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
@@ -120,63 +144,65 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||||
m256_zero );
|
m256_zero );
|
||||||
|
|
||||||
|
if ( mm256_anybits1( vh_mask ) )
|
||||||
|
{
|
||||||
keccak512_4way_init( &ctx.keccak );
|
keccak512_4way_init( &ctx.keccak );
|
||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( mm256_anybits0( vh_mask ) )
|
||||||
|
{
|
||||||
jh512_4way_init( &ctx.jh );
|
jh512_4way_init( &ctx.jh );
|
||||||
jh512_4way( &ctx.jh, vhash, 64 );
|
jh512_4way( &ctx.jh, vhash, 64 );
|
||||||
jh512_4way_close( &ctx.jh, vhashB );
|
jh512_4way_close( &ctx.jh, vhashB );
|
||||||
|
}
|
||||||
|
|
||||||
for ( i = 0; i < 8; i++ )
|
// Final blend, directly to state, only need 32 bytes.
|
||||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
casti_m256i( state, 0 ) = _mm256_blendv_epi8( vhA[0], vhB[0], vh_mask );
|
||||||
|
casti_m256i( state, 1 ) = _mm256_blendv_epi8( vhA[1], vhB[1], vh_mask );
|
||||||
mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
casti_m256i( state, 2 ) = _mm256_blendv_epi8( vhA[2], vhB[2], vh_mask );
|
||||||
|
casti_m256i( state, 3 ) = _mm256_blendv_epi8( vhA[3], vhB[3], vh_mask );
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *hash7 = &(hash[25]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t *nonces = work->nonces;
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
int num_found = 0;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
|
||||||
|
|
||||||
swab32_array( endiandata, pdata, 20 );
|
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
|
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
be32enc( noncep+2, n+1 );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||||
be32enc( noncep+4, n+2 );
|
|
||||||
be32enc( noncep+6, n+3 );
|
|
||||||
|
|
||||||
quark_4way_hash( hash, vdata );
|
quark_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
|
if ( ( hash7[ i<<1 ] & 0xFFFFFF00 ) == 0 )
|
||||||
&& fulltest( hash+(i<<3), ptarget ) )
|
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
mm256_extract_lane_4x64( lane_hash, hash, i, 256 );
|
||||||
nonces[ num_found++ ] = n+i;
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
{
|
||||||
|
pdata[19] = n+i;
|
||||||
|
submit_solution( work, lane_hash, mythr, i );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -13,19 +13,15 @@ bool register_quark_algo( algo_gate_t* gate );
|
|||||||
#if defined(QUARK_4WAY)
|
#if defined(QUARK_4WAY)
|
||||||
|
|
||||||
void quark_4way_hash( void *state, const void *input );
|
void quark_4way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_quark_4way_ctx();
|
void init_quark_4way_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void quark_hash( void *state, const void *input );
|
void quark_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_quark_ctx();
|
void init_quark_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -173,16 +173,17 @@ void quark_hash(void *state, const void *input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
swab32_array( endiandata, pdata, 20 );
|
swab32_array( endiandata, pdata, 20 );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
pdata[19] = ++n;
|
pdata[19] = ++n;
|
||||||
|
@@ -64,7 +64,7 @@ void deep_2way_hash( void *output, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
@@ -73,17 +73,17 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t *nonces = work->nonces;
|
|
||||||
int num_found = 0;
|
|
||||||
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||||
0xFFF, 0xFFFF, 0x10000000 };
|
0xFFF, 0xFFFF, 0x10000000 };
|
||||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||||
0xFFFFF000, 0xFFFF0000, 0 };
|
0xFFFFF000, 0xFFFF0000, 0 };
|
||||||
|
|
||||||
// big endian encode 0..18 uint32_t, 64 bits at a time
|
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
swab32_array( endiandata, pdata, 20 );
|
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
|
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||||
@@ -102,23 +102,24 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
|||||||
deep_2way_hash( hash, vdata );
|
deep_2way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
if ( !( hash[7] & mask ) && fulltest( hash, ptarget) )
|
if ( !( hash[7] & mask ) )
|
||||||
|
if ( fulltest( hash, ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
nonces[ num_found++ ] = n;
|
pdata[19] = n;
|
||||||
work_set_target_ratio( work, hash );
|
submit_solution( work, hash, mythr, 0 );
|
||||||
}
|
}
|
||||||
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget) )
|
if ( !( (hash+8)[7] & mask ) )
|
||||||
|
if ( fulltest( hash+8, ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
nonces[ num_found++ ] = n+1;
|
pdata[19] = n+1;
|
||||||
work_set_target_ratio( work, hash+8 );
|
submit_solution( work, hash+8, mythr, 1 );
|
||||||
}
|
}
|
||||||
n += 2;
|
n += 2;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -13,19 +13,15 @@ bool register_deep_algo( algo_gate_t* gate );
|
|||||||
#if defined(DEEP_2WAY)
|
#if defined(DEEP_2WAY)
|
||||||
|
|
||||||
void deep_2way_hash( void *state, const void *input );
|
void deep_2way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_deep_2way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_deep_2way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_deep_2way_ctx();
|
void init_deep_2way_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void deep_hash( void *state, const void *input );
|
void deep_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_deep_ctx();
|
void init_deep_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -72,14 +72,15 @@ void deep_hash(void *output, const void *input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
|
uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
|
||||||
|
@@ -70,7 +70,7 @@ void qubit_2way_hash( void *output, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
@@ -79,17 +79,17 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t *nonces = work->nonces;
|
|
||||||
int num_found = 0;
|
|
||||||
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||||
0xFFF, 0xFFFF, 0x10000000 };
|
0xFFF, 0xFFFF, 0x10000000 };
|
||||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||||
0xFFFFF000, 0xFFFF0000, 0 };
|
0xFFFFF000, 0xFFFF0000, 0 };
|
||||||
|
|
||||||
// big endian encode 0..18 uint32_t, 64 bits at a time
|
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
swab32_array( endiandata, pdata, 20 );
|
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
|
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||||
@@ -107,25 +107,24 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
|||||||
qubit_2way_hash( hash, vdata );
|
qubit_2way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
|
if ( !( hash[7] & mask ) )
|
||||||
if ( !( hash[7] & mask ) && fulltest( hash, ptarget) )
|
if ( fulltest( hash, ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
nonces[ num_found++ ] = n;
|
pdata[19] = n;
|
||||||
work_set_target_ratio( work, hash );
|
submit_solution( work, hash, mythr, 0 );
|
||||||
}
|
}
|
||||||
if ( !( (hash+8)[7] & mask ) && fulltest( hash+8, ptarget) )
|
if ( !( (hash+8)[7] & mask ) )
|
||||||
|
if ( fulltest( hash+8, ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+1;
|
pdata[19] = n+1;
|
||||||
nonces[ num_found++ ] = n+1;
|
submit_solution( work, hash+8, mythr, 1 );
|
||||||
work_set_target_ratio( work, hash+8 );
|
|
||||||
}
|
}
|
||||||
n += 2;
|
n += 2;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -13,19 +13,15 @@ bool register_qubit_algo( algo_gate_t* gate );
|
|||||||
#if defined(QUBIT_2WAY)
|
#if defined(QUBIT_2WAY)
|
||||||
|
|
||||||
void qubit_2way_hash( void *state, const void *input );
|
void qubit_2way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_qubit_2way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_qubit_2way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_qubit_2way_ctx();
|
void init_qubit_2way_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void qubit_hash( void *state, const void *input );
|
void qubit_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_qubit_ctx();
|
void init_qubit_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -83,15 +83,16 @@ void qubit_hash(void *output, const void *input)
|
|||||||
memcpy(output, hash, 32);
|
memcpy(output, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_qubit(int thr_id, struct work *work,
|
int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
|
uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
|
||||||
|
@@ -40,9 +40,9 @@ void lbry_8way_hash( void* output, const void* input )
|
|||||||
sha256_8way_close( &ctx_sha256, vhashA );
|
sha256_8way_close( &ctx_sha256, vhashA );
|
||||||
|
|
||||||
// reinterleave to do sha512 4-way 64 bit twice.
|
// reinterleave to do sha512 4-way 64 bit twice.
|
||||||
mm256_deinterleave_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
|
mm256_dintrlv_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
|
||||||
mm256_interleave_4x64( vhashA, h0, h1, h2, h3, 256 );
|
mm256_intrlv_4x64( vhashA, h0, h1, h2, h3, 256 );
|
||||||
mm256_interleave_4x64( vhashB, h4, h5, h6, h7, 256 );
|
mm256_intrlv_4x64( vhashB, h4, h5, h6, h7, 256 );
|
||||||
|
|
||||||
sha512_4way_init( &ctx_sha512 );
|
sha512_4way_init( &ctx_sha512 );
|
||||||
sha512_4way( &ctx_sha512, vhashA, 32 );
|
sha512_4way( &ctx_sha512, vhashA, 32 );
|
||||||
@@ -53,9 +53,9 @@ void lbry_8way_hash( void* output, const void* input )
|
|||||||
sha512_4way_close( &ctx_sha512, vhashB );
|
sha512_4way_close( &ctx_sha512, vhashB );
|
||||||
|
|
||||||
// back to 8-way 32 bit
|
// back to 8-way 32 bit
|
||||||
mm256_deinterleave_4x64( h0, h1, h2, h3, vhashA, 512 );
|
mm256_dintrlv_4x64( h0, h1, h2, h3, vhashA, 512 );
|
||||||
mm256_deinterleave_4x64( h4, h5, h6, h7, vhashB, 512 );
|
mm256_dintrlv_4x64( h4, h5, h6, h7, vhashB, 512 );
|
||||||
mm256_interleave_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
|
mm256_intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
|
||||||
|
|
||||||
ripemd160_8way_init( &ctx_ripemd );
|
ripemd160_8way_init( &ctx_ripemd );
|
||||||
ripemd160_8way( &ctx_ripemd, vhashA, 32 );
|
ripemd160_8way( &ctx_ripemd, vhashA, 32 );
|
||||||
@@ -72,27 +72,24 @@ void lbry_8way_hash( void* output, const void* input )
|
|||||||
|
|
||||||
sha256_8way_init( &ctx_sha256 );
|
sha256_8way_init( &ctx_sha256 );
|
||||||
sha256_8way( &ctx_sha256, vhashA, 32 );
|
sha256_8way( &ctx_sha256, vhashA, 32 );
|
||||||
sha256_8way_close( &ctx_sha256, vhashA );
|
sha256_8way_close( &ctx_sha256, output );
|
||||||
|
|
||||||
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
|
|
||||||
output+128, output+160, output+192, output+224,
|
|
||||||
vhashA, 256 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[32*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[32*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash7 = &(hash[7<<3]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[27];
|
uint32_t n = pdata[27];
|
||||||
const uint32_t first_nonce = pdata[27];
|
const uint32_t first_nonce = pdata[27];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint32_t edata[32] __attribute__ ((aligned (64)));
|
uint32_t edata[32] __attribute__ ((aligned (64)));
|
||||||
uint32_t *nonces = work->nonces;
|
__m256i *noncev = (__m256i*)vdata + 27; // aligned
|
||||||
int num_found = 0;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint32_t *noncep = vdata + 216; // 27*8
|
|
||||||
|
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||||
0xFFF, 0xFFFF, 0x10000000 };
|
0xFFF, 0xFFFF, 0x10000000 };
|
||||||
@@ -100,9 +97,12 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
0xFFFFF000, 0xFFFF0000, 0 };
|
0xFFFFF000, 0xFFFF0000, 0 };
|
||||||
|
|
||||||
// we need bigendian data...
|
// we need bigendian data...
|
||||||
swab32_array( edata, pdata, 32 );
|
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
edata, edata, edata, edata, 1024 );
|
casti_m256i( edata, 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
|
||||||
|
casti_m256i( edata, 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
|
||||||
|
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
|
||||||
|
edata, edata, edata, edata, 1024 );
|
||||||
sha256_8way_init( &sha256_8w_mid );
|
sha256_8way_init( &sha256_8w_mid );
|
||||||
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
|
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
|
||||||
|
|
||||||
@@ -111,136 +111,26 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t mask = masks[m];
|
uint32_t mask = masks[m];
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_bswap_32( _mm256_set_epi32(
|
||||||
be32enc( noncep+1, n+1 );
|
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
|
||||||
be32enc( noncep+2, n+2 );
|
|
||||||
be32enc( noncep+3, n+3 );
|
|
||||||
be32enc( noncep+4, n+4 );
|
|
||||||
be32enc( noncep+5, n+5 );
|
|
||||||
be32enc( noncep+6, n+6 );
|
|
||||||
be32enc( noncep+7, n+7 );
|
|
||||||
|
|
||||||
lbry_8way_hash( hash, vdata );
|
lbry_8way_hash( hash, vdata );
|
||||||
|
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
|
||||||
if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
|
|
||||||
{
|
{
|
||||||
pdata[27] = n+i;
|
// deinterleave hash for lane
|
||||||
nonces[ num_found++ ] = n+i;
|
mm256_extract_lane_8x32( lane_hash, hash, i, 256 );
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
|
{
|
||||||
|
pdata[27] = n + i;
|
||||||
|
submit_solution( work, lane_hash, mythr, i );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
n+=8;
|
n += 8;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
*hashes_done = n - first_nonce + 1;
|
||||||
*hashes_done = n - first_nonce;
|
return 0;
|
||||||
return num_found;
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined(LBRY_4WAY)
|
|
||||||
|
|
||||||
static __thread sha256_4way_context sha256_mid;
|
|
||||||
|
|
||||||
void lbry_4way_hash( void* output, const void* input )
|
|
||||||
{
|
|
||||||
sha256_4way_context ctx_sha256 __attribute__ ((aligned (64)));
|
|
||||||
sha512_4way_context ctx_sha512;
|
|
||||||
ripemd160_4way_context ctx_ripemd;
|
|
||||||
uint32_t _ALIGN(64) vhashA[16<<2];
|
|
||||||
uint32_t _ALIGN(64) vhashB[16<<2];
|
|
||||||
uint32_t _ALIGN(64) vhashC[16<<2];
|
|
||||||
|
|
||||||
memcpy( &ctx_sha256, &sha256_mid, sizeof(ctx_sha256) );
|
|
||||||
sha256_4way( &ctx_sha256, input + (LBRY_MIDSTATE<<2), LBRY_TAIL );
|
|
||||||
sha256_4way_close( &ctx_sha256, vhashA );
|
|
||||||
|
|
||||||
sha256_4way_init( &ctx_sha256 );
|
|
||||||
sha256_4way( &ctx_sha256, vhashA, 32 );
|
|
||||||
sha256_4way_close( &ctx_sha256, vhashA );
|
|
||||||
|
|
||||||
// sha512 64 bit data, 64 byte output
|
|
||||||
mm256_reinterleave_4x64( vhashB, vhashA, 256 );
|
|
||||||
sha512_4way_init( &ctx_sha512 );
|
|
||||||
sha512_4way( &ctx_sha512, vhashB, 32 );
|
|
||||||
sha512_4way_close( &ctx_sha512, vhashB );
|
|
||||||
mm256_reinterleave_4x32( vhashA, vhashB, 512 );
|
|
||||||
|
|
||||||
ripemd160_4way_init( &ctx_ripemd );
|
|
||||||
ripemd160_4way( &ctx_ripemd, vhashA, 32 );
|
|
||||||
ripemd160_4way_close( &ctx_ripemd, vhashB );
|
|
||||||
|
|
||||||
ripemd160_4way_init( &ctx_ripemd );
|
|
||||||
ripemd160_4way( &ctx_ripemd, vhashA+(8<<2), 32 );
|
|
||||||
ripemd160_4way_close( &ctx_ripemd, vhashC );
|
|
||||||
|
|
||||||
sha256_4way_init( &ctx_sha256 );
|
|
||||||
sha256_4way( &ctx_sha256, vhashB, 20 );
|
|
||||||
sha256_4way( &ctx_sha256, vhashC, 20 );
|
|
||||||
sha256_4way_close( &ctx_sha256, vhashA );
|
|
||||||
|
|
||||||
sha256_4way_init( &ctx_sha256 );
|
|
||||||
sha256_4way( &ctx_sha256, vhashA, 32 );
|
|
||||||
sha256_4way_close( &ctx_sha256, vhashA );
|
|
||||||
|
|
||||||
mm128_deinterleave_4x32( output, output+32, output+64, output+96,
|
|
||||||
vhashA, 256 );
|
|
||||||
}
|
|
||||||
|
|
||||||
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done)
|
|
||||||
{
|
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t vdata[32*4] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t *pdata = work->data;
|
|
||||||
uint32_t *ptarget = work->target;
|
|
||||||
uint32_t n = pdata[27];
|
|
||||||
const uint32_t first_nonce = pdata[27];
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
uint32_t edata[32] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t *nonces = work->nonces;
|
|
||||||
int num_found = 0;
|
|
||||||
uint32_t *noncep = vdata + 108; // 27*4
|
|
||||||
|
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
|
||||||
0xFFF, 0xFFFF, 0x10000000 };
|
|
||||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
|
||||||
0xFFFFF000, 0xFFFF0000, 0 };
|
|
||||||
|
|
||||||
// we need bigendian data...
|
|
||||||
swab32_array( edata, pdata, 32 );
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 1024 );
|
|
||||||
sha256_4way_init( &sha256_mid );
|
|
||||||
sha256_4way( &sha256_mid, vdata, LBRY_MIDSTATE );
|
|
||||||
|
|
||||||
for ( int m = 0; m < sizeof(masks); m++ ) if ( Htarg <= htmax[m] )
|
|
||||||
{
|
|
||||||
uint32_t mask = masks[m];
|
|
||||||
do
|
|
||||||
{
|
|
||||||
be32enc( noncep, n );
|
|
||||||
be32enc( noncep+1, n+1 );
|
|
||||||
be32enc( noncep+2, n+2 );
|
|
||||||
be32enc( noncep+3, n+3 );
|
|
||||||
|
|
||||||
lbry_4way_hash( hash, vdata );
|
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
|
||||||
if ( !( (hash+(i<<3))[7] & mask ) && fulltest( hash+(i<<3), ptarget ) )
|
|
||||||
{
|
|
||||||
pdata[27] = n+i;
|
|
||||||
nonces[ num_found++ ] = n+i;
|
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
|
||||||
}
|
|
||||||
n+=4;
|
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce;
|
|
||||||
return num_found;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -4,12 +4,10 @@
|
|||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
// Overide multi way on ryzen, SHA is better.
|
#if !defined(__SHA__)
|
||||||
#if !defined(RYZEN_)
|
#if defined(__AVX2__)
|
||||||
// need sha512 2 way AVX x2 or 1 way scalar x4 to support 4way AVX.
|
|
||||||
#if defined(__AVX2__)
|
|
||||||
#define LBRY_8WAY
|
#define LBRY_8WAY
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LBRY_NTIME_INDEX 25
|
#define LBRY_NTIME_INDEX 25
|
||||||
@@ -24,17 +22,18 @@ bool register_lbry_algo( algo_gate_t* gate );
|
|||||||
|
|
||||||
void lbry_8way_hash( void *state, const void *input );
|
void lbry_8way_hash( void *state, const void *input );
|
||||||
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
/*
|
||||||
#elif defined(LBRY_4WAY)
|
#elif defined(LBRY_4WAY)
|
||||||
|
|
||||||
void lbry_4way_hash( void *state, const void *input );
|
void lbry_4way_hash( void *state, const void *input );
|
||||||
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done );
|
||||||
|
*/
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void lbry_hash( void *state, const void *input );
|
void lbry_hash( void *state, const void *input );
|
||||||
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@@ -48,13 +48,14 @@ void lbry_hash(void* output, const void* input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[27] - 1;
|
uint32_t n = pdata[27] - 1;
|
||||||
const uint32_t first_nonce = pdata[27];
|
const uint32_t first_nonce = pdata[27];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||||
uint32_t endiandata[32] __attribute__ ((aligned (64)));
|
uint32_t endiandata[32] __attribute__ ((aligned (64)));
|
||||||
|
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
#if defined(__SSE4_2__)
|
#if defined(__SSE4_2__)
|
||||||
|
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
@@ -696,7 +696,7 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
|||||||
#endif /* HAVE_SCRYPT_6WAY */
|
#endif /* HAVE_SCRYPT_6WAY */
|
||||||
|
|
||||||
extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -704,6 +704,7 @@ extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t midstate[8];
|
uint32_t midstate[8];
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
int throughput = scrypt_best_throughput();
|
int throughput = scrypt_best_throughput();
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@@ -42,7 +42,7 @@
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "sph_types.h"
|
#include "sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
//#if defined(__SSE4_2__)
|
//#if defined(__SSE4_2__)
|
||||||
|
@@ -1,3 +1,4 @@
|
|||||||
|
#if 0
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -65,7 +66,7 @@ static const uint32_t K256[64] =
|
|||||||
_mm_xor_si64( _mm_xor_si64( \
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
mm64_ror_32(x,2), mm64_ror_32(x,13) ), _mm_srli_pi32(x,22) )
|
mm64_ror_32(x,2), mm64_ror_32(x,13) ), _mm_srli_pi32(x,22) )
|
||||||
|
|
||||||
#define BSG2_0z(x) ( ror_32(x,2) ^ ror_32(x,13) ^ ((x)>>22) )
|
#define BSG2_0z(x) ( u32_ror_32(x,2) ^ u32_ror_32(x,13) ^ ((x)>>22) )
|
||||||
|
|
||||||
#define BSG2_1x(x) \
|
#define BSG2_1x(x) \
|
||||||
_mm256_xor_si256( _mm256_xor_si256( \
|
_mm256_xor_si256( _mm256_xor_si256( \
|
||||||
@@ -75,7 +76,7 @@ static const uint32_t K256[64] =
|
|||||||
_mm_xor_si64( _mm_xor_si64( \
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
mm64_ror_32(x,6), mm64_ror_32(x,11) ), _mm_srli_pi32(x,25) )
|
mm64_ror_32(x,6), mm64_ror_32(x,11) ), _mm_srli_pi32(x,25) )
|
||||||
|
|
||||||
#define BSG2_1z(x) ( ror_32(x,6) ^ ror_32(x,11) ^ ((x)>>25) )
|
#define BSG2_1z(x) ( u32_ror_32(x,6) ^ u32_ror_32(x,11) ^ ((x)>>25) )
|
||||||
|
|
||||||
#define SSG2_0x(x) \
|
#define SSG2_0x(x) \
|
||||||
_mm256_xor_si256( _mm256_xor_si256( \
|
_mm256_xor_si256( _mm256_xor_si256( \
|
||||||
@@ -85,7 +86,7 @@ static const uint32_t K256[64] =
|
|||||||
_mm_xor_si64( _mm_xor_si64( \
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
mm64_ror_32(x,7), mm64_ror_32(x,18) ), _mm_srli_pi32(x,3) )
|
mm64_ror_32(x,7), mm64_ror_32(x,18) ), _mm_srli_pi32(x,3) )
|
||||||
|
|
||||||
#define SSG2_0z(x) (( ror_32(x,7) ^ ror_32(x,18) ) ^ ((x)>>3) )
|
#define SSG2_0z(x) (( u32_ror_32(x,7) ^ u32_ror_32(x,18) ) ^ ((x)>>3) )
|
||||||
|
|
||||||
#define SSG2_1x(x) \
|
#define SSG2_1x(x) \
|
||||||
_mm256_xor_si256( _mm256_xor_si256( \
|
_mm256_xor_si256( _mm256_xor_si256( \
|
||||||
@@ -95,7 +96,7 @@ static const uint32_t K256[64] =
|
|||||||
_mm_xor_si64( _mm_xor_si64( \
|
_mm_xor_si64( _mm_xor_si64( \
|
||||||
mm64_ror_32(x,17), mm64_ror_32(x,19) ), _mm_srli_pi32(x,10) )
|
mm64_ror_32(x,17), mm64_ror_32(x,19) ), _mm_srli_pi32(x,10) )
|
||||||
|
|
||||||
#define SSG2_1z(x) ( ror_32(x,17) ^ ror_32(x,19) ^ ((x)>>10) )
|
#define SSG2_1z(x) ( u32_ror_32(x,17) ^ u32_ror_32(x,19) ^ ((x)>>10) )
|
||||||
|
|
||||||
#define SHA2x_MEXP( a, b, c, d ) \
|
#define SHA2x_MEXP( a, b, c, d ) \
|
||||||
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||||
@@ -449,7 +450,7 @@ void sha256_11way_update( sha256_11way_context *ctx, const void *datax,
|
|||||||
if ( clen > len )
|
if ( clen > len )
|
||||||
clen = len;
|
clen = len;
|
||||||
memcpy_256( ctx->bufx + (ptr>>2), vdatax + (ptr>>2), clen>>2 );
|
memcpy_256( ctx->bufx + (ptr>>2), vdatax + (ptr>>2), clen>>2 );
|
||||||
memcpy_64 ( ctx->bufy + (ptr>>2), vdatay + (ptr>>2), clen>>2 );
|
memcpy_m64( ctx->bufy + (ptr>>2), vdatay + (ptr>>2), clen>>2 );
|
||||||
memcpy ( ctx->bufz + ptr, idataz + ptr, clen );
|
memcpy ( ctx->bufz + ptr, idataz + ptr, clen );
|
||||||
ptr += clen;
|
ptr += clen;
|
||||||
len -= clen;
|
len -= clen;
|
||||||
@@ -486,19 +487,19 @@ void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dsty,
|
|||||||
if ( ptr > pad )
|
if ( ptr > pad )
|
||||||
{
|
{
|
||||||
memset_zero_256( ctx->bufx + (ptr>>2), (buf_size - ptr) >> 2 );
|
memset_zero_256( ctx->bufx + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||||
memset_zero_64( ctx->bufy + (ptr>>2), (buf_size - ptr) >> 2 );
|
memset_zero_m64( ctx->bufy + (ptr>>2), (buf_size - ptr) >> 2 );
|
||||||
memset( ctx->bufz + (ptr>>2), 0, (buf_size - ptr) >> 2 );
|
memset( ctx->bufz + (ptr>>2), 0, (buf_size - ptr) >> 2 );
|
||||||
sha256_11way_round( ctx->bufx, ctx->valx,
|
sha256_11way_round( ctx->bufx, ctx->valx,
|
||||||
ctx->bufy, ctx->valy,
|
ctx->bufy, ctx->valy,
|
||||||
ctx->bufz, ctx->valz );
|
ctx->bufz, ctx->valz );
|
||||||
memset_zero_256( ctx->bufx, pad >> 2 );
|
memset_zero_256( ctx->bufx, pad >> 2 );
|
||||||
memset_zero_64( ctx->bufy, pad >> 2 );
|
memset_zero_m64( ctx->bufy, pad >> 2 );
|
||||||
memset( ctx->bufz, 0, pad >> 2 );
|
memset( ctx->bufz, 0, pad >> 2 );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
memset_zero_256( ctx->bufx + (ptr>>2), (pad - ptr) >> 2 );
|
memset_zero_256( ctx->bufx + (ptr>>2), (pad - ptr) >> 2 );
|
||||||
memset_zero_64( ctx->bufy + (ptr>>2), (pad - ptr) >> 2 );
|
memset_zero_m64( ctx->bufy + (ptr>>2), (pad - ptr) >> 2 );
|
||||||
memset( ctx->bufz + (ptr>>2), 0, (pad - ptr) >> 2 );
|
memset( ctx->bufz + (ptr>>2), 0, (pad - ptr) >> 2 );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -534,3 +535,4 @@ void sha256_11way_close( sha256_11way_context *ctx, void *dstx, void *dsty,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
#endif // 0
|
||||||
|
@@ -36,7 +36,6 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -59,12 +58,7 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
0 };
|
0 };
|
||||||
|
|
||||||
// Need big endian data
|
// Need big endian data
|
||||||
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
mm256_bswap_intrlv80_8x32( vdata, pdata );
|
||||||
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
|
||||||
edata, edata, edata, edata, 640 );
|
|
||||||
sha256_8way_init( &sha256_ctx8 );
|
sha256_8way_init( &sha256_ctx8 );
|
||||||
sha256_8way( &sha256_ctx8, vdata, 64 );
|
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||||
|
|
||||||
@@ -73,11 +67,10 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t mask = masks[m];
|
uint32_t mask = masks[m];
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_bswap_32(
|
*noncev = mm256_bswap_32(
|
||||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||||
|
|
||||||
pdata[19] = n;
|
|
||||||
|
|
||||||
|
pdata[19] = n;
|
||||||
sha256q_8way_hash( hash, vdata );
|
sha256q_8way_hash( hash, vdata );
|
||||||
|
|
||||||
uint32_t *hash7 = &(hash[7<<3]);
|
uint32_t *hash7 = &(hash[7<<3]);
|
||||||
@@ -86,27 +79,19 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( !( hash7[ lane ] & mask ) )
|
if ( !( hash7[ lane ] & mask ) )
|
||||||
{
|
{
|
||||||
// deinterleave hash for lane
|
// deinterleave hash for lane
|
||||||
uint32_t lane_hash[8];
|
uint32_t lane_hash[8];
|
||||||
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
|
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
work_set_target_ratio( work, lane_hash );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
if ( submit_work( mythr, work ) )
|
}
|
||||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
}
|
||||||
accepted_share_count + rejected_share_count + 1,
|
|
||||||
thr_id, lane );
|
|
||||||
else
|
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
n += 8;
|
n += 8;
|
||||||
|
|
||||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -146,7 +131,6 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
uint32_t lane_hash[8];
|
uint32_t lane_hash[8];
|
||||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -168,13 +152,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
0xFFFF0000,
|
0xFFFF0000,
|
||||||
0 };
|
0 };
|
||||||
|
|
||||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
|
||||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
sha256_4way_init( &sha256_ctx4 );
|
sha256_4way_init( &sha256_ctx4 );
|
||||||
sha256_4way( &sha256_ctx4, vdata, 64 );
|
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||||
|
|
||||||
@@ -183,7 +161,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t mask = masks[m];
|
uint32_t mask = masks[m];
|
||||||
do {
|
do {
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
sha256q_4way_hash( hash, vdata );
|
sha256q_4way_hash( hash, vdata );
|
||||||
|
|
||||||
@@ -192,25 +170,16 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
|
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
work_set_target_ratio( work, lane_hash );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
if ( submit_work( mythr, work ) )
|
|
||||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
|
||||||
accepted_share_count + rejected_share_count + 1,
|
|
||||||
thr_id, lane );
|
|
||||||
else
|
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
n += 4;
|
||||||
n += 4;
|
|
||||||
|
|
||||||
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -72,7 +72,7 @@ int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
casti_m256i( dataz, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
casti_m256i( dataz, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
casti_m128i( dataz, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
casti_m128i( dataz, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
mm256_interleave_8x32( datax, dataz, dataz, dataz, dataz,
|
mm256_intrlv_8x32( datax, dataz, dataz, dataz, dataz,
|
||||||
dataz, dataz, dataz, dataz, 640 );
|
dataz, dataz, dataz, dataz, 640 );
|
||||||
mm64_interleave_2x32( datay, dataz, dataz, 640 );
|
mm64_interleave_2x32( datay, dataz, dataz, 640 );
|
||||||
|
|
||||||
@@ -156,15 +156,15 @@ void sha256t_8way_hash( void* output, const void* input )
|
|||||||
sha256_8way_init( &ctx );
|
sha256_8way_init( &ctx );
|
||||||
sha256_8way( &ctx, vhash, 32 );
|
sha256_8way( &ctx, vhash, 32 );
|
||||||
sha256_8way_close( &ctx, output );
|
sha256_8way_close( &ctx, output );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash7 = &(hash[7<<3]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -187,12 +187,7 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
0 };
|
0 };
|
||||||
|
|
||||||
// Need big endian data
|
// Need big endian data
|
||||||
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
mm256_bswap_intrlv80_8x32( vdata, pdata );
|
||||||
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
|
||||||
edata, edata, edata, edata, 640 );
|
|
||||||
sha256_8way_init( &sha256_ctx8 );
|
sha256_8way_init( &sha256_ctx8 );
|
||||||
sha256_8way( &sha256_ctx8, vdata, 64 );
|
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||||
|
|
||||||
@@ -201,29 +196,22 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t mask = masks[m];
|
uint32_t mask = masks[m];
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_bswap_32(
|
*noncev = mm256_bswap_32( _mm256_set_epi32(
|
||||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
n+7,n+6,n+5,n+4,n+3,n+2,n+1,n ) );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
sha256t_8way_hash( hash, vdata );
|
sha256t_8way_hash( hash, vdata );
|
||||||
|
|
||||||
uint32_t *hash7 = &(hash[7<<3]);
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
if ( !( hash7[ lane ] & mask ) )
|
if ( !( hash7[ lane ] & mask ) )
|
||||||
{
|
{
|
||||||
// deinterleave hash for lane
|
// deinterleave hash for lane
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
|
||||||
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
|
|
||||||
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce-10) && !work_restart[thr_id].restart );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -253,7 +241,6 @@ void sha256t_4way_hash( void* output, const void* input )
|
|||||||
sha256_4way_init( &ctx );
|
sha256_4way_init( &ctx );
|
||||||
sha256_4way( &ctx, vhash, 32 );
|
sha256_4way( &ctx, vhash, 32 );
|
||||||
sha256_4way_close( &ctx, output );
|
sha256_4way_close( &ctx, output );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
@@ -262,7 +249,6 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t edata[20] __attribute__ ((aligned (32)));;
|
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -278,20 +264,14 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
0xFFF,
|
0xFFF,
|
||||||
0xFFFF,
|
0xFFFF,
|
||||||
0x10000000 };
|
0x10000000 };
|
||||||
const uint32_t masks[] = { 0xFFFFFFFF,
|
const uint32_t masks[] = { 0xFFFFFFFF,
|
||||||
0xFFFFFFF0,
|
0xFFFFFFF0,
|
||||||
0xFFFFFF00,
|
0xFFFFFF00,
|
||||||
0xFFFFF000,
|
0xFFFFF000,
|
||||||
0xFFFF0000,
|
0xFFFF0000,
|
||||||
0 };
|
0 };
|
||||||
|
|
||||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
|
||||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
|
||||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
|
||||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
sha256_4way_init( &sha256_ctx4 );
|
sha256_4way_init( &sha256_ctx4 );
|
||||||
sha256_4way( &sha256_ctx4, vdata, 64 );
|
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||||
|
|
||||||
@@ -300,7 +280,7 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t mask = masks[m];
|
uint32_t mask = masks[m];
|
||||||
do {
|
do {
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
sha256t_4way_hash( hash, vdata );
|
sha256t_4way_hash( hash, vdata );
|
||||||
|
|
||||||
@@ -308,15 +288,13 @@ int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( !( hash7[ lane ] & mask ) )
|
if ( !( hash7[ lane ] & mask ) )
|
||||||
{
|
{
|
||||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
|
||||||
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce - 4) && !work_restart[thr_id].restart );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -2,11 +2,7 @@
|
|||||||
|
|
||||||
bool register_sha256t_algo( algo_gate_t* gate )
|
bool register_sha256t_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined(SHA256T_11WAY)
|
#if defined(SHA256T_8WAY)
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
|
||||||
gate->scanhash = (void*)&scanhash_sha256t_11way;
|
|
||||||
gate->hash = (void*)&sha256t_11way_hash;
|
|
||||||
#elif defined(SHA256T_8WAY)
|
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||||
gate->hash = (void*)&sha256t_8way_hash;
|
gate->hash = (void*)&sha256t_8way_hash;
|
||||||
@@ -25,11 +21,7 @@ gate->optimizations = SHA_OPT;
|
|||||||
|
|
||||||
bool register_sha256q_algo( algo_gate_t* gate )
|
bool register_sha256q_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined(SHA256T_8WAY)
|
#if defined(SHA256T_4WAY)
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
|
||||||
gate->scanhash = (void*)&scanhash_sha256q_8way;
|
|
||||||
gate->hash = (void*)&sha256q_8way_hash;
|
|
||||||
#elif defined(SHA256T_4WAY)
|
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256q_4way;
|
gate->scanhash = (void*)&scanhash_sha256q_4way;
|
||||||
gate->hash = (void*)&sha256q_4way_hash;
|
gate->hash = (void*)&sha256q_4way_hash;
|
||||||
|
@@ -5,30 +5,17 @@
|
|||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
|
|
||||||
// Override multi way on ryzen, SHA is better.
|
// Override multi way on ryzen, SHA is better.
|
||||||
#if !defined(RYZEN_)
|
#if !defined(__SHA__)
|
||||||
#if defined(__SSE2__)
|
#if defined(__AVX2__)
|
||||||
#define SHA256T_4WAY
|
|
||||||
#endif
|
|
||||||
#if defined(__AVX2__)
|
|
||||||
#define SHA256T_8WAY
|
#define SHA256T_8WAY
|
||||||
// #define SHA256T_11WAY
|
#elif defined(__SSE2__)
|
||||||
#endif
|
#define SHA256T_4WAY
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool register_sha256t_algo( algo_gate_t* gate );
|
bool register_sha256t_algo( algo_gate_t* gate );
|
||||||
bool register_sha256q_algo( algo_gate_t* gate );
|
bool register_sha256q_algo( algo_gate_t* gate );
|
||||||
|
|
||||||
#if defined(SHA256T_11WAY)
|
|
||||||
|
|
||||||
void sha256t_11way_hash( void *outx, void *outy, void *outz, const void *inpx,
|
|
||||||
const void *inpy, const void *inpz );
|
|
||||||
int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
//void sha256q_8way_hash( void *output, const void *input );
|
|
||||||
//int scanhash_sha256q_11way( int thr_id, struct work *work, uint32_t max_nonce,
|
|
||||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(SHA256T_8WAY)
|
#if defined(SHA256T_8WAY)
|
||||||
|
|
||||||
void sha256t_8way_hash( void *output, const void *input );
|
void sha256t_8way_hash( void *output, const void *input );
|
||||||
|
@@ -40,7 +40,7 @@
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned char buf[128<<1];
|
unsigned char buf[128<<1];
|
||||||
|
@@ -36,7 +36,7 @@
|
|||||||
#ifdef __AES__
|
#ifdef __AES__
|
||||||
|
|
||||||
#include "sph_shavite.h"
|
#include "sph_shavite.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
|
@@ -6,6 +6,12 @@
|
|||||||
|
|
||||||
#if defined (__AVX2__)
|
#if defined (__AVX2__)
|
||||||
|
|
||||||
|
union _m256_v16 {
|
||||||
|
uint16_t u16[16];
|
||||||
|
__m256i v256;
|
||||||
|
};
|
||||||
|
typedef union _m256_v16 m256_v16;
|
||||||
|
|
||||||
// imported from simd_iv.h
|
// imported from simd_iv.h
|
||||||
|
|
||||||
uint32_t SIMD_IV_512[] = { 0x0ba16b95, 0x72f999ad, 0x9fecc2ae, 0xba3264fc,
|
uint32_t SIMD_IV_512[] = { 0x0ba16b95, 0x72f999ad, 0x9fecc2ae, 0xba3264fc,
|
||||||
|
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t A[ 32*2 ] __attribute__((aligned(64)));
|
uint32_t A[ 32*2 ] __attribute__((aligned(64)));
|
||||||
|
@@ -2,7 +2,11 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "skein-hash-4way.h"
|
#include "skein-hash-4way.h"
|
||||||
#include "algo/sha/sha2-hash-4way.h"
|
#if defined(__SHA__)
|
||||||
|
#include <openssl/sha.h>
|
||||||
|
#else
|
||||||
|
#include "algo/sha/sha2-hash-4way.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined (SKEIN_4WAY)
|
#if defined (SKEIN_4WAY)
|
||||||
|
|
||||||
@@ -11,53 +15,69 @@ void skeinhash_4way( void *state, const void *input )
|
|||||||
uint64_t vhash64[8*4] __attribute__ ((aligned (64)));
|
uint64_t vhash64[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
|
||||||
skein512_4way_context ctx_skein;
|
skein512_4way_context ctx_skein;
|
||||||
|
#if defined(__SHA__)
|
||||||
|
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash2[16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash3[16] __attribute__ ((aligned (64)));
|
||||||
|
SHA256_CTX ctx_sha256;
|
||||||
|
#else
|
||||||
sha256_4way_context ctx_sha256;
|
sha256_4way_context ctx_sha256;
|
||||||
|
#endif
|
||||||
|
|
||||||
skein512_4way_init( &ctx_skein );
|
skein512_4way_init( &ctx_skein );
|
||||||
skein512_4way( &ctx_skein, input, 80 );
|
skein512_4way( &ctx_skein, input, 80 );
|
||||||
skein512_4way_close( &ctx_skein, vhash64 );
|
skein512_4way_close( &ctx_skein, vhash64 );
|
||||||
|
|
||||||
mm256_reinterleave_4x32( vhash32, vhash64, 512 );
|
#if defined(__SHA__)
|
||||||
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
|
||||||
|
|
||||||
|
SHA256_Init( &ctx_sha256 );
|
||||||
|
SHA256_Update( &ctx_sha256, (unsigned char*)hash0, 64 );
|
||||||
|
SHA256_Final( (unsigned char*)hash0, &ctx_sha256 );
|
||||||
|
|
||||||
|
SHA256_Init( &ctx_sha256 );
|
||||||
|
SHA256_Update( &ctx_sha256, (unsigned char*)hash1, 64 );
|
||||||
|
SHA256_Final( (unsigned char*)hash1, &ctx_sha256 );
|
||||||
|
|
||||||
|
SHA256_Init( &ctx_sha256 );
|
||||||
|
SHA256_Update( &ctx_sha256, (unsigned char*)hash2, 64 );
|
||||||
|
SHA256_Final( (unsigned char*)hash2, &ctx_sha256 );
|
||||||
|
|
||||||
|
SHA256_Init( &ctx_sha256 );
|
||||||
|
SHA256_Update( &ctx_sha256, (unsigned char*)hash3, 64 );
|
||||||
|
SHA256_Final( (unsigned char*)hash3, &ctx_sha256 );
|
||||||
|
|
||||||
|
mm128_intrlv_4x32( state, hash0, hash1, hash2, hash3, 256 );
|
||||||
|
#else
|
||||||
|
mm256_rintrlv_4x64_4x32( vhash32, vhash64, 512 );
|
||||||
|
|
||||||
sha256_4way_init( &ctx_sha256 );
|
sha256_4way_init( &ctx_sha256 );
|
||||||
sha256_4way( &ctx_sha256, vhash32, 64 );
|
sha256_4way( &ctx_sha256, vhash32, 64 );
|
||||||
sha256_4way_close( &ctx_sha256, state );
|
sha256_4way_close( &ctx_sha256, state );
|
||||||
|
#endif
|
||||||
mm128_deinterleave_4x32( state, state+32, state+64, state+96,
|
|
||||||
vhash32, 256 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8];
|
uint32_t lane_hash[8];
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
// hash is returned deinterleaved
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
uint32_t *nonces = work->nonces;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
int num_found = 0;
|
|
||||||
|
|
||||||
// data is 80 bytes, 20 u32 or 4 u64.
|
|
||||||
|
|
||||||
swab32_array( edata, pdata, 20 );
|
|
||||||
|
|
||||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
|
||||||
|
|
||||||
|
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
be32enc( noncep+2, n+1 );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||||
be32enc( noncep+4, n+2 );
|
|
||||||
be32enc( noncep+6, n+3 );
|
|
||||||
|
|
||||||
skeinhash_4way( hash, vdata );
|
skeinhash_4way( hash, vdata );
|
||||||
|
|
||||||
@@ -68,16 +88,14 @@ int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
nonces[ num_found++ ] = n + lane;
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
work_set_target_ratio( work, lane_hash );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (num_found == 0) && (n < max_nonce)
|
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -3,24 +3,21 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
|
|
||||||
// Override multi way on ryzen, SHA is better.
|
|
||||||
#if !defined(RYZEN_)
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#define SKEIN_4WAY
|
#define SKEIN_4WAY
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(SKEIN_4WAY)
|
#if defined(SKEIN_4WAY)
|
||||||
|
|
||||||
void skeinhash_4way( void *output, const void *input );
|
void skeinhash_4way( void *output, const void *input );
|
||||||
|
|
||||||
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void skeinhash( void *output, const void *input );
|
void skeinhash( void *output, const void *input );
|
||||||
|
|
||||||
int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -49,7 +49,7 @@ extern "C"{
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
// Output size in bits
|
// Output size in bits
|
||||||
#define SPH_SIZE_skein256 256
|
#define SPH_SIZE_skein256 256
|
||||||
|
@@ -21,8 +21,8 @@ void skeinhash(void *state, const void *input)
|
|||||||
memcpy(state, hash, 32);
|
memcpy(state, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_skein(int thr_id, struct work *work,
|
int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -31,7 +31,8 @@ int scanhash_skein(int thr_id, struct work *work,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
swab32_array( endiandata, pdata, 20 );
|
swab32_array( endiandata, pdata, 20 );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
@@ -20,55 +20,43 @@ void skein2hash_4way( void *output, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[25]);
|
uint32_t *hash7 = &(hash[25]);
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
uint32_t *nonces = work->nonces;
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
int num_found = 0;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
swab32_array( endiandata, pdata, 20 );
|
|
||||||
|
|
||||||
mm256_interleave_4x64( vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
|
||||||
|
|
||||||
|
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
be32enc( noncep+2, n+1 );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||||
be32enc( noncep+4, n+2 );
|
|
||||||
be32enc( noncep+6, n+3 );
|
|
||||||
|
|
||||||
skein2hash( hash, vdata );
|
skein2hash_4way( hash, vdata );
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ )
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
if ( hash7[ lane ] <= Htarg )
|
if ( hash7[ lane<<1 ] <= Htarg )
|
||||||
{
|
{
|
||||||
// deinterleave hash for lane
|
|
||||||
uint32_t lane_hash[8];
|
uint32_t lane_hash[8];
|
||||||
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
|
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
nonces[ num_found++ ] = n + lane;
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
work_set_target_ratio( work, lane_hash );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (num_found == 0) && (n < max_nonce)
|
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -10,10 +10,9 @@ int64_t skein2_get_max64 ()
|
|||||||
bool register_skein2_algo( algo_gate_t* gate )
|
bool register_skein2_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
gate->optimizations = AVX2_OPT;
|
gate->optimizations = AVX2_OPT;
|
||||||
#if defined (FOUR_WAY) && defined (__AVX2__)
|
#if defined (SKEIN2_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_skein2_4way;
|
gate->scanhash = (void*)&scanhash_skein2_4way;
|
||||||
gate->hash = (void*)&skein2hash_4way;
|
gate->hash = (void*)&skein2hash_4way;
|
||||||
four_way_not_tested();
|
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_skein2;
|
gate->scanhash = (void*)&scanhash_skein2;
|
||||||
gate->hash = (void*)&skein2hash;
|
gate->hash = (void*)&skein2hash;
|
||||||
|
@@ -10,11 +10,11 @@
|
|||||||
#if defined(SKEIN2_4WAY)
|
#if defined(SKEIN2_4WAY)
|
||||||
void skein2hash_4way( void *output, const void *input );
|
void skein2hash_4way( void *output, const void *input );
|
||||||
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t* hashes_done );
|
uint64_t* hashes_done, struct thr_info *mythr );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void skein2hash( void *output, const void *input );
|
void skein2hash( void *output, const void *input );
|
||||||
int scanhash_skein2( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_skein2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -34,8 +34,8 @@ void skein2hash(void *output, const void *input)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_skein2(int thr_id, struct work *work,
|
int scanhash_skein2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -44,6 +44,7 @@ int scanhash_skein2(int thr_id, struct work *work,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
swab32_array( endiandata, pdata, 20 );
|
swab32_array( endiandata, pdata, 20 );
|
||||||
|
|
||||||
|
@@ -59,7 +59,7 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@@ -52,7 +52,7 @@
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include "algo/sha/sph_types.h"
|
#include "algo/sha/sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Output size (in bits) for WHIRLPOOL.
|
* Output size (in bits) for WHIRLPOOL.
|
||||||
|
@@ -44,12 +44,13 @@ void axiomhash(void *output, const void *input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_axiom(int thr_id, struct work *work,
|
int scanhash_axiom(int thr_id, struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t _ALIGN(64) hash64[8];
|
uint32_t _ALIGN(64) hash64[8];
|
||||||
uint32_t _ALIGN(64) endiandata[20];
|
uint32_t _ALIGN(64) endiandata[20];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
@@ -13,26 +13,16 @@
|
|||||||
#include "algo/gost/sph_gost.h"
|
#include "algo/gost/sph_gost.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
|
|
||||||
typedef struct {
|
union _poly_4way_context_overlay
|
||||||
|
{
|
||||||
skein512_4way_context skein;
|
skein512_4way_context skein;
|
||||||
shabal512_4way_context shabal;
|
shabal512_4way_context shabal;
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
luffa_2way_context luffa;
|
luffa_2way_context luffa;
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
sph_gost512_context gost;
|
sph_gost512_context gost;
|
||||||
} poly_4way_ctx_holder;
|
};
|
||||||
|
typedef union _poly_4way_context_overlay poly_4way_context_overlay;
|
||||||
poly_4way_ctx_holder poly_4way_ctx;
|
|
||||||
|
|
||||||
void init_polytimos_4way_ctx()
|
|
||||||
{
|
|
||||||
skein512_4way_init( &poly_4way_ctx.skein );
|
|
||||||
shabal512_4way_init( &poly_4way_ctx.shabal );
|
|
||||||
init_echo( &poly_4way_ctx.echo, 512 );
|
|
||||||
luffa_2way_init( &poly_4way_ctx.luffa, 512 );
|
|
||||||
sph_fugue512_init( &poly_4way_ctx.fugue );
|
|
||||||
sph_gost512_init( &poly_4way_ctx.gost );
|
|
||||||
}
|
|
||||||
|
|
||||||
void polytimos_4way_hash( void *output, const void *input )
|
void polytimos_4way_hash( void *output, const void *input )
|
||||||
{
|
{
|
||||||
@@ -41,51 +31,57 @@ void polytimos_4way_hash( void *output, const void *input )
|
|||||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||||
poly_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
poly_4way_context_overlay ctx;
|
||||||
memcpy( &ctx, &poly_4way_ctx, sizeof(poly_4way_ctx) );
|
|
||||||
|
|
||||||
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, input, 80 );
|
skein512_4way( &ctx.skein, input, 80 );
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
skein512_4way_close( &ctx.skein, vhash );
|
||||||
|
|
||||||
// Need to convert from 64 bit interleaved to 32 bit interleaved.
|
// Need to convert from 64 bit interleaved to 32 bit interleaved.
|
||||||
uint32_t vhash32[16*4];
|
uint32_t vhash32[16*4];
|
||||||
mm256_reinterleave_4x32( vhash32, vhash, 512 );
|
mm256_rintrlv_4x64_4x32( vhash32, vhash, 512 );
|
||||||
|
shabal512_4way_init( &ctx.shabal );
|
||||||
shabal512_4way( &ctx.shabal, vhash32, 64 );
|
shabal512_4way( &ctx.shabal, vhash32, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash32 );
|
shabal512_4way_close( &ctx.shabal, vhash32 );
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash32, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash32, 512 );
|
||||||
|
|
||||||
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
|
||||||
(const BitSequence *)hash0, 512 );
|
(const BitSequence *)hash0, 512 );
|
||||||
memcpy( &ctx.echo, &poly_4way_ctx.echo, sizeof(hashState_echo) );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||||
(const BitSequence *) hash1, 512 );
|
(const BitSequence *) hash1, 512 );
|
||||||
memcpy( &ctx.echo, &poly_4way_ctx.echo, sizeof(hashState_echo) );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||||
(const BitSequence *) hash2, 512 );
|
(const BitSequence *) hash2, 512 );
|
||||||
memcpy( &ctx.echo, &poly_4way_ctx.echo, sizeof(hashState_echo) );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
|
||||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||||
|
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||||
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||||
|
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
sph_fugue512_close( &ctx.fugue, hash0 );
|
||||||
memcpy( &ctx.fugue, &poly_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash1, 64 );
|
sph_fugue512( &ctx.fugue, hash1, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
sph_fugue512_close( &ctx.fugue, hash1 );
|
||||||
memcpy( &ctx.fugue, &poly_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash2, 64 );
|
sph_fugue512( &ctx.fugue, hash2, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
sph_fugue512_close( &ctx.fugue, hash2 );
|
||||||
memcpy( &ctx.fugue, &poly_4way_ctx.fugue, sizeof(sph_fugue512_context) );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
|
sph_gost512_init( &ctx.gost );
|
||||||
sph_gost512( &ctx.gost, hash0, 64 );
|
sph_gost512( &ctx.gost, hash0, 64 );
|
||||||
sph_gost512_close( &ctx.gost, hash0 );
|
sph_gost512_close( &ctx.gost, hash0 );
|
||||||
sph_gost512_init( &ctx.gost );
|
sph_gost512_init( &ctx.gost );
|
||||||
@@ -104,51 +100,43 @@ void polytimos_4way_hash( void *output, const void *input )
|
|||||||
memcpy( output+96, hash3, 32 );
|
memcpy( output+96, hash3, 32 );
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done )
|
int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
uint32_t *nonces = work->nonces;
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
int num_found = 0;
|
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x0cff;
|
ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
for ( int i=0; i < 19; i++ )
|
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||||
be32enc( &endiandata[i], pdata[i] );
|
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
|
||||||
do {
|
do {
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
be32enc( noncep+2, n+1 );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||||
be32enc( noncep+4, n+2 );
|
|
||||||
be32enc( noncep+6, n+3 );
|
|
||||||
|
|
||||||
polytimos_4way_hash(hash, vdata);
|
polytimos_4way_hash(hash, vdata);
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
|
||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
nonces[ num_found++ ] = n+i;
|
submit_solution( work, hash+(i<<3), mythr, i );
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart));
|
} while ( ( n < max_nonce-4 ) && !(*restart));
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -4,7 +4,6 @@ bool register_polytimos_algo( algo_gate_t* gate )
|
|||||||
{
|
{
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||||
#ifdef POLYTIMOS_4WAY
|
#ifdef POLYTIMOS_4WAY
|
||||||
init_polytimos_4way_ctx();
|
|
||||||
gate->scanhash = (void*)&scanhash_polytimos_4way;
|
gate->scanhash = (void*)&scanhash_polytimos_4way;
|
||||||
gate->hash = (void*)&polytimos_4way_hash;
|
gate->hash = (void*)&polytimos_4way_hash;
|
||||||
#else
|
#else
|
||||||
|
@@ -13,19 +13,14 @@ bool register_polytimos_algo( algo_gate_t* gate );
|
|||||||
#if defined(POLYTIMOS_4WAY)
|
#if defined(POLYTIMOS_4WAY)
|
||||||
|
|
||||||
void polytimos_4way_hash( void *state, const void *input );
|
void polytimos_4way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_polytimos_4way_ctx();
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void polytimos_hash( void *state, const void *input );
|
void polytimos_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_polytimos_ctx();
|
void init_polytimos_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -76,13 +76,14 @@ void polytimos_hash(void *output, const void *input)
|
|||||||
memcpy(output, hashA, 32);
|
memcpy(output, hashA, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_polytimos(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[8];
|
uint32_t _ALIGN(128) hash[8];
|
||||||
uint32_t _ALIGN(128) endiandata[20];
|
uint32_t _ALIGN(128) endiandata[20];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
|
@@ -40,7 +40,7 @@ void veltor_4way_hash( void *output, const void *input )
|
|||||||
|
|
||||||
skein512_4way( &ctx.skein, input, 80 );
|
skein512_4way( &ctx.skein, input, 80 );
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
skein512_4way_close( &ctx.skein, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_shavite512( &ctx.shavite, hash0, 64 );
|
sph_shavite512( &ctx.shavite, hash0, 64 );
|
||||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
sph_shavite512_close( &ctx.shavite, hash0 );
|
||||||
@@ -54,10 +54,10 @@ void veltor_4way_hash( void *output, const void *input )
|
|||||||
sph_shavite512( &ctx.shavite, hash3, 64 );
|
sph_shavite512( &ctx.shavite, hash3, 64 );
|
||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||||
|
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_gost512( &ctx.gost, hash0, 64 );
|
sph_gost512( &ctx.gost, hash0, 64 );
|
||||||
sph_gost512_close( &ctx.gost, hash0 );
|
sph_gost512_close( &ctx.gost, hash0 );
|
||||||
@@ -78,7 +78,7 @@ void veltor_4way_hash( void *output, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
@@ -91,6 +91,7 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *nonces = work->nonces;
|
uint32_t *nonces = work->nonces;
|
||||||
int num_found = 0;
|
int num_found = 0;
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
@@ -101,7 +102,7 @@ int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( noncep, n );
|
be32enc( noncep, n );
|
||||||
|
@@ -15,7 +15,7 @@ bool register_veltor_algo( algo_gate_t* gate );
|
|||||||
void veltor_4way_hash( void *state, const void *input );
|
void veltor_4way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_veltor_4way_ctx();
|
void init_veltor_4way_ctx();
|
||||||
|
|
||||||
@@ -24,7 +24,7 @@ void init_veltor_4way_ctx();
|
|||||||
void veltor_hash( void *state, const void *input );
|
void veltor_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_veltor( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_veltor( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_veltor_ctx();
|
void init_veltor_ctx();
|
||||||
|
|
||||||
|
@@ -61,12 +61,14 @@ void veltor_hash(void *output, const void *input)
|
|||||||
memcpy(output, hashB, 32);
|
memcpy(output, hashB, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_veltor(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
int scanhash_veltor( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[8];
|
uint32_t _ALIGN(128) hash[8];
|
||||||
uint32_t _ALIGN(128) endiandata[20];
|
uint32_t _ALIGN(128) endiandata[20];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
@@ -78,7 +78,7 @@ void x14_4way_hash( void *state, const void *input )
|
|||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
// Serial
|
// Serial
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 3 Groestl
|
// 3 Groestl
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -90,7 +90,7 @@ void x14_4way_hash( void *state, const void *input )
|
|||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
// Parallel 4way
|
// Parallel 4way
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
// 4 Skein
|
// 4 Skein
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -105,16 +105,16 @@ void x14_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
// Serial
|
// Serial
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 7 Luffa
|
// 7 Luffa
|
||||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 8 Cubehash
|
// 8 Cubehash
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||||
@@ -142,13 +142,13 @@ void x14_4way_hash( void *state, const void *input )
|
|||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||||
|
|
||||||
// 10 Simd
|
// 10 Simd
|
||||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 11 Echo
|
// 11 Echo
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -164,10 +164,10 @@ void x14_4way_hash( void *state, const void *input )
|
|||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
// 12 Hamsi parallel 4way 32 bit
|
// 12 Hamsi parallel 4way 32 bit
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 13 Fugue serial
|
// 13 Fugue serial
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
@@ -183,14 +183,14 @@ void x14_4way_hash( void *state, const void *input )
|
|||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
// 14 Shabal, parallel 32 bit
|
// 14 Shabal, parallel 32 bit
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, state );
|
shabal512_4way_close( &ctx.shabal, state );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
@@ -203,6 +203,7 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
int num_found = 0;
|
int num_found = 0;
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||||
0xFFF, 0xFFFF, 0x10000000 };
|
0xFFF, 0xFFFF, 0x10000000 };
|
||||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||||
@@ -212,7 +213,7 @@ int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
swab32_array( endiandata, pdata, 20 );
|
swab32_array( endiandata, pdata, 20 );
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||||
|
|
||||||
for ( int m=0; m < 6; m++ )
|
for ( int m=0; m < 6; m++ )
|
||||||
if ( Htarg <= htmax[m] )
|
if ( Htarg <= htmax[m] )
|
||||||
|
@@ -13,19 +13,15 @@ bool register_x14_algo( algo_gate_t* gate );
|
|||||||
#if defined(X14_4WAY)
|
#if defined(X14_4WAY)
|
||||||
|
|
||||||
void x14_4way_hash( void *state, const void *input );
|
void x14_4way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x14_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_x14_4way_ctx();
|
void init_x14_4way_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void x14hash( void *state, const void *input );
|
void x14hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_x14( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x14( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_x14_ctx();
|
void init_x14_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -180,8 +180,8 @@ void x14hash(void *output, const void *input)
|
|||||||
memcpy(output, hash, 32);
|
memcpy(output, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x14(int thr_id, struct work *work,
|
int scanhash_x14( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||||
@@ -190,6 +190,7 @@ int scanhash_x14(int thr_id, struct work *work,
|
|||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
uint64_t htmax[] = {
|
uint64_t htmax[] = {
|
||||||
0,
|
0,
|
||||||
|
@@ -81,7 +81,7 @@ void x15_4way_hash( void *state, const void *input )
|
|||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
// Serial
|
// Serial
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 3 Groestl
|
// 3 Groestl
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -93,7 +93,7 @@ void x15_4way_hash( void *state, const void *input )
|
|||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
// Parallel 4way
|
// Parallel 4way
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
// 4 Skein
|
// 4 Skein
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -108,16 +108,16 @@ void x15_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
// Serial to the end
|
// Serial to the end
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 7 Luffa
|
// 7 Luffa
|
||||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, 64 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 8 Cubehash
|
// 8 Cubehash
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*) hash0, 64 );
|
||||||
@@ -145,13 +145,13 @@ void x15_4way_hash( void *state, const void *input )
|
|||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||||
|
|
||||||
// 10 Simd
|
// 10 Simd
|
||||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
mm256_intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||||
mm256_interleave_2x128( vhash, hash2, hash3, 512 );
|
mm256_intrlv_2x128( vhash, hash2, hash3, 512 );
|
||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 11 Echo
|
// 11 Echo
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -167,10 +167,10 @@ void x15_4way_hash( void *state, const void *input )
|
|||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
// 12 Hamsi parallel 4way 32 bit
|
// 12 Hamsi parallel 4way 32 bit
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 13 Fugue
|
// 13 Fugue
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
@@ -186,10 +186,10 @@ void x15_4way_hash( void *state, const void *input )
|
|||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
// 14 Shabal, parallel 32 bit
|
// 14 Shabal, parallel 32 bit
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
// 15 Whirlpool
|
// 15 Whirlpool
|
||||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
@@ -214,7 +214,7 @@ void x15_4way_hash( void *state, const void *input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
@@ -227,6 +227,7 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
int num_found = 0;
|
int num_found = 0;
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||||
0xFFF, 0xFFFF, 0x10000000 };
|
0xFFF, 0xFFFF, 0x10000000 };
|
||||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||||
@@ -236,7 +237,7 @@ int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
swab32_array( endiandata, pdata, 20 );
|
swab32_array( endiandata, pdata, 20 );
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||||
|
|
||||||
for ( int m=0; m < 6; m++ )
|
for ( int m=0; m < 6; m++ )
|
||||||
if ( Htarg <= htmax[m] )
|
if ( Htarg <= htmax[m] )
|
||||||
|
@@ -13,19 +13,15 @@ bool register_x15_algo( algo_gate_t* gate );
|
|||||||
#if defined(X15_4WAY)
|
#if defined(X15_4WAY)
|
||||||
|
|
||||||
void x15_4way_hash( void *state, const void *input );
|
void x15_4way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x15_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_x15_4way_ctx();
|
void init_x15_4way_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void x15hash( void *state, const void *input );
|
void x15hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_x15( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x15( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_x15_ctx();
|
void init_x15_ctx();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -186,8 +186,8 @@ void x15hash(void *output, const void *input)
|
|||||||
memcpy(output, hashB, 32);
|
memcpy(output, hashB, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x15(int thr_id, struct work *work,
|
int scanhash_x15( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||||
@@ -196,6 +196,7 @@ int scanhash_x15(int thr_id, struct work *work,
|
|||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
uint64_t htmax[] = {
|
uint64_t htmax[] = {
|
||||||
0,
|
0,
|
||||||
|
@@ -32,8 +32,8 @@
|
|||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||||
|
|
||||||
|
union _x16r_4way_context_overlay
|
||||||
typedef struct {
|
{
|
||||||
blake512_4way_context blake;
|
blake512_4way_context blake;
|
||||||
bmw512_4way_context bmw;
|
bmw512_4way_context bmw;
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
@@ -50,16 +50,8 @@ typedef struct {
|
|||||||
shabal512_4way_context shabal;
|
shabal512_4way_context shabal;
|
||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
sha512_4way_context sha512;
|
sha512_4way_context sha512;
|
||||||
} x16r_4way_ctx_holder;
|
|
||||||
|
|
||||||
x16r_4way_ctx_holder x16r_4way_ctx __attribute__ ((aligned (64)));
|
|
||||||
|
|
||||||
// Cube needs one full init so fast reinits can be done in the hash loop.
|
|
||||||
void init_x16r_4way_ctx()
|
|
||||||
{
|
|
||||||
cubehashInit( &x16r_4way_ctx.cube, 512, 16, 32 );
|
|
||||||
};
|
};
|
||||||
|
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
|
||||||
|
|
||||||
void x16r_4way_hash( void* output, const void* input )
|
void x16r_4way_hash( void* output, const void* input )
|
||||||
{
|
{
|
||||||
@@ -68,14 +60,14 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
||||||
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
|
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
|
||||||
x16r_4way_ctx_holder ctx;
|
x16r_4way_context_overlay ctx;
|
||||||
void *in0 = (void*) hash0;
|
void *in0 = (void*) hash0;
|
||||||
void *in1 = (void*) hash1;
|
void *in1 = (void*) hash1;
|
||||||
void *in2 = (void*) hash2;
|
void *in2 = (void*) hash2;
|
||||||
void *in3 = (void*) hash3;
|
void *in3 = (void*) hash3;
|
||||||
int size = 80;
|
int size = 80;
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||||
|
|
||||||
if ( s_ntime == UINT32_MAX )
|
if ( s_ntime == UINT32_MAX )
|
||||||
{
|
{
|
||||||
@@ -104,11 +96,11 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
blake512_4way( &ctx.blake, input, size );
|
blake512_4way( &ctx.blake, input, size );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mm256_interleave_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
mm256_intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
blake512_4way( &ctx.blake, vhash, size );
|
blake512_4way( &ctx.blake, vhash, size );
|
||||||
}
|
}
|
||||||
blake512_4way_close( &ctx.blake, vhash );
|
blake512_4way_close( &ctx.blake, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case BMW:
|
case BMW:
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
@@ -116,11 +108,11 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
bmw512_4way( &ctx.bmw, input, size );
|
bmw512_4way( &ctx.bmw, input, size );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mm256_interleave_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
mm256_intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
bmw512_4way( &ctx.bmw, vhash, size );
|
bmw512_4way( &ctx.bmw, vhash, size );
|
||||||
}
|
}
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case GROESTL:
|
case GROESTL:
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
@@ -142,11 +134,11 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
skein512_4way( &ctx.skein, input, size );
|
skein512_4way( &ctx.skein, input, size );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mm256_interleave_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
mm256_intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
skein512_4way( &ctx.skein, vhash, size );
|
skein512_4way( &ctx.skein, vhash, size );
|
||||||
}
|
}
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
skein512_4way_close( &ctx.skein, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case JH:
|
case JH:
|
||||||
jh512_4way_init( &ctx.jh );
|
jh512_4way_init( &ctx.jh );
|
||||||
@@ -154,11 +146,11 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
jh512_4way( &ctx.jh, input, size );
|
jh512_4way( &ctx.jh, input, size );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mm256_interleave_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
mm256_intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
jh512_4way( &ctx.jh, vhash, size );
|
jh512_4way( &ctx.jh, vhash, size );
|
||||||
}
|
}
|
||||||
jh512_4way_close( &ctx.jh, vhash );
|
jh512_4way_close( &ctx.jh, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case KECCAK:
|
case KECCAK:
|
||||||
keccak512_4way_init( &ctx.keccak );
|
keccak512_4way_init( &ctx.keccak );
|
||||||
@@ -166,21 +158,21 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
keccak512_4way( &ctx.keccak, input, size );
|
keccak512_4way( &ctx.keccak, input, size );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mm256_interleave_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
mm256_intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
keccak512_4way( &ctx.keccak, vhash, size );
|
keccak512_4way( &ctx.keccak, vhash, size );
|
||||||
}
|
}
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case LUFFA:
|
case LUFFA:
|
||||||
mm256_interleave_2x128( vhash, in0, in1, size<<3 );
|
mm256_intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size );
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size );
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||||
mm256_interleave_2x128( vhash, in2, in3, size<<3 );
|
mm256_intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size);
|
luffa_2way_update_close( &ctx.luffa, vhash, vhash, size);
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
@@ -194,7 +186,7 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
(const byte*)in2, size );
|
(const byte*)in2, size );
|
||||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash3,
|
||||||
(const byte*)in3, size );
|
(const byte*)in3, size );
|
||||||
break;
|
break;
|
||||||
case SHAVITE:
|
case SHAVITE:
|
||||||
sph_shavite512_init( &ctx.shavite );
|
sph_shavite512_init( &ctx.shavite );
|
||||||
@@ -211,14 +203,14 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
sph_shavite512_close( &ctx.shavite, hash3 );
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
mm256_interleave_2x128( vhash, in0, in1, size<<3 );
|
mm256_intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||||
mm256_interleave_2x128( vhash, in2, in3, size<<3 );
|
mm256_intrlv_2x128( vhash, in2, in3, size<<3 );
|
||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
simd_2way_update_close( &ctx.simd, vhash, vhash, size<<3 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhash, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case ECHO:
|
case ECHO:
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
@@ -235,11 +227,11 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
(const BitSequence*)in3, size<<3 );
|
(const BitSequence*)in3, size<<3 );
|
||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
mm256_interleave_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
mm256_intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, size );
|
hamsi512_4way( &ctx.hamsi, vhash, size );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
@@ -256,11 +248,11 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
mm128_interleave_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
mm128_intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
shabal512_4way_init( &ctx.shabal );
|
shabal512_4way_init( &ctx.shabal );
|
||||||
shabal512_4way( &ctx.shabal, vhash, size );
|
shabal512_4way( &ctx.shabal, vhash, size );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
case WHIRLPOOL:
|
case WHIRLPOOL:
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
@@ -277,11 +269,11 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
mm256_interleave_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
mm256_intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
sha512_4way_init( &ctx.sha512 );
|
sha512_4way_init( &ctx.sha512 );
|
||||||
sha512_4way( &ctx.sha512, vhash, size );
|
sha512_4way( &ctx.sha512, vhash, size );
|
||||||
sha512_4way_close( &ctx.sha512, vhash );
|
sha512_4way_close( &ctx.sha512, vhash );
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
size = 64;
|
size = 64;
|
||||||
@@ -293,7 +285,7 @@ void x16r_4way_hash( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr)
|
||||||
{
|
{
|
||||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
@@ -303,15 +295,14 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
uint32_t *nonces = work->nonces;
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
int num_found = 0;
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
for ( int k=0; k < 19; k++ )
|
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||||
be32enc( &endiandata[k], pdata[k] );
|
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||||
|
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
// if ( s_ntime != pdata[17] )
|
|
||||||
if ( s_ntime != endiandata[17] )
|
if ( s_ntime != endiandata[17] )
|
||||||
{
|
{
|
||||||
uint32_t ntime = swab32(pdata[17]);
|
uint32_t ntime = swab32(pdata[17]);
|
||||||
@@ -325,30 +316,27 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
ptarget[7] = 0x0cff;
|
ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
uint64_t *edata = (uint64_t*)endiandata;
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
mm256_intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( noncep, n );
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
be32enc( noncep+2, n+1 );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||||
be32enc( noncep+4, n+2 );
|
|
||||||
be32enc( noncep+6, n+3 );
|
|
||||||
|
|
||||||
x16r_4way_hash( hash, vdata );
|
x16r_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ ) if ( (hash+(i<<3))[7] <= Htarg )
|
||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
nonces[ num_found++ ] = n+i;
|
submit_solution( work, hash+(i<<3), mythr, i );
|
||||||
work_set_target_ratio( work, hash+(i<<3) );
|
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
|
} while ( ( n < max_nonce ) && !(*restart) );
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -35,11 +35,9 @@ void x16s_getAlgoString( const uint8_t* prevblock, char *output )
|
|||||||
bool register_x16r_algo( algo_gate_t* gate )
|
bool register_x16r_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (X16R_4WAY)
|
#if defined (X16R_4WAY)
|
||||||
init_x16r_4way_ctx();
|
|
||||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||||
gate->hash = (void*)&x16r_4way_hash;
|
gate->hash = (void*)&x16r_4way_hash;
|
||||||
#else
|
#else
|
||||||
init_x16r_ctx();
|
|
||||||
gate->scanhash = (void*)&scanhash_x16r;
|
gate->scanhash = (void*)&scanhash_x16r;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
@@ -52,11 +50,9 @@ bool register_x16r_algo( algo_gate_t* gate )
|
|||||||
bool register_x16s_algo( algo_gate_t* gate )
|
bool register_x16s_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (X16R_4WAY)
|
#if defined (X16R_4WAY)
|
||||||
init_x16r_4way_ctx();
|
|
||||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||||
gate->hash = (void*)&x16r_4way_hash;
|
gate->hash = (void*)&x16r_4way_hash;
|
||||||
#else
|
#else
|
||||||
init_x16r_ctx();
|
|
||||||
gate->scanhash = (void*)&scanhash_x16r;
|
gate->scanhash = (void*)&scanhash_x16r;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
#define X16R_GATE_H__ 1
|
#define X16R_GATE_H__ 1
|
||||||
|
|
||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
#include "avxdefs.h"
|
#include "simd-utils.h"
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#if defined(__AVX2__) && defined(__AES__)
|
#if defined(__AVX2__) && defined(__AES__)
|
||||||
@@ -39,20 +39,14 @@ bool register_x16s_algo( algo_gate_t* gate );
|
|||||||
#if defined(X16R_4WAY)
|
#if defined(X16R_4WAY)
|
||||||
|
|
||||||
void x16r_4way_hash( void *state, const void *input );
|
void x16r_4way_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_x16r_4way_ctx();
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void x16r_hash( void *state, const void *input );
|
void x16r_hash( void *state, const void *input );
|
||||||
|
|
||||||
int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
void init_x16r_ctx();
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -33,7 +33,8 @@
|
|||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
||||||
|
|
||||||
typedef struct {
|
union _x16r_context_overlay
|
||||||
|
{
|
||||||
#if defined(__AES__)
|
#if defined(__AES__)
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
@@ -55,19 +56,13 @@ typedef struct {
|
|||||||
sph_shabal512_context shabal;
|
sph_shabal512_context shabal;
|
||||||
sph_whirlpool_context whirlpool;
|
sph_whirlpool_context whirlpool;
|
||||||
SHA512_CTX sha512;
|
SHA512_CTX sha512;
|
||||||
} x16r_ctx_holder;
|
|
||||||
|
|
||||||
x16r_ctx_holder x16r_ctx __attribute__ ((aligned (64)));
|
|
||||||
|
|
||||||
void init_x16r_ctx()
|
|
||||||
{
|
|
||||||
cubehashInit( &x16r_ctx.cube, 512, 16, 32 );
|
|
||||||
};
|
};
|
||||||
|
typedef union _x16r_context_overlay x16r_context_overlay;
|
||||||
|
|
||||||
void x16r_hash( void* output, const void* input )
|
void x16r_hash( void* output, const void* input )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(128) hash[16];
|
||||||
x16r_ctx_holder ctx;
|
x16r_context_overlay ctx;
|
||||||
void *in = (void*) input;
|
void *in = (void*) input;
|
||||||
int size = 80;
|
int size = 80;
|
||||||
|
|
||||||
@@ -126,7 +121,7 @@ void x16r_hash( void* output, const void* input )
|
|||||||
(const BitSequence*)in, size );
|
(const BitSequence*)in, size );
|
||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
memcpy( &ctx.cube, &x16r_ctx.cube, sizeof(cubehashParam) );
|
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash,
|
||||||
(const byte*)in, size );
|
(const byte*)in, size );
|
||||||
break;
|
break;
|
||||||
@@ -184,7 +179,7 @@ void x16r_hash( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash32[8];
|
uint32_t _ALIGN(128) hash32[8];
|
||||||
uint32_t _ALIGN(128) endiandata[20];
|
uint32_t _ALIGN(128) endiandata[20];
|
||||||
@@ -192,16 +187,16 @@ int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
for ( int k=0; k < 19; k++ )
|
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
be32enc( &endiandata[k], pdata[k] );
|
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
|
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
|
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
|
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
// This code is suspicious. s_ntime is saved after byteswapping pdata[17]
|
|
||||||
// but is tested vs unswapped pdata[17]. This should result in calling
|
|
||||||
// getAlgoString every pass, but that doesn't seem to be the case.
|
|
||||||
// It appears to be working correctly as is.
|
|
||||||
if ( s_ntime != pdata[17] )
|
if ( s_ntime != pdata[17] )
|
||||||
{
|
{
|
||||||
uint32_t ntime = swab32(pdata[17]);
|
uint32_t ntime = swab32(pdata[17]);
|
||||||
|
@@ -47,30 +47,6 @@ union _sonoa_4way_context_overlay
|
|||||||
};
|
};
|
||||||
|
|
||||||
typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
|
typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
|
||||||
/*
|
|
||||||
sonoa_4way_ctx_holder sonoa_4way_ctx __attribute__ ((aligned (64)));
|
|
||||||
|
|
||||||
void init_sonoa_4way_ctx()
|
|
||||||
{
|
|
||||||
blake512_4way_init( &sonoa_4way_ctx.blake );
|
|
||||||
bmw512_4way_init( &sonoa_4way_ctx.bmw );
|
|
||||||
init_groestl( &sonoa_4way_ctx.groestl, 64 );
|
|
||||||
skein512_4way_init( &sonoa_4way_ctx.skein );
|
|
||||||
jh512_4way_init( &sonoa_4way_ctx.jh );
|
|
||||||
keccak512_4way_init( &sonoa_4way_ctx.keccak );
|
|
||||||
luffa_2way_init( &sonoa_4way_ctx.luffa, 512 );
|
|
||||||
cube_2way_init( &sonoa_4way_ctx.cube, 512, 16, 32 );
|
|
||||||
shavite512_2way_init( &sonoa_4way_ctx.shavite );
|
|
||||||
simd_2way_init( &sonoa_4way_ctx.simd, 512 );
|
|
||||||
init_echo( &sonoa_4way_ctx.echo, 512 );
|
|
||||||
hamsi512_4way_init( &sonoa_4way_ctx.hamsi );
|
|
||||||
sph_fugue512_init( &sonoa_4way_ctx.fugue );
|
|
||||||
shabal512_4way_init( &sonoa_4way_ctx.shabal );
|
|
||||||
sph_whirlpool_init( &sonoa_4way_ctx.whirlpool );
|
|
||||||
sha512_4way_init( &sonoa_4way_ctx.sha512 );
|
|
||||||
haval256_5_4way_init( &sonoa_4way_ctx.haval );
|
|
||||||
};
|
|
||||||
*/
|
|
||||||
|
|
||||||
void sonoa_4way_hash( void *state, const void *input )
|
void sonoa_4way_hash( void *state, const void *input )
|
||||||
{
|
{
|
||||||
@@ -82,8 +58,6 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||||
sonoa_4way_context_overlay ctx;
|
sonoa_4way_context_overlay ctx;
|
||||||
// sonoa_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
|
||||||
// memcpy( &ctx, &sonoa_4way_ctx, sizeof(sonoa_4way_ctx) );
|
|
||||||
|
|
||||||
// 1
|
// 1
|
||||||
|
|
||||||
@@ -95,7 +69,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -106,7 +80,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -120,7 +94,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||||
@@ -142,8 +116,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -160,13 +134,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
// 2
|
// 2
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -177,7 +151,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -191,7 +165,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||||
@@ -213,8 +187,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -229,7 +203,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
@@ -241,7 +215,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -252,7 +226,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -266,7 +240,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||||
@@ -288,8 +262,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -304,13 +278,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
@@ -326,13 +300,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
// 4
|
// 4
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -343,7 +317,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -357,7 +331,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||||
@@ -379,8 +353,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -395,13 +369,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
@@ -416,19 +390,19 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
shabal512_4way_init( &ctx.shabal );
|
shabal512_4way_init( &ctx.shabal );
|
||||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x32_4x64( vhashB, vhash, 512 );
|
mm256_rintrlv_4x32_4x64( vhashB, vhash, 512 );
|
||||||
|
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhashB, 64 );
|
hamsi512_4way( &ctx.hamsi, vhashB, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -443,8 +417,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_2x128( vhashA, hash0, hash1, 512 );
|
mm256_intrlv_2x128( vhashA, hash0, hash1, 512 );
|
||||||
mm256_interleave_2x128( vhashB, hash2, hash3, 512 );
|
mm256_intrlv_2x128( vhashB, hash2, hash3, 512 );
|
||||||
|
|
||||||
shavite512_2way_init( &ctx.shavite );
|
shavite512_2way_init( &ctx.shavite );
|
||||||
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
shavite512_2way_update_close( &ctx.shavite, vhashA, vhashA, 64 );
|
||||||
@@ -452,19 +426,19 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
|
||||||
|
|
||||||
// 5
|
// 5
|
||||||
mm256_reinterleave_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
mm256_rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
|
||||||
|
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||||
|
|
||||||
shabal512_4way_init( &ctx.shabal );
|
shabal512_4way_init( &ctx.shabal );
|
||||||
shabal512_4way( &ctx.shabal, vhashB, 64 );
|
shabal512_4way( &ctx.shabal, vhashB, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
|
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -475,7 +449,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -489,7 +463,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||||
@@ -511,8 +485,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -527,13 +501,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
@@ -548,13 +522,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
shabal512_4way_init( &ctx.shabal );
|
shabal512_4way_init( &ctx.shabal );
|
||||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
|
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
@@ -571,13 +545,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
// 6
|
// 6
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -588,7 +562,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -602,7 +576,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||||
@@ -624,8 +598,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -640,13 +614,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
@@ -661,13 +635,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
shabal512_4way_init( &ctx.shabal );
|
shabal512_4way_init( &ctx.shabal );
|
||||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
|
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
@@ -682,13 +656,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
sha512_4way_init( &ctx.sha512 );
|
sha512_4way_init( &ctx.sha512 );
|
||||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||||
sha512_4way_close( &ctx.sha512, vhash );
|
sha512_4way_close( &ctx.sha512, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
@@ -705,13 +679,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
// 7
|
// 7
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
@@ -722,7 +696,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
init_groestl( &ctx.groestl, 64 );
|
init_groestl( &ctx.groestl, 64 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_init( &ctx.skein );
|
||||||
skein512_4way( &ctx.skein, vhash, 64 );
|
skein512_4way( &ctx.skein, vhash, 64 );
|
||||||
@@ -736,7 +710,7 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way( &ctx.keccak, vhash, 64 );
|
keccak512_4way( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_2x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
luffa_2way_init( &ctx.luffa, 512 );
|
luffa_2way_init( &ctx.luffa, 512 );
|
||||||
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
luffa_2way_update_close( &ctx.luffa, vhashA, vhashA, 64 );
|
||||||
@@ -758,8 +732,8 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_init( &ctx.simd, 512 );
|
simd_2way_init( &ctx.simd, 512 );
|
||||||
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
simd_2way_update_close( &ctx.simd, vhashB, vhashB, 512 );
|
||||||
|
|
||||||
mm256_deinterleave_2x128( hash0, hash1, vhashA, 512 );
|
mm256_dintrlv_2x128( hash0, hash1, vhashA, 512 );
|
||||||
mm256_deinterleave_2x128( hash2, hash3, vhashB, 512 );
|
mm256_dintrlv_2x128( hash2, hash3, vhashB, 512 );
|
||||||
|
|
||||||
init_echo( &ctx.echo, 512 );
|
init_echo( &ctx.echo, 512 );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
@@ -774,13 +748,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
(const BitSequence *) hash3, 512 );
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
hamsi512_4way_init( &ctx.hamsi );
|
hamsi512_4way_init( &ctx.hamsi );
|
||||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||||
|
|
||||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_init( &ctx.fugue );
|
||||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||||
@@ -795,13 +769,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
sph_fugue512( &ctx.fugue, hash3, 64 );
|
sph_fugue512( &ctx.fugue, hash3, 64 );
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||||
|
|
||||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
shabal512_4way_init( &ctx.shabal );
|
shabal512_4way_init( &ctx.shabal );
|
||||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||||
shabal512_4way_close( &ctx.shabal, vhash );
|
shabal512_4way_close( &ctx.shabal, vhash );
|
||||||
|
|
||||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool_init( &ctx.whirlpool );
|
||||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||||
@@ -816,13 +790,13 @@ void sonoa_4way_hash( void *state, const void *input )
|
|||||||
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
sph_whirlpool( &ctx.whirlpool, hash3, 64 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
|
|
||||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
sha512_4way_init( &ctx.sha512 );
|
sha512_4way_init( &ctx.sha512 );
|
||||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||||
sha512_4way_close( &ctx.sha512, vhash );
|
sha512_4way_close( &ctx.sha512, vhash );
|
||||||
|
|
||||||
mm256_reinterleave_4x64_4x32( vhashB, vhash, 512 );
|
mm256_rintrlv_4x64_4x32( vhashB, vhash, 512 );
|
||||||
|
|
||||||
haval256_5_4way_init( &ctx.haval );
|
haval256_5_4way_init( &ctx.haval );
|
||||||
haval256_5_4way( &ctx.haval, vhashB, 64 );
|
haval256_5_4way( &ctx.haval, vhashB, 64 );
|
||||||
@@ -836,7 +810,6 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hash7 = &(hash[7<<2]);
|
||||||
uint32_t lane_hash[8];
|
uint32_t lane_hash[8];
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
@@ -850,19 +823,13 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
0xFFFFF000, 0xFFFF0000, 0 };
|
0xFFFFF000, 0xFFFF0000, 0 };
|
||||||
|
|
||||||
// Need big endian data
|
// Need big endian data
|
||||||
casti_m256i( endiandata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||||
casti_m256i( endiandata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
|
||||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
uint64_t *edata = (uint64_t*)endiandata;
|
|
||||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
|
||||||
|
|
||||||
for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
for ( int m=0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||||
{
|
{
|
||||||
uint32_t mask = masks[m];
|
uint32_t mask = masks[m];
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_interleave_blend_32( mm256_bswap_32(
|
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||||
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ),
|
_mm256_set_epi32( n+3, 0,n+2, 0,n+1, 0, n, 0 ) ),
|
||||||
*noncev );
|
*noncev );
|
||||||
sonoa_4way_hash( hash, vdata );
|
sonoa_4way_hash( hash, vdata );
|
||||||
@@ -871,17 +838,10 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( ( ( hash7[ lane ] & mask ) == 0 ) )
|
if ( ( ( hash7[ lane ] & mask ) == 0 ) )
|
||||||
{
|
{
|
||||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
work_set_target_ratio( work, lane_hash );
|
submit_solution( work, lane_hash, mythr, lane );
|
||||||
if ( submit_work( mythr, work ) )
|
|
||||||
applog( LOG_NOTICE,
|
|
||||||
"Share %d submitted by thread %d, lane %d.",
|
|
||||||
accepted_share_count + rejected_share_count + 1,
|
|
||||||
thr_id, lane );
|
|
||||||
else
|
|
||||||
applog( LOG_WARNING, "Failed to submit share." );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user