This commit is contained in:
Jay D Dee
2018-01-08 22:04:43 -05:00
parent 2d2e54f001
commit bee78eac76
58 changed files with 2817 additions and 499 deletions


@@ -38,7 +38,6 @@ cpuminer_SOURCES = \
algo/argon2/ar2/cores.c \
algo/argon2/ar2/ar2-scrypt-jane.c \
algo/argon2/ar2/blake2b.c \
algo/axiom.c \
algo/blake/sph_blake.c \
algo/blake/blake-hash-4way.c \
algo/blake/blake-gate.c \
@@ -56,6 +55,7 @@ cpuminer_SOURCES = \
algo/blake/pentablake-4way.c \
algo/blake/pentablake.c \
algo/bmw/sph_bmw.c \
algo/bmw/bmw-hash-4way.c \
algo/bmw/bmw256.c \
algo/cryptonight/cryptolight.c \
algo/cryptonight/cryptonight-common.c\
@@ -63,10 +63,8 @@ cpuminer_SOURCES = \
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/drop.c \
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/fresh.c \
algo/gost/sph_gost.c \
algo/groestl/sph_groestl.c \
algo/groestl/groestl.c \
@@ -113,9 +111,8 @@ cpuminer_SOURCES = \
algo/nist5/nist5-gate.c \
algo/nist5/nist5-4way.c \
algo/nist5/nist5.c \
algo/nist5/zr5.c \
algo/pluck.c \
algo/polytimos/polytimos-gate.c \
algo/polytimos/polytimos.c \
algo/quark/quark.c \
algo/qubit/qubit.c \
algo/qubit/deep.c \
@@ -127,6 +124,7 @@ cpuminer_SOURCES = \
algo/sha/sha2.c \
algo/sha/sha256t.c \
algo/shabal/sph_shabal.c \
algo/shabal/shabal-hash-4way.c \
algo/shavite/sph_shavite.c \
algo/shavite/sph-shavite-aesni.c \
algo/shavite/shavite.c \
@@ -141,15 +139,10 @@ cpuminer_SOURCES = \
algo/skein/skein2.c \
algo/skein/skein2-4way.c \
algo/skein/skein2-gate.c \
algo/skunk.c \
algo/sm3/sm3.c \
algo/tiger/sph_tiger.c \
algo/timetravel.c \
algo/timetravel10.c \
algo/tribus/tribus-gate.c \
algo/tribus/tribus.c \
algo/tribus/tribus-4way.c \
algo/veltor.c \
algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \
algo/whirlpool/whirlpool-gate.c \
@@ -165,6 +158,10 @@ cpuminer_SOURCES = \
algo/x11/c11-gate.c \
algo/x11/c11.c \
algo/x11/c11-4way.c \
algo/x11/tribus-gate.c \
algo/x11/tribus.c \
algo/x11/tribus-4way.c \
algo/x11/fresh.c \
algo/x11/x11evo.c \
algo/x13/x13-gate.c \
algo/x13/x13.c \
@@ -175,9 +172,20 @@ cpuminer_SOURCES = \
algo/x13/phi1612-gate.c \
algo/x13/phi1612.c \
algo/x13/phi1612-4way.c \
algo/x13/skunk-gate.c \
algo/x13/skunk-4way.c \
algo/x13/skunk.c \
algo/x13/drop.c \
algo/x14/x14-gate.c \
algo/x14/x14.c \
algo/x14/x14-4way.c \
algo/x14/veltor-gate.c \
algo/x14/veltor.c \
algo/x14/veltor-4way.c \
algo/x14/polytimos-gate.c \
algo/x14/polytimos.c \
algo/x14/polytimos-4way.c \
algo/x14/axiom.c \
algo/x15/x15-gate.c \
algo/x15/x15.c \
algo/x15/x15-4way.c \
@@ -189,10 +197,8 @@ cpuminer_SOURCES = \
algo/x17/xevan-4way.c \
algo/x17/hmq1725.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/sha256_Y.c\
algo/yescrypt/yescrypt-simd.c\
algo/zr5.c
algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-simd.c
disable_flags =


@@ -68,7 +68,7 @@ Supported Algorithms
timetravel10 Bitcore
tribus Denarius (DNR)
vanilla blake256r8vnl (VCash)
veltor
veltor (VLT)
whirlpool
whirlpoolx
x11 Dash
@@ -81,6 +81,7 @@ Supported Algorithms
x17
xevan Bitsend
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)
yescryptr16 Yenten (YTN)
zr5 Ziftr


@@ -165,6 +165,13 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------
v3.7.9
Partial 4way optimizations for veltor, skunk, polytimos, lyra2z.
Additional 4way optimizations for X algos.
New algo yescryptr8 for BitZeny, not to be confused with the original
yescrypt used by Globalboost-Y.
v3.7.8
Partial 4way optimization for most X algos including c11, xevan, phi, hsr


@@ -219,6 +219,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
default:
@@ -278,6 +279,7 @@ const char* const algo_alias_map[][2] =
{
// alias proper
{ "bitcore", "timetravel10" },
{ "bitzeny", "yescryptr8" },
{ "blake256r8", "blakecoin" },
{ "blake256r8vnl", "vanilla" },
{ "blake256r14", "blake" },
@@ -300,10 +302,9 @@ const char* const algo_alias_map[][2] =
// { "sia", "blake2b" },
{ "sib", "x11gost" },
{ "timetravel8", "timetravel" },
{ "yes", "yescrypt" },
{ "ziftr", "zr5" },
{ "yenten", "yescryptr16" },
{ "yescryptr8", "yescrypt" },
{ "yescryptr8k", "yescrypt" },
{ "zcoin", "lyra2z" },
{ "zoin", "lyra2z330" },
{ NULL, NULL }


@@ -36,7 +36,6 @@
#include <string.h>
#include <limits.h>
//#include "sph_blake.h"
#include "blake-hash-4way.h"
#ifdef __cplusplus
@@ -98,18 +97,6 @@ static const unsigned sigma[16][16] = {
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
};
/*
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
14 10 4 8 9 15 13 6 1 12 0 2 11 7 5 3
11 8 12 0 5 2 15 13 10 14 3 6 7 1 9 4
7 9 3 1 13 12 11 14 2 6 5 10 4 0 15 8
9 0 5 7 2 4 10 15 14 1 11 12 6 8 3 13
2 12 6 10 0 11 8 3 4 13 7 5 15 14 1 9
12 5 1 15 14 13 4 10 0 7 6 3 9 2 8 11
13 11 7 14 12 1 3 9 5 0 15 4 8 6 2 10
6 15 14 9 11 3 0 8 12 2 13 7 1 4 10 5
10 2 8 4 7 6 1 5 15 11 9 14 3 12 13 0
*/
#endif
#define Z00 0
@@ -914,34 +901,29 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
// unsigned z = 0x80 >> n;
// unsigned zz = ((ub & -z) | z) & 0xFF;
// u.buf[ptr>>2] = _mm_set_epi32( zz, zz, zz, zz );
u.buf[ptr>>2] = _mm_set1_epi32( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;
if ( ptr == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00);
sc->T1 = SPH_C32(0xFFFFFFFF);
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
}
else if ( sc->T0 == 0 )
{
sc->T0 = SPH_C32(0xFFFFFE00) + bit_len;
sc->T0 = SPH_C32(0xFFFFFE00UL) + bit_len;
sc->T1 = SPH_T32(sc->T1 - 1);
}
else
sc->T0 -= 512 - bit_len;
// if ( ptr <= 48 )
if ( ptr <= 52 )
{
memset_zero_128( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
// memset_zero_128( u.buf + (ptr>>2) + 1, (48 - ptr) >> 2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm_or_si128( u.buf[52>>2],
_mm_set1_epi32( 0x010000000 ) );
_mm_set1_epi32( 0x01000000UL ) );
*(u.buf+(56>>2)) = mm_byteswap_32( _mm_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm_byteswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
@@ -950,11 +932,11 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
{
memset_zero_128( u.buf + (ptr>>2) + 1, (60-ptr) >> 2 );
blake32_4way( sc, u.buf + (ptr>>2), 64 - ptr );
sc->T0 = SPH_C32(0xFFFFFE00);
sc->T1 = SPH_C32(0xFFFFFFFF);
sc->T0 = SPH_C32(0xFFFFFE00UL);
sc->T1 = SPH_C32(0xFFFFFFFFUL);
memset_zero_128( u.buf, 56>>2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm_set1_epi32( 0x010000000 );
u.buf[52>>2] = _mm_set1_epi32( 0x01000000UL );
*(u.buf+(56>>2)) = mm_byteswap_32( _mm_set1_epi32( th ) );
*(u.buf+(60>>2)) = mm_byteswap_32( _mm_set1_epi32( tl ) );
blake32_4way( sc, u.buf, 64 );
@@ -962,7 +944,6 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
out = (__m128i*)dst;
for ( k = 0; k < out_size_w32; k++ )
out[k] = mm_byteswap_32( sc->H[k] );
// out[k] = sc->H[k];
}
#if defined (__AVX2__)
@@ -975,9 +956,9 @@ blake64_4way_init( blake_4way_big_context *sc, const sph_u64 *iv,
{
int i;
for ( i = 0; i < 8; i++ )
sc->H[i] = _mm256_set_epi64x( iv[i], iv[i], iv[i], iv[i] );
sc->H[i] = _mm256_set1_epi64x( iv[i] );
for ( i = 0; i < 4; i++ )
sc->S[i] = _mm256_set_epi64x( salt[i], salt[i], salt[i], salt[i] );
sc->S[i] = _mm256_set1_epi64x( salt[i] );
sc->T0 = sc->T1 = 0;
sc->ptr = 0;
}
@@ -1049,12 +1030,12 @@ blake64_4way_close( blake_4way_big_context *sc,
th = sc->T1;
if (ptr == 0 )
{
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
}
else if ( sc->T0 == 0 )
{
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + bit_len;
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL) + bit_len;
sc->T1 = SPH_T64(sc->T1 - 1);
}
else
@@ -1066,10 +1047,7 @@ blake64_4way_close( blake_4way_big_context *sc,
memset_zero_256( u.buf + (ptr>>3) + 1, (104-ptr) >> 3 );
if ( out_size_w64 == 8 )
u.buf[(104>>3)] = _mm256_or_si256( u.buf[(104>>3)],
_mm256_set_epi64x( 0x0100000000000000,
0x0100000000000000,
0x0100000000000000,
0x0100000000000000 ) );
_mm256_set1_epi64x( 0x0100000000000000ULL ) );
*(u.buf+(112>>3)) = mm256_byteswap_64(
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_byteswap_64(
@@ -1082,15 +1060,11 @@ blake64_4way_close( blake_4way_big_context *sc,
memset_zero_256( u.buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
blake64_4way( sc, u.buf + (ptr>>3), 128 - ptr );
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
memset_zero_256( u.buf, 112>>3 );
if ( out_size_w64 == 8 )
u.buf[104>>3] = _mm256_set_epi64x( 0x0100000000000000,
0x0100000000000000,
0x0100000000000000,
0x0100000000000000 );
u.buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
*(u.buf+(112>>3)) = mm256_byteswap_64(
_mm256_set_epi64x( th, th, th, th ) );
*(u.buf+(120>>3)) = mm256_byteswap_64(

algo/bmw/bmw-hash-4way.c (new file)

@@ -0,0 +1,969 @@
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
/*
* BMW implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include <limits.h>
#include "bmw-hash-4way.h"
#if defined(__AVX2__)
#ifdef __cplusplus
extern "C"{
#endif
//#include "sph_bmw.h"
//#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
#define SPH_SMALL_FOOTPRINT_BMW 1
//#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
//#undef SPH_ROTL64
//#define SPH_ROTL64(x,n) (((x) << (n)) | ((x) >> (64 - (n))))
//#define SPH_ROTL64(x,n) mm256_rotl_64(x,n)
static const sph_u32 IV256[] = {
SPH_C32(0x40414243), SPH_C32(0x44454647),
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
SPH_C32(0x50515253), SPH_C32(0x54555657),
SPH_C32(0x58595A5B), SPH_C32(0x5C5D5E5F),
SPH_C32(0x60616263), SPH_C32(0x64656667),
SPH_C32(0x68696A6B), SPH_C32(0x6C6D6E6F),
SPH_C32(0x70717273), SPH_C32(0x74757677),
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
};
#if SPH_64
static const sph_u64 IV512[] = {
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
};
#endif
#define XCAT(x, y) XCAT_(x, y)
#define XCAT_(x, y) x ## y
#define LPAR (
/*
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
#define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \
^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
#define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \
^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
#define ss4(x) (((x) >> 1) ^ (x))
#define ss5(x) (((x) >> 2) ^ (x))
#define rs1(x) SPH_ROTL32(x, 3)
#define rs2(x) SPH_ROTL32(x, 7)
#define rs3(x) SPH_ROTL32(x, 13)
#define rs4(x) SPH_ROTL32(x, 16)
#define rs5(x) SPH_ROTL32(x, 19)
#define rs6(x) SPH_ROTL32(x, 23)
#define rs7(x) SPH_ROTL32(x, 27)
#define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
#define expand1s_inner(qf, mf, hf, i16, \
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
i9, i10, i11, i12, i13, i14, i15, \
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
#define expand1s(qf, mf, hf, i16) \
expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand1s_(qf, mf, hf, i16, ix, iy) \
expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
#define expand2s_inner(qf, mf, hf, i16, \
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
i9, i10, i11, i12, i13, i14, i15, \
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
#define expand2s(qf, mf, hf, i16) \
expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand2s_(qf, mf, hf, i16, ix, iy) \
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
*/
#if SPH_64
#define sb0(x) \
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
_mm256_slli_epi64( (x), 3) ), \
_mm256_xor_si256( mm256_rotl_64( (x), 4), \
mm256_rotl_64( (x), 37) ) )
#define sb1(x) \
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
_mm256_slli_epi64( (x), 2) ), \
_mm256_xor_si256( mm256_rotl_64( (x), 13), \
mm256_rotl_64( (x), 43) ) )
#define sb2(x) \
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
_mm256_slli_epi64( (x), 1) ), \
_mm256_xor_si256( mm256_rotl_64( (x), 19), \
mm256_rotl_64( (x), 53) ) )
#define sb3(x) \
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
_mm256_slli_epi64( (x), 2) ), \
_mm256_xor_si256( mm256_rotl_64( (x), 28), \
mm256_rotl_64( (x), 59) ) )
#define sb4(x) \
_mm256_xor_si256( (x), _mm256_srli_epi64( (x), 1 ) )
#define sb5(x) \
_mm256_xor_si256( (x), _mm256_srli_epi64( (x), 2 ) )
#define rb1(x) mm256_rotl_64( x, 5 )
#define rb2(x) mm256_rotl_64( x, 11 )
#define rb3(x) mm256_rotl_64( x, 27 )
#define rb4(x) mm256_rotl_64( x, 32 )
#define rb5(x) mm256_rotl_64( x, 37 )
#define rb6(x) mm256_rotl_64( x, 43 )
#define rb7(x) mm256_rotl_64( x, 53 )
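/* Reference note (not part of this commit): the scalar 64-bit BMW forms
   these vector macros parallelize, applied lane-wise to the four 64-bit
   elements of a __m256i, are
       sb0(x) = (x >> 1) ^ (x << 3) ^ ROTL64(x, 4) ^ ROTL64(x, 37)
       sb1(x) = (x >> 1) ^ (x << 2) ^ ROTL64(x, 13) ^ ROTL64(x, 43)
       sb2(x) = (x >> 2) ^ (x << 1) ^ ROTL64(x, 19) ^ ROTL64(x, 53)
       sb3(x) = (x >> 2) ^ (x << 2) ^ ROTL64(x, 28) ^ ROTL64(x, 59)
       sb4(x) = (x >> 1) ^ x
       sb5(x) = (x >> 2) ^ x
   and rb1..rb7 are plain rotates left by 5, 11, 27, 32, 37, 43 and 53. */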
#define rol_off( M, j, off ) \
mm256_rotl_64( M[ ( (j) + (off) ) & 15 ] , \
( ( (j) + (off) ) & 15 ) + 1 )
#define add_elt_b( M, H, j ) \
_mm256_xor_si256( \
_mm256_add_epi64( \
_mm256_sub_epi64( _mm256_add_epi64( rol_off( M, j, 0 ), \
rol_off( M, j, 3 ) ), \
rol_off( M, j, 10 ) ), \
_mm256_set1_epi64x( ( (j) + 16 ) * 0x0555555555555555ULL ) ), \
H[ ( (j)+7 ) & 15 ] )
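/* Scalar reading of add_elt_b, derived from the macro above: with
   K(j) = j * 0x0555555555555555 and r(k) = ROTL64( M[k & 15], (k & 15) + 1 ),
       add_elt_b( M, H, j ) = ( r(j) + r(j+3) - r(j+10) + K(j+16) )
                              ^ H[ (j+7) & 15 ]                       */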
#define expand1b( qt, M, H, i ) \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( sb1( qt[ (i)-16 ] ), \
sb2( qt[ (i)-15 ] ) ), \
_mm256_add_epi64( sb3( qt[ (i)-14 ] ), \
sb0( qt[ (i)-13 ] ) ) ), \
_mm256_add_epi64( \
_mm256_add_epi64( sb1( qt[ (i)-12 ] ), \
sb2( qt[ (i)-11 ] ) ), \
_mm256_add_epi64( sb3( qt[ (i)-10 ] ), \
sb0( qt[ (i)- 9 ] ) ) ) ), \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( sb1( qt[ (i)- 8 ] ), \
sb2( qt[ (i)- 7 ] ) ), \
_mm256_add_epi64( sb3( qt[ (i)- 6 ] ), \
sb0( qt[ (i)- 5 ] ) ) ), \
_mm256_add_epi64( \
_mm256_add_epi64( sb1( qt[ (i)- 4 ] ), \
sb2( qt[ (i)- 3 ] ) ), \
_mm256_add_epi64( sb3( qt[ (i)- 2 ] ), \
sb0( qt[ (i)- 1 ] ) ) ) ) ), \
add_elt_b( M, H, (i)-16 ) )
#define expand2b( qt, M, H, i) \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( qt[ (i)-16 ], rb1( qt[ (i)-15 ] ) ), \
_mm256_add_epi64( qt[ (i)-14 ], rb2( qt[ (i)-13 ] ) ) ), \
_mm256_add_epi64( \
_mm256_add_epi64( qt[ (i)-12 ], rb3( qt[ (i)-11 ] ) ), \
_mm256_add_epi64( qt[ (i)-10 ], rb4( qt[ (i)- 9 ] ) ) ) ), \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( qt[ (i)- 8 ], rb5( qt[ (i)- 7 ] ) ), \
_mm256_add_epi64( qt[ (i)- 6 ], rb6( qt[ (i)- 5 ] ) ) ), \
_mm256_add_epi64( \
_mm256_add_epi64( qt[ (i)- 4 ], rb7( qt[ (i)- 3 ] ) ), \
_mm256_add_epi64( sb4( qt[ (i)- 2 ] ), \
sb5( qt[ (i)- 1 ] ) ) ) ) ), \
add_elt_b( M, H, (i)-16 ) )
#endif
/*
#define MAKE_W( i0, op01, i1, op12, i2, op23, i3, op34, i4) \
((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
*/
/*
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
#define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13)
#define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14)
#define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15)
#define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13)
#define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14)
#define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15)
#define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14)
#define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15)
#define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9)
#define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10)
#define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11)
#define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12)
#define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13)
#if SPH_SMALL_FOOTPRINT_BMW
#define MAKE_Qas do { \
unsigned u; \
sph_u32 Ws[16]; \
Ws[ 0] = Ws0; \
Ws[ 1] = Ws1; \
Ws[ 2] = Ws2; \
Ws[ 3] = Ws3; \
Ws[ 4] = Ws4; \
Ws[ 5] = Ws5; \
Ws[ 6] = Ws6; \
Ws[ 7] = Ws7; \
Ws[ 8] = Ws8; \
Ws[ 9] = Ws9; \
Ws[10] = Ws10; \
Ws[11] = Ws11; \
Ws[12] = Ws12; \
Ws[13] = Ws13; \
Ws[14] = Ws14; \
Ws[15] = Ws15; \
for (u = 0; u < 15; u += 5) { \
qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
} \
qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
} while (0)
#define MAKE_Qbs do { \
qt[16] = expand1s(Qs, M, H, 16); \
qt[17] = expand1s(Qs, M, H, 17); \
qt[18] = expand2s(Qs, M, H, 18); \
qt[19] = expand2s(Qs, M, H, 19); \
qt[20] = expand2s(Qs, M, H, 20); \
qt[21] = expand2s(Qs, M, H, 21); \
qt[22] = expand2s(Qs, M, H, 22); \
qt[23] = expand2s(Qs, M, H, 23); \
qt[24] = expand2s(Qs, M, H, 24); \
qt[25] = expand2s(Qs, M, H, 25); \
qt[26] = expand2s(Qs, M, H, 26); \
qt[27] = expand2s(Qs, M, H, 27); \
qt[28] = expand2s(Qs, M, H, 28); \
qt[29] = expand2s(Qs, M, H, 29); \
qt[30] = expand2s(Qs, M, H, 30); \
qt[31] = expand2s(Qs, M, H, 31); \
} while (0)
#else
#define MAKE_Qas do { \
qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
} while (0)
#define MAKE_Qbs do { \
qt[16] = expand1s(Qs, M, H, 16); \
qt[17] = expand1s(Qs, M, H, 17); \
qt[18] = expand2s(Qs, M, H, 18); \
qt[19] = expand2s(Qs, M, H, 19); \
qt[20] = expand2s(Qs, M, H, 20); \
qt[21] = expand2s(Qs, M, H, 21); \
qt[22] = expand2s(Qs, M, H, 22); \
qt[23] = expand2s(Qs, M, H, 23); \
qt[24] = expand2s(Qs, M, H, 24); \
qt[25] = expand2s(Qs, M, H, 25); \
qt[26] = expand2s(Qs, M, H, 26); \
qt[27] = expand2s(Qs, M, H, 27); \
qt[28] = expand2s(Qs, M, H, 28); \
qt[29] = expand2s(Qs, M, H, 29); \
qt[30] = expand2s(Qs, M, H, 30); \
qt[31] = expand2s(Qs, M, H, 31); \
} while (0)
#endif
#define MAKE_Qs do { \
MAKE_Qas; \
MAKE_Qbs; \
} while (0)
#define Qs(j) (qt[j])
*/
#if SPH_64
#define Wb0 \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 5], H[ 5] ), \
_mm256_xor_si256( M[ 7], H[ 7] ) ), \
_mm256_xor_si256( M[10], H[10] ) ), \
_mm256_xor_si256( M[13], H[13] ) ), \
_mm256_xor_si256( M[14], H[14] ) )
#define Wb1 \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 6], H[ 6] ), \
_mm256_xor_si256( M[ 8], H[ 8] ) ), \
_mm256_xor_si256( M[11], H[11] ) ), \
_mm256_xor_si256( M[14], H[14] ) ), \
_mm256_xor_si256( M[15], H[15] ) )
#define Wb2 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( _mm256_xor_si256( M[ 0], H[ 0] ), \
_mm256_xor_si256( M[ 7], H[ 7] ) ), \
_mm256_xor_si256( M[ 9], H[ 9] ) ), \
_mm256_xor_si256( M[12], H[12] ) ), \
_mm256_xor_si256( M[15], H[15] ) )
#define Wb3 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 0], H[ 0] ), \
_mm256_xor_si256( M[ 1], H[ 1] ) ), \
_mm256_xor_si256( M[ 8], H[ 8] ) ), \
_mm256_xor_si256( M[10], H[10] ) ), \
_mm256_xor_si256( M[13], H[13] ) )
#define Wb4 \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( _mm256_xor_si256( M[ 1], H[ 1] ), \
_mm256_xor_si256( M[ 2], H[ 2] ) ), \
_mm256_xor_si256( M[ 9], H[ 9] ) ), \
_mm256_xor_si256( M[11], H[11] ) ), \
_mm256_xor_si256( M[14], H[14] ) )
#define Wb5 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 3], H[ 3] ), \
_mm256_xor_si256( M[ 2], H[ 2] ) ), \
_mm256_xor_si256( M[10], H[10] ) ), \
_mm256_xor_si256( M[12], H[12] ) ), \
_mm256_xor_si256( M[15], H[15] ) )
#define Wb6 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 4], H[ 4] ), \
_mm256_xor_si256( M[ 0], H[ 0] ) ), \
_mm256_xor_si256( M[ 3], H[ 3] ) ), \
_mm256_xor_si256( M[11], H[11] ) ), \
_mm256_xor_si256( M[13], H[13] ) )
#define Wb7 \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 1], H[ 1] ), \
_mm256_xor_si256( M[ 4], H[ 4] ) ), \
_mm256_xor_si256( M[ 5], H[ 5] ) ), \
_mm256_xor_si256( M[12], H[12] ) ), \
_mm256_xor_si256( M[14], H[14] ) )
#define Wb8 \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 2], H[ 2] ), \
_mm256_xor_si256( M[ 5], H[ 5] ) ), \
_mm256_xor_si256( M[ 6], H[ 6] ) ), \
_mm256_xor_si256( M[13], H[13] ) ), \
_mm256_xor_si256( M[15], H[15] ) )
#define Wb9 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 0], H[ 0] ), \
_mm256_xor_si256( M[ 3], H[ 3] ) ), \
_mm256_xor_si256( M[ 6], H[ 6] ) ), \
_mm256_xor_si256( M[ 7], H[ 7] ) ), \
_mm256_xor_si256( M[14], H[14] ) )
#define Wb10 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 8], H[ 8] ), \
_mm256_xor_si256( M[ 1], H[ 1] ) ), \
_mm256_xor_si256( M[ 4], H[ 4] ) ), \
_mm256_xor_si256( M[ 7], H[ 7] ) ), \
_mm256_xor_si256( M[15], H[15] ) )
#define Wb11 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 8], H[ 8] ), \
_mm256_xor_si256( M[ 0], H[ 0] ) ), \
_mm256_xor_si256( M[ 2], H[ 2] ) ), \
_mm256_xor_si256( M[ 5], H[ 5] ) ), \
_mm256_xor_si256( M[ 9], H[ 9] ) )
#define Wb12 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_add_epi64( _mm256_xor_si256( M[ 1], H[ 1] ), \
_mm256_xor_si256( M[ 3], H[ 3] ) ), \
_mm256_xor_si256( M[ 6], H[ 6] ) ), \
_mm256_xor_si256( M[ 9], H[ 9] ) ), \
_mm256_xor_si256( M[10], H[10] ) )
#define Wb13 \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( \
_mm256_add_epi64( _mm256_xor_si256( M[ 2], H[ 2] ), \
_mm256_xor_si256( M[ 4], H[ 4] ) ), \
_mm256_xor_si256( M[ 7], H[ 7] ) ), \
_mm256_xor_si256( M[10], H[10] ) ), \
_mm256_xor_si256( M[11], H[11] ) )
#define Wb14 \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_add_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[ 3], H[ 3] ), \
_mm256_xor_si256( M[ 5], H[ 5] ) ), \
_mm256_xor_si256( M[ 8], H[ 8] ) ), \
_mm256_xor_si256( M[11], H[11] ) ), \
_mm256_xor_si256( M[12], H[12] ) )
#define Wb15 \
_mm256_add_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( \
_mm256_sub_epi64( _mm256_xor_si256( M[12], H[12] ), \
_mm256_xor_si256( M[ 4], H[4] ) ), \
_mm256_xor_si256( M[ 6], H[ 6] ) ), \
_mm256_xor_si256( M[ 9], H[ 9] ) ), \
_mm256_xor_si256( M[13], H[13] ) )
void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
{
__m256i qt[32], xl, xh;
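// Note: the qt[i] = sbN( WbN ) + H[i+1] lines below use the GCC/Clang
// vector-extension '+' on __m256i, a lane-wise 64-bit add equivalent
// to _mm256_add_epi64.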
qt[ 0] = sb0( Wb0 ) + H[ 1];
qt[ 1] = sb1( Wb1 ) + H[ 2];
qt[ 2] = sb2( Wb2 ) + H[ 3];
qt[ 3] = sb3( Wb3 ) + H[ 4];
qt[ 4] = sb4( Wb4 ) + H[ 5];
qt[ 5] = sb0( Wb5 ) + H[ 6];
qt[ 6] = sb1( Wb6 ) + H[ 7];
qt[ 7] = sb2( Wb7 ) + H[ 8];
qt[ 8] = sb3( Wb8 ) + H[ 9];
qt[ 9] = sb4( Wb9 ) + H[10];
qt[10] = sb0( Wb10) + H[11];
qt[11] = sb1( Wb11) + H[12];
qt[12] = sb2( Wb12) + H[13];
qt[13] = sb3( Wb13) + H[14];
qt[14] = sb4( Wb14) + H[15];
qt[15] = sb0( Wb15) + H[ 0];
qt[16] = expand1b( qt, M, H, 16 );
qt[17] = expand1b( qt, M, H, 17 );
qt[18] = expand2b( qt, M, H, 18 );
qt[19] = expand2b( qt, M, H, 19 );
qt[20] = expand2b( qt, M, H, 20 );
qt[21] = expand2b( qt, M, H, 21 );
qt[22] = expand2b( qt, M, H, 22 );
qt[23] = expand2b( qt, M, H, 23 );
qt[24] = expand2b( qt, M, H, 24 );
qt[25] = expand2b( qt, M, H, 25 );
qt[26] = expand2b( qt, M, H, 26 );
qt[27] = expand2b( qt, M, H, 27 );
qt[28] = expand2b( qt, M, H, 28 );
qt[29] = expand2b( qt, M, H, 29 );
qt[30] = expand2b( qt, M, H, 30 );
qt[31] = expand2b( qt, M, H, 31 );
xl = _mm256_xor_si256(
_mm256_xor_si256( _mm256_xor_si256( qt[16], qt[17] ),
_mm256_xor_si256( qt[18], qt[19] ) ),
_mm256_xor_si256( _mm256_xor_si256( qt[20], qt[21] ),
_mm256_xor_si256( qt[22], qt[23] ) ) );
xh = _mm256_xor_si256( xl,
_mm256_xor_si256(
_mm256_xor_si256( _mm256_xor_si256( qt[24], qt[25] ),
_mm256_xor_si256( qt[26], qt[27] ) ),
_mm256_xor_si256( _mm256_xor_si256( qt[28], qt[29] ),
_mm256_xor_si256( qt[30], qt[31] ) )));
dH[ 0] = _mm256_add_epi64(
_mm256_xor_si256( M[0],
_mm256_xor_si256( _mm256_slli_epi64( xh, 5 ),
_mm256_srli_epi64( qt[16], 5 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[24] ), qt[ 0] ));
dH[ 1] = _mm256_add_epi64(
_mm256_xor_si256( M[1],
_mm256_xor_si256( _mm256_srli_epi64( xh, 7 ),
_mm256_slli_epi64( qt[17], 8 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[25] ), qt[ 1] ));
dH[ 2] = _mm256_add_epi64(
_mm256_xor_si256( M[2],
_mm256_xor_si256( _mm256_srli_epi64( xh, 5 ),
_mm256_slli_epi64( qt[18], 5 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[26] ), qt[ 2] ));
dH[ 3] = _mm256_add_epi64(
_mm256_xor_si256( M[3],
_mm256_xor_si256( _mm256_srli_epi64( xh, 1 ),
_mm256_slli_epi64( qt[19], 5 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[27] ), qt[ 3] ));
dH[ 4] = _mm256_add_epi64(
_mm256_xor_si256( M[4],
_mm256_xor_si256( _mm256_srli_epi64( xh, 3 ),
_mm256_slli_epi64( qt[20], 0 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[28] ), qt[ 4] ));
dH[ 5] = _mm256_add_epi64(
_mm256_xor_si256( M[5],
_mm256_xor_si256( _mm256_slli_epi64( xh, 6 ),
_mm256_srli_epi64( qt[21], 6 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[29] ), qt[ 5] ));
dH[ 6] = _mm256_add_epi64(
_mm256_xor_si256( M[6],
_mm256_xor_si256( _mm256_srli_epi64( xh, 4 ),
_mm256_slli_epi64( qt[22], 6 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[30] ), qt[ 6] ));
dH[ 7] = _mm256_add_epi64(
_mm256_xor_si256( M[7],
_mm256_xor_si256( _mm256_srli_epi64( xh, 11 ),
_mm256_slli_epi64( qt[23], 2 ) ) ),
_mm256_xor_si256( _mm256_xor_si256( xl, qt[31] ), qt[ 7] ));
dH[ 8] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[4], 9 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[24] ), M[ 8] )),
_mm256_xor_si256( _mm256_slli_epi64( xl, 8 ),
_mm256_xor_si256( qt[23], qt[ 8] ) ) );
dH[ 9] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[5], 10 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[25] ), M[ 9] )),
_mm256_xor_si256( _mm256_srli_epi64( xl, 6 ),
_mm256_xor_si256( qt[16], qt[ 9] ) ) );
dH[10] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[6], 11 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[26] ), M[10] )),
_mm256_xor_si256( _mm256_slli_epi64( xl, 6 ),
_mm256_xor_si256( qt[17], qt[10] ) ) );
dH[11] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[7], 12 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[27] ), M[11] )),
_mm256_xor_si256( _mm256_slli_epi64( xl, 4 ),
_mm256_xor_si256( qt[18], qt[11] ) ) );
dH[12] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[0], 13 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[28] ), M[12] )),
_mm256_xor_si256( _mm256_srli_epi64( xl, 3 ),
_mm256_xor_si256( qt[19], qt[12] ) ) );
dH[13] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[1], 14 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[29] ), M[13] )),
_mm256_xor_si256( _mm256_srli_epi64( xl, 4 ),
_mm256_xor_si256( qt[20], qt[13] ) ) );
dH[14] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[2], 15 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[30] ), M[14] )),
_mm256_xor_si256( _mm256_srli_epi64( xl, 7 ),
_mm256_xor_si256( qt[21], qt[14] ) ) );
dH[15] = _mm256_add_epi64( _mm256_add_epi64(
mm256_rotl_64( dH[3], 16 ),
_mm256_xor_si256( _mm256_xor_si256( xh, qt[31] ), M[15] )),
_mm256_xor_si256( _mm256_srli_epi64( xl, 2 ),
_mm256_xor_si256( qt[22], qt[15] ) ) );
}
#endif // 64
//#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_ROTL32, M, Qs, dH)
/*
static void
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
{
#define M(x) sph_dec32le_aligned(data + 4 * (x))
#define H(x) (h[x])
#define dH(x) (dh[x])
FOLDs;
#undef M
#undef H
#undef dH
}
static const sph_u32 final_s[16] = {
SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
SPH_C32(0xaaaaaaaf)
};
static void
bmw32_4way_init(bmw_4way_small_context *sc, const sph_u32 *iv)
{
memcpy(sc->H, iv, sizeof sc->H);
sc->ptr = 0;
#if SPH_64
sc->bit_count = 0;
#else
sc->bit_count_high = 0;
sc->bit_count_low = 0;
#endif
}
static void
bmw32_4way(bmw_4way_small_context *sc, const void *data, size_t len)
{
unsigned char *buf;
size_t ptr;
sph_u32 htmp[16];
sph_u32 *h1, *h2;
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->bit_count += (sph_u64)len << 3;
#else
tmp = sc->bit_count_low;
sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3));
if (sc->bit_count_low < tmp)
sc->bit_count_high ++;
sc->bit_count_high += len >> 29;
#endif
buf = sc->buf;
ptr = sc->ptr;
h1 = sc->H;
h2 = htmp;
while (len > 0) {
size_t clen;
clen = (sizeof sc->buf) - ptr;
if (clen > len)
clen = len;
memcpy(buf + ptr, data, clen);
data = (const unsigned char *)data + clen;
len -= clen;
ptr += clen;
if (ptr == sizeof sc->buf) {
sph_u32 *ht;
compress_small(buf, h1, h2);
ht = h1;
h1 = h2;
h2 = ht;
ptr = 0;
}
}
sc->ptr = ptr;
if (h1 != sc->H)
memcpy(sc->H, h1, sizeof sc->H);
}
static void
bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w32)
{
unsigned char *buf, *out;
size_t ptr, u, v;
unsigned z;
sph_u32 h1[16], h2[16], *h;
buf = sc->buf;
ptr = sc->ptr;
z = 0x80 >> n;
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
h = sc->H;
if (ptr > (sizeof sc->buf) - 8) {
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
compress_small(buf, h, h1);
ptr = 0;
h = h1;
}
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
#if SPH_64
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
SPH_T64(sc->bit_count + n));
#else
sph_enc32le_aligned(buf + (sizeof sc->buf) - 8,
sc->bit_count_low + n);
sph_enc32le_aligned(buf + (sizeof sc->buf) - 4,
SPH_T32(sc->bit_count_high));
#endif
compress_small(buf, h, h2);
for (u = 0; u < 16; u ++)
sph_enc32le_aligned(buf + 4 * u, h2[u]);
compress_small(buf, final_s, h1);
out = dst;
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
sph_enc32le(out + 4 * u, h1[v]);
}
*/
#if SPH_64
static const __m256i final_b[16] =
{
{ 0xaaaaaaaaaaaaaaa0, 0xaaaaaaaaaaaaaaa0,
0xaaaaaaaaaaaaaaa0, 0xaaaaaaaaaaaaaaa0 },
{ 0xaaaaaaaaaaaaaaa1, 0xaaaaaaaaaaaaaaa1,
0xaaaaaaaaaaaaaaa1, 0xaaaaaaaaaaaaaaa1 },
{ 0xaaaaaaaaaaaaaaa2, 0xaaaaaaaaaaaaaaa2,
0xaaaaaaaaaaaaaaa2, 0xaaaaaaaaaaaaaaa2 },
{ 0xaaaaaaaaaaaaaaa3, 0xaaaaaaaaaaaaaaa3,
0xaaaaaaaaaaaaaaa3, 0xaaaaaaaaaaaaaaa3 },
{ 0xaaaaaaaaaaaaaaa4, 0xaaaaaaaaaaaaaaa4,
0xaaaaaaaaaaaaaaa4, 0xaaaaaaaaaaaaaaa4 },
{ 0xaaaaaaaaaaaaaaa5, 0xaaaaaaaaaaaaaaa5,
0xaaaaaaaaaaaaaaa5, 0xaaaaaaaaaaaaaaa5 },
{ 0xaaaaaaaaaaaaaaa6, 0xaaaaaaaaaaaaaaa6,
0xaaaaaaaaaaaaaaa6, 0xaaaaaaaaaaaaaaa6 },
{ 0xaaaaaaaaaaaaaaa7, 0xaaaaaaaaaaaaaaa7,
0xaaaaaaaaaaaaaaa7, 0xaaaaaaaaaaaaaaa7 },
{ 0xaaaaaaaaaaaaaaa8, 0xaaaaaaaaaaaaaaa8,
0xaaaaaaaaaaaaaaa8, 0xaaaaaaaaaaaaaaa8 },
{ 0xaaaaaaaaaaaaaaa9, 0xaaaaaaaaaaaaaaa9,
0xaaaaaaaaaaaaaaa9, 0xaaaaaaaaaaaaaaa9 },
{ 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa,
0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa },
{ 0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaab,
0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaab },
{ 0xaaaaaaaaaaaaaaac, 0xaaaaaaaaaaaaaaac,
0xaaaaaaaaaaaaaaac, 0xaaaaaaaaaaaaaaac },
{ 0xaaaaaaaaaaaaaaad, 0xaaaaaaaaaaaaaaad,
0xaaaaaaaaaaaaaaad, 0xaaaaaaaaaaaaaaad },
{ 0xaaaaaaaaaaaaaaae, 0xaaaaaaaaaaaaaaae,
0xaaaaaaaaaaaaaaae, 0xaaaaaaaaaaaaaaae },
{ 0xaaaaaaaaaaaaaaaf, 0xaaaaaaaaaaaaaaaf,
0xaaaaaaaaaaaaaaaf, 0xaaaaaaaaaaaaaaaf }
};
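// final_b broadcasts the BMW-512 finalization constants
// 0xaaaaaaaaaaaaaaa0 .. 0xaaaaaaaaaaaaaaaf across all four lanes; it is
// the 64-bit analog of the commented-out final_s table above.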
static void
bmw64_4way_init( bmw_4way_big_context *sc, const sph_u64 *iv )
{
for ( int i = 0; i < 16; i++ )
sc->H[i] = _mm256_set1_epi64x( iv[i] );
sc->ptr = 0;
sc->bit_count = 0;
}
static void
bmw64_4way( bmw_4way_big_context *sc, const void *data, size_t len )
{
__m256i *vdata = (__m256i*)data;
__m256i *buf;
__m256i htmp[16];
__m256i *h1, *h2;
size_t ptr;
const int buf_size = 128; // bytes of one lane, compatible with len
sc->bit_count += (sph_u64)len << 3;
buf = sc->buf;
ptr = sc->ptr;
h1 = sc->H;
h2 = htmp;
while ( len > 0 )
{
size_t clen;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( buf + (ptr>>3), vdata, clen >> 3 );
vdata = vdata + (clen>>3);
len -= clen;
ptr += clen;
if ( ptr == buf_size )
{
__m256i *ht;
compress_big( buf, h1, h2 );
ht = h1;
h1 = h2;
h2 = ht;
ptr = 0;
}
}
sc->ptr = ptr;
if ( h1 != sc->H )
memcpy_256( sc->H, h1, 16 );
}
static void
bmw64_4way_close(bmw_4way_big_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w64)
{
__m256i *buf;
__m256i h1[16], h2[16], *h;
size_t ptr, u, v;
unsigned z;
const int buf_size = 128; // bytes of one lane, compatible with len
buf = sc->buf;
ptr = sc->ptr;
z = 0x80 >> n;
buf[ ptr>>3 ] = _mm256_set1_epi64x( z );
ptr += 8;
h = sc->H;
if ( ptr > (buf_size - 8) )
{
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
compress_big( buf, h, h1 );
ptr = 0;
h = h1;
}
memset_zero_256( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
buf[ (buf_size - 8) >> 3 ] = _mm256_set1_epi64x( sc->bit_count + n );
compress_big( buf, h, h2 );
for ( u = 0; u < 16; u ++ )
buf[u] = h2[u];
compress_big( buf, final_b, h1 );
for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)
casti_m256i(dst,u) = h1[v];
}
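// The close sequence above follows the BMW spec: write the 0x80 padding
// marker, zero-fill, store the message bit count in the last 8 bytes of
// the block, compress, then run one final compression keyed with final_b
// before extracting the last out_size_w64 words as the digest.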
#endif
void
bmw256_4way_init(void *cc)
{
// bmw32_4way_init(cc, IV256);
}
void
bmw256_4way(void *cc, const void *data, size_t len)
{
// bmw32_4way(cc, data, len);
}
void
bmw256_4way_close(void *cc, void *dst)
{
// bmw256_4way_addbits_and_close(cc, 0, 0, dst);
}
void
bmw256_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
// bmw32_4way_close(cc, ub, n, dst, 8);
}
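// Note: the 4-way BMW-256 wrappers above are stubs with their bodies
// commented out; only the BMW-512 path below is functional in this file.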
#if SPH_64
void
bmw512_4way_init(void *cc)
{
bmw64_4way_init(cc, IV512);
}
void
bmw512_4way(void *cc, const void *data, size_t len)
{
bmw64_4way(cc, data, len);
}
void
bmw512_4way_close(void *cc, void *dst)
{
bmw512_4way_addbits_and_close(cc, 0, 0, dst);
}
void
bmw512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
bmw64_4way_close(cc, ub, n, dst, 8);
}
#endif
#ifdef __cplusplus
}
#endif
#endif

algo/bmw/bmw-hash-4way.h (new file)

@@ -0,0 +1,154 @@
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
/**
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
* functions which differ by their output size; this implementation
* defines BMW for output sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_bmw.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef BMW_HASH_H__
#define BMW_HASH_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#ifdef __AVX2__
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
/**
* Output size (in bits) for BMW-224.
*/
#define SPH_SIZE_bmw224 224
/**
* Output size (in bits) for BMW-256.
*/
#define SPH_SIZE_bmw256 256
#if SPH_64
/**
* Output size (in bits) for BMW-384.
*/
#define SPH_SIZE_bmw384 384
/**
* Output size (in bits) for BMW-512.
*/
#define SPH_SIZE_bmw512 512
#endif
/**
* This structure is a context for BMW-224 and BMW-256 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a BMW computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running BMW
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
sph_u32 H[16];
#if SPH_64
sph_u64 bit_count;
#else
sph_u32 bit_count_high, bit_count_low;
#endif
#endif
} bmw_4way_small_context;
typedef bmw_4way_small_context bmw256_4way_context;
#if SPH_64
/**
* This structure is a context for BMW-384 and BMW-512 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a BMW computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running BMW
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
__m256i buf[16];
__m256i H[16];
// unsigned char buf[128]; /* first field, for alignment */
size_t ptr;
// sph_u64 H[16];
sph_u64 bit_count;
#endif
} bmw_4way_big_context;
typedef bmw_4way_big_context bmw512_4way_context;
#endif
void bmw256_4way_init(void *cc);
void bmw256_4way(void *cc, const void *data, size_t len);
void bmw256_4way_close(void *cc, void *dst);
void bmw256_4way_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#if SPH_64
void bmw512_4way_init(void *cc);
void bmw512_4way(void *cc, const void *data, size_t len);
void bmw512_4way_close(void *cc, void *dst);
void bmw512_4way_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif
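/* Minimal usage sketch (an illustration, assuming the input has already
   been interleaved 4x64 as done elsewhere in this repo):

       bmw512_4way_context ctx;
       bmw512_4way_init( &ctx );
       bmw512_4way( &ctx, vdata, 80 );    // vdata: four interleaved 80-byte inputs
       bmw512_4way_close( &ctx, vhash );  // vhash: four interleaved 64-byte digests
*/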
#ifdef __cplusplus
}
#endif
#endif
#endif


@@ -96,34 +96,18 @@ extern "C"{
do { \
__m256i cc = _mm256_set_epi64x( c, c, c, c ); \
x3 = mm256_not( x3 ); \
x0 = _mm256_xor_si256( x0, _mm256_and_si256( cc, mm256_not( x2 ) ) ); \
x0 = _mm256_xor_si256( x0, _mm256_andnot_si256( x2, cc ) ); \
tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \
x0 = _mm256_xor_si256( x0, _mm256_and_si256( x2, x3 ) ); \
x3 = _mm256_xor_si256( x3, _mm256_and_si256( mm256_not( x1 ), x2 ) ); \
x3 = _mm256_xor_si256( x3, _mm256_andnot_si256( x1, x2 ) ); \
x1 = _mm256_xor_si256( x1, _mm256_and_si256( x0, x2 ) ); \
x2 = _mm256_xor_si256( x2, _mm256_and_si256( x0, mm256_not( x3 ) ) ); \
x2 = _mm256_xor_si256( x2, _mm256_andnot_si256( x3, x0 ) ); \
x0 = _mm256_xor_si256( x0, _mm256_or_si256( x1, x3 ) ); \
x3 = _mm256_xor_si256( x3, _mm256_and_si256( x1, x2 ) ); \
x1 = _mm256_xor_si256( x1, _mm256_and_si256( tmp, x0 ) ); \
x2 = _mm256_xor_si256( x2, tmp ); \
} while (0)
/*
#define Sb(x0, x1, x2, x3, c) do { \
x3 = ~x3; \
x0 ^= (c) & ~x2; \
tmp = (c) ^ (x0 & x1); \
x0 ^= x2 & x3; \
x3 ^= ~x1 & x2; \
x1 ^= x0 & x2; \
x2 ^= x0 & ~x3; \
x0 ^= x1 | x3; \
x3 ^= x1 & x2; \
x1 ^= tmp & x0; \
x2 ^= tmp; \
} while (0)
*/
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) \
do { \
x4 = _mm256_xor_si256( x4, x1 ); \
@@ -136,20 +120,6 @@ do { \
x3 = _mm256_xor_si256( x3, x4 ); \
} while (0)
/*
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) do { \
x4 ^= x1; \
x5 ^= x2; \
x6 ^= x3 ^ x0; \
x7 ^= x0; \
x0 ^= x5; \
x1 ^= x6; \
x2 ^= x7 ^ x4; \
x3 ^= x4; \
} while (0)
*/
#if SPH_JH_64
static const sph_u64 C[] = {


@@ -4,13 +4,10 @@
#include <memory.h>
#include <mm_malloc.h>
//#include "algo-gate-api.h"
#include "lyra2.h"
#include "algo/blake/sph_blake.h"
#include "algo/blake/blake-hash-4way.h"
//#include "avxdefs.h"
// same size, only difference is the name, lyra2 is done serially
__thread uint64_t* lyra2z_4way_matrix;
bool lyra2z_4way_thread_init()
@@ -26,12 +23,8 @@ void lyra2z_4way_midstate( const void* input )
blake256_4way( &l2z_4way_blake_mid, input, 64 );
}
// block 2050 new algo, blake plus new lyra parms. new input
// is power of 2 so normal lyra can be used
//void zcoin_hash(void *state, const void *input, uint32_t height)
void lyra2z_4way_hash( void *state, const void *input )
{
// uint32_t _ALIGN(64) hash[16];
uint32_t hash0[8] __attribute__ ((aligned (64)));
uint32_t hash1[8] __attribute__ ((aligned (64)));
uint32_t hash2[8] __attribute__ ((aligned (64)));
@@ -39,27 +32,21 @@ void lyra2z_4way_hash( void *state, const void *input )
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
// memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
// blake256_4way( &ctx_blake, input + (64*4), 16 );
// blake256_4way_close( &ctx_blake, vhash );
blake256_4way_init( &ctx_blake );
blake256_4way( &ctx_blake, input, 80 );
memcpy( &ctx_blake, &l2z_4way_blake_mid, sizeof l2z_4way_blake_mid );
blake256_4way( &ctx_blake, input + (64*4), 16 );
blake256_4way_close( &ctx_blake, vhash );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
LYRA2Z( lyra2z_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
// LYRA2Z( lyra2z_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
LYRA2Z( lyra2z_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
LYRA2Z( lyra2z_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 8, 8, 8 );
// LYRA2Z( lyra2z_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
LYRA2Z( lyra2z_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 8, 8, 8 );
memcpy( state, hash0, 32 );
memcpy( state+32, hash1, 32 );
memcpy( state+64, hash2, 32 );
memcpy( state+96, hash3, 32 );
// memcpy(state, hash, 32);
}
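// lyra2z_4way_hash above runs blake256 4-way interleaved, de-interleaves
// the four hashes, then applies LYRA2Z serially to each lane (LYRA2Z
// itself is not vectorized here).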
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
@@ -67,7 +54,6 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
// uint32_t _ALIGN(64) hash[8];
uint32_t _ALIGN(64) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -90,7 +76,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
// lyra2z_4way_midstate( vdata );
lyra2z_4way_midstate( vdata );
do {
found[0] = found[1] = found[2] = found[3] = false;
@@ -104,42 +90,33 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
printf("found 0\n");
found[0] = true;
num_found++;
nonces[0] = pdata[19] = n;
work_set_target_ratio( work, hash );
}
/*
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
printf("found 1\n");
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
*/
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
printf("found 2\n");
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
/*
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
printf("found 3\n");
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
*/
n += 2;
} while ( (num_found == 0) && (n < max_nonce-4)
&& !work_restart[thr_id].restart);
@@ -149,21 +126,3 @@ printf("found 3\n");
#endif
/*
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
work_set_target_ratio(work, hash);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
*/


@@ -9,18 +9,15 @@ void lyra2z_set_target( struct work* work, double job_diff )
bool register_lyra2z_algo( algo_gate_t* gate )
{
#ifdef LYRA2Z_4WAY
four_way_not_tested();
gate->optimizations = AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z_4way;
gate->hash = (void*)&lyra2z_4way_hash;
#else
gate->optimizations = AVX_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&lyra2z_thread_init;
gate->scanhash = (void*)&scanhash_lyra2z;
gate->hash = (void*)&lyra2z_hash;
#endif
gate->optimizations = AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&lyra2z_set_target;
return true;


@@ -2,7 +2,7 @@
bool register_nist5_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
#if defined (NIST5_4WAY)
gate->scanhash = (void*)&scanhash_nist5_4way;
gate->hash = (void*)&nist5hash_4way;


@@ -1,12 +0,0 @@
#ifndef __POLYTIMOS_GATE_H__
#define __POLYTIMOS_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
void polytimos_hash( void *state, const void *input );
int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_polytimos_context();
#endif


@@ -1,31 +1,20 @@
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
#include "algo/simd/sse2/nist.h"
#include "algo/shavite/sph_shavite.h"
#ifndef NO_AES_NI
#include "algo/echo/aes_ni/hash_api.h"
#else
#include "algo/echo/sph_echo.h"
#endif
typedef struct
{
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
hashState_sd simd;
#ifdef NO_AES_NI
sph_echo512_context echo;
#else


@@ -1,23 +1,16 @@
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
#include "algo/simd/sse2/nist.h"
#include "algo/shavite/sph_shavite.h"
#ifndef NO_AES_NI
#include "algo/echo/aes_ni/hash_api.h"
#else
#include "algo/echo/sph_echo.h"
#endif
typedef struct


@@ -0,0 +1,618 @@
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
/*
* Shabal implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#ifdef __AVX2__
#include "shabal-hash-4way.h"
#ifdef __cplusplus
extern "C"{
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
/*
* Part of this code was automatically generated (the part between
* the "BEGIN" and "END" markers).
*/
#define sM 16
#define C32 SPH_C32
#define T32 SPH_T32
#define O1 13
#define O2 9
#define O3 6
/*
* We copy the state into local variables, so that the compiler knows
* that it can optimize them at will.
*/
/* BEGIN -- automatically generated code. */
#define DECL_STATE \
__m128i A00, A01, A02, A03, A04, A05, A06, A07, \
A08, A09, A0A, A0B; \
__m128i B0, B1, B2, B3, B4, B5, B6, B7, \
B8, B9, BA, BB, BC, BD, BE, BF; \
__m128i C0, C1, C2, C3, C4, C5, C6, C7, \
C8, C9, CA, CB, CC, CD, CE, CF; \
__m128i M0, M1, M2, M3, M4, M5, M6, M7, \
M8, M9, MA, MB, MC, MD, ME, MF; \
sph_u32 Wlow, Whigh;
#define READ_STATE(state) do { \
A00 = (state)->A[0]; \
A01 = (state)->A[1]; \
A02 = (state)->A[2]; \
A03 = (state)->A[3]; \
A04 = (state)->A[4]; \
A05 = (state)->A[5]; \
A06 = (state)->A[6]; \
A07 = (state)->A[7]; \
A08 = (state)->A[8]; \
A09 = (state)->A[9]; \
A0A = (state)->A[10]; \
A0B = (state)->A[11]; \
B0 = (state)->B[0]; \
B1 = (state)->B[1]; \
B2 = (state)->B[2]; \
B3 = (state)->B[3]; \
B4 = (state)->B[4]; \
B5 = (state)->B[5]; \
B6 = (state)->B[6]; \
B7 = (state)->B[7]; \
B8 = (state)->B[8]; \
B9 = (state)->B[9]; \
BA = (state)->B[10]; \
BB = (state)->B[11]; \
BC = (state)->B[12]; \
BD = (state)->B[13]; \
BE = (state)->B[14]; \
BF = (state)->B[15]; \
C0 = (state)->C[0]; \
C1 = (state)->C[1]; \
C2 = (state)->C[2]; \
C3 = (state)->C[3]; \
C4 = (state)->C[4]; \
C5 = (state)->C[5]; \
C6 = (state)->C[6]; \
C7 = (state)->C[7]; \
C8 = (state)->C[8]; \
C9 = (state)->C[9]; \
CA = (state)->C[10]; \
CB = (state)->C[11]; \
CC = (state)->C[12]; \
CD = (state)->C[13]; \
CE = (state)->C[14]; \
CF = (state)->C[15]; \
Wlow = (state)->Wlow; \
Whigh = (state)->Whigh; \
} while (0)
#define WRITE_STATE(state) do { \
(state)->A[0] = A00; \
(state)->A[1] = A01; \
(state)->A[2] = A02; \
(state)->A[3] = A03; \
(state)->A[4] = A04; \
(state)->A[5] = A05; \
(state)->A[6] = A06; \
(state)->A[7] = A07; \
(state)->A[8] = A08; \
(state)->A[9] = A09; \
(state)->A[10] = A0A; \
(state)->A[11] = A0B; \
(state)->B[0] = B0; \
(state)->B[1] = B1; \
(state)->B[2] = B2; \
(state)->B[3] = B3; \
(state)->B[4] = B4; \
(state)->B[5] = B5; \
(state)->B[6] = B6; \
(state)->B[7] = B7; \
(state)->B[8] = B8; \
(state)->B[9] = B9; \
(state)->B[10] = BA; \
(state)->B[11] = BB; \
(state)->B[12] = BC; \
(state)->B[13] = BD; \
(state)->B[14] = BE; \
(state)->B[15] = BF; \
(state)->C[0] = C0; \
(state)->C[1] = C1; \
(state)->C[2] = C2; \
(state)->C[3] = C3; \
(state)->C[4] = C4; \
(state)->C[5] = C5; \
(state)->C[6] = C6; \
(state)->C[7] = C7; \
(state)->C[8] = C8; \
(state)->C[9] = C9; \
(state)->C[10] = CA; \
(state)->C[11] = CB; \
(state)->C[12] = CC; \
(state)->C[13] = CD; \
(state)->C[14] = CE; \
(state)->C[15] = CF; \
(state)->Wlow = Wlow; \
(state)->Whigh = Whigh; \
} while (0)
#define DECODE_BLOCK \
do { \
M0 = buf[ 0]; \
M1 = buf[ 1]; \
M2 = buf[ 2]; \
M3 = buf[ 3]; \
M4 = buf[ 4]; \
M5 = buf[ 5]; \
M6 = buf[ 6]; \
M7 = buf[ 7]; \
M8 = buf[ 8]; \
M9 = buf[ 9]; \
MA = buf[10]; \
MB = buf[11]; \
MC = buf[12]; \
MD = buf[13]; \
ME = buf[14]; \
MF = buf[15]; \
} while (0)
#define INPUT_BLOCK_ADD \
do { \
B0 = _mm_add_epi32( B0, M0 );\
B1 = _mm_add_epi32( B1, M1 );\
B2 = _mm_add_epi32( B2, M2 );\
B3 = _mm_add_epi32( B3, M3 );\
B4 = _mm_add_epi32( B4, M4 );\
B5 = _mm_add_epi32( B5, M5 );\
B6 = _mm_add_epi32( B6, M6 );\
B7 = _mm_add_epi32( B7, M7 );\
B8 = _mm_add_epi32( B8, M8 );\
B9 = _mm_add_epi32( B9, M9 );\
BA = _mm_add_epi32( BA, MA );\
BB = _mm_add_epi32( BB, MB );\
BC = _mm_add_epi32( BC, MC );\
BD = _mm_add_epi32( BD, MD );\
BE = _mm_add_epi32( BE, ME );\
BF = _mm_add_epi32( BF, MF );\
} while (0)
#define INPUT_BLOCK_SUB \
do { \
C0 = _mm_sub_epi32( C0, M0 ); \
C1 = _mm_sub_epi32( C1, M1 ); \
C2 = _mm_sub_epi32( C2, M2 ); \
C3 = _mm_sub_epi32( C3, M3 ); \
C4 = _mm_sub_epi32( C4, M4 ); \
C5 = _mm_sub_epi32( C5, M5 ); \
C6 = _mm_sub_epi32( C6, M6 ); \
C7 = _mm_sub_epi32( C7, M7 ); \
C8 = _mm_sub_epi32( C8, M8 ); \
C9 = _mm_sub_epi32( C9, M9 ); \
CA = _mm_sub_epi32( CA, MA ); \
CB = _mm_sub_epi32( CB, MB ); \
CC = _mm_sub_epi32( CC, MC ); \
CD = _mm_sub_epi32( CD, MD ); \
CE = _mm_sub_epi32( CE, ME ); \
CF = _mm_sub_epi32( CF, MF ); \
} while (0)
#define XOR_W \
do { \
A00 = _mm_xor_si128( A00, _mm_set1_epi32( Wlow ) ); \
A01 = _mm_xor_si128( A01, _mm_set1_epi32( Whigh ) ); \
} while (0)
/*
#define SWAP(v1, v2) do { \
sph_u32 tmp = (v1); \
(v1) = (v2); \
(v2) = tmp; \
} while (0)
*/
#define SWAP_BC \
do { \
mm_swap_128( B0, C0 ); \
mm_swap_128( B1, C1 ); \
mm_swap_128( B2, C2 ); \
mm_swap_128( B3, C3 ); \
mm_swap_128( B4, C4 ); \
mm_swap_128( B5, C5 ); \
mm_swap_128( B6, C6 ); \
mm_swap_128( B7, C7 ); \
mm_swap_128( B8, C8 ); \
mm_swap_128( B9, C9 ); \
mm_swap_128( BA, CA ); \
mm_swap_128( BB, CB ); \
mm_swap_128( BC, CC ); \
mm_swap_128( BD, CD ); \
mm_swap_128( BE, CE ); \
mm_swap_128( BF, CF ); \
} while (0)
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) \
do { \
xa0 = _mm_xor_si128( xm, _mm_xor_si128( xb1, _mm_xor_si128( \
_mm_andnot_si128( xb3, xb2 ), \
_mm_mullo_epi32( _mm_xor_si128( xa0, _mm_xor_si128( xc, \
_mm_mullo_epi32( mm_rotl_32( xa1, 15 ), _mm_set1_epi32(5UL) ) \
) ), _mm_set1_epi32(3UL) ) ) ) ); \
xb0 = mm_not( _mm_xor_si128( xa0, mm_rotl_32( xb0, 1 ) ) ); \
} while (0)
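/* Scalar equivalent of PERM_ELT (a sketch matching the vector code above):
       xa0 = xm ^ xb1 ^ (xb2 & ~xb3)
             ^ 3U * ( xa0 ^ xc ^ 5U * ROTL32( xa1, 15 ) );
       xb0 = ~( ROTL32( xb0, 1 ) ^ xa0 );
   applied independently to each of the four 32-bit lanes. */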
#define PERM_STEP_0 do { \
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
} while (0)
#define PERM_STEP_1 do { \
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
} while (0)
#define PERM_STEP_2 do { \
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
} while (0)
#define APPLY_P \
do { \
B0 = mm_rotr_32( B0, 15 ); \
B1 = mm_rotr_32( B1, 15 ); \
B2 = mm_rotr_32( B2, 15 ); \
B3 = mm_rotr_32( B3, 15 ); \
B4 = mm_rotr_32( B4, 15 ); \
B5 = mm_rotr_32( B5, 15 ); \
B6 = mm_rotr_32( B6, 15 ); \
B7 = mm_rotr_32( B7, 15 ); \
B8 = mm_rotr_32( B8, 15 ); \
B9 = mm_rotr_32( B9, 15 ); \
BA = mm_rotr_32( BA, 15 ); \
BB = mm_rotr_32( BB, 15 ); \
BC = mm_rotr_32( BC, 15 ); \
BD = mm_rotr_32( BD, 15 ); \
BE = mm_rotr_32( BE, 15 ); \
BF = mm_rotr_32( BF, 15 ); \
PERM_STEP_0; \
PERM_STEP_1; \
PERM_STEP_2; \
A0B = _mm_add_epi32( A0B, C6 ); \
A0A = _mm_add_epi32( A0A, C5 ); \
A09 = _mm_add_epi32( A09, C4 ); \
A08 = _mm_add_epi32( A08, C3 ); \
A07 = _mm_add_epi32( A07, C2 ); \
A06 = _mm_add_epi32( A06, C1 ); \
A05 = _mm_add_epi32( A05, C0 ); \
A04 = _mm_add_epi32( A04, CF ); \
A03 = _mm_add_epi32( A03, CE ); \
A02 = _mm_add_epi32( A02, CD ); \
A01 = _mm_add_epi32( A01, CC ); \
A00 = _mm_add_epi32( A00, CB ); \
A0B = _mm_add_epi32( A0B, CA ); \
A0A = _mm_add_epi32( A0A, C9 ); \
A09 = _mm_add_epi32( A09, C8 ); \
A08 = _mm_add_epi32( A08, C7 ); \
A07 = _mm_add_epi32( A07, C6 ); \
A06 = _mm_add_epi32( A06, C5 ); \
A05 = _mm_add_epi32( A05, C4 ); \
A04 = _mm_add_epi32( A04, C3 ); \
A03 = _mm_add_epi32( A03, C2 ); \
A02 = _mm_add_epi32( A02, C1 ); \
A01 = _mm_add_epi32( A01, C0 ); \
A00 = _mm_add_epi32( A00, CF ); \
A0B = _mm_add_epi32( A0B, CE ); \
A0A = _mm_add_epi32( A0A, CD ); \
A09 = _mm_add_epi32( A09, CC ); \
A08 = _mm_add_epi32( A08, CB ); \
A07 = _mm_add_epi32( A07, CA ); \
A06 = _mm_add_epi32( A06, C9 ); \
A05 = _mm_add_epi32( A05, C8 ); \
A04 = _mm_add_epi32( A04, C7 ); \
A03 = _mm_add_epi32( A03, C6 ); \
A02 = _mm_add_epi32( A02, C5 ); \
A01 = _mm_add_epi32( A01, C4 ); \
A00 = _mm_add_epi32( A00, C3 ); \
} while (0)
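/* The 36 unrolled additions above are APPLY_P's final loop: for j = 0..35,
   A[ (11 - j) mod 12 ] += C[ (6 - j) mod 16 ], in the same order as the
   reference sph_shabal implementation. */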
#define INCR_W do { \
if ((Wlow = T32(Wlow + 1)) == 0) \
Whigh = T32(Whigh + 1); \
} while (0)
static const sph_u32 A_init_256[] = {
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
};
static const sph_u32 B_init_256[] = {
C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
};
static const sph_u32 C_init_256[] = {
C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
};
static const sph_u32 A_init_512[] = {
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
};
static const sph_u32 B_init_512[] = {
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
};
static const sph_u32 C_init_512[] = {
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
};
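/* The A/B/C tables above are the precomputed Shabal-256 and Shabal-512 IVs,
   i.e. the state left after absorbing the two prefix blocks in the reference
   implementation; init broadcasts each word across all four lanes. */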
static void
shabal_4way_init( void *cc, unsigned size )
{
shabal_4way_context *sc = (shabal_4way_context*)cc;
int i;
if ( size == 512 )
{
for ( i = 0; i < 12; i++ )
sc->A[i] = _mm_set1_epi32( A_init_512[i] );
for ( i = 0; i < 16; i++ )
{
sc->B[i] = _mm_set1_epi32( B_init_512[i] );
sc->C[i] = _mm_set1_epi32( C_init_512[i] );
}
}
else
{
for ( i = 0; i < 12; i++ )
sc->A[i] = _mm_set1_epi32( A_init_256[i] );
for ( i = 0; i < 16; i++ )
{
sc->B[i] = _mm_set1_epi32( B_init_256[i] );
sc->C[i] = _mm_set1_epi32( C_init_256[i] );
}
}
sc->Wlow = 1;
sc->Whigh = 0;
sc->ptr = 0;
}
static void
shabal_4way_core( void *cc, const unsigned char *data, size_t len )
{
shabal_4way_context *sc = (shabal_4way_context*)cc;
__m128i *buf;
__m128i *vdata = (__m128i*)data;
const int buf_size = 64;
size_t ptr;
DECL_STATE
buf = sc->buf;
ptr = sc->ptr;
if ( len < (buf_size - ptr ) )
{
memcpy_128( buf + (ptr>>2), vdata, len>>2 );
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE(sc);
while ( len > 0 )
{
size_t clen;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_128( buf + (ptr>>2), vdata, clen>>2 );
ptr += clen;
vdata += clen>>2;
len -= clen;
if ( ptr == buf_size )
{
DECODE_BLOCK;
INPUT_BLOCK_ADD;
XOR_W;
APPLY_P;
INPUT_BLOCK_SUB;
SWAP_BC;
INCR_W;
ptr = 0;
}
}
WRITE_STATE(sc);
sc->ptr = ptr;
}
static void
shabal_4way_close( void *cc, unsigned ub, unsigned n, void *dst,
unsigned size_words )
{
shabal_4way_context *sc = (shabal_4way_context*)cc;
__m128i *buf;
const int buf_size = 64;
size_t ptr;
int i;
unsigned z, zz;
DECL_STATE
buf = sc->buf;
ptr = sc->ptr;
z = 0x80 >> n;
zz = ((ub & -z) | z) & 0xFF;
buf[ptr>>2] = _mm_set1_epi32( zz );
memset_zero_128( buf + (ptr>>2) + 1, ( (buf_size - ptr) >> 2 ) - 1 );
READ_STATE(sc);
DECODE_BLOCK;
INPUT_BLOCK_ADD;
XOR_W;
APPLY_P;
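// Finalization per the Shabal spec: three extra rounds with W unchanged.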
for ( i = 0; i < 3; i ++ )
{
SWAP_BC;
XOR_W;
APPLY_P;
}
__m128i *d = (__m128i*)dst;
if ( size_words == 16 ) // 512
{
d[ 0] = B0; d[ 1] = B1; d[ 2] = B2; d[ 3] = B3;
d[ 4] = B4; d[ 5] = B5; d[ 6] = B6; d[ 7] = B7;
d[ 8] = B8; d[ 9] = B9; d[10] = BA; d[11] = BB;
d[12] = BC; d[13] = BD; d[14] = BE; d[15] = BF;
}
else // 256
{
d[ 0] = B8; d[ 1] = B9; d[ 2] = BA; d[ 3] = BB;
d[ 4] = BC; d[ 5] = BD; d[ 6] = BE; d[ 7] = BF;
}
}
void
shabal256_4way_init( void *cc )
{
shabal_4way_init(cc, 256);
}
void
shabal256_4way( void *cc, const void *data, size_t len )
{
shabal_4way_core( cc, data, len );
}
void
shabal256_4way_close( void *cc, void *dst )
{
shabal_4way_close(cc, 0, 0, dst, 8);
}
void
shabal256_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void *dst )
{
shabal_4way_close(cc, ub, n, dst, 8);
}
void
shabal512_4way_init(void *cc)
{
shabal_4way_init(cc, 512);
}
void
shabal512_4way(void *cc, const void *data, size_t len)
{
shabal_4way_core(cc, data, len);
}
void
shabal512_4way_close(void *cc, void *dst)
{
shabal_4way_close(cc, 0, 0, dst, 16);
}
void
shabal512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
shabal_4way_close(cc, ub, n, dst, 16);
}
#ifdef __cplusplus
}
#endif
#endif
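
For orientation, a minimal usage sketch of the new 4-way interface (not part
of the commit; mm_interleave_4x32 and mm_deinterleave_4x32 are the avxdefs.h
helpers, assumed here to behave as they are used at the call sites later in
this changeset):

#include "algo/shabal/shabal-hash-4way.h"

// Hash four independent 64-byte messages in one shabal512_4way pass.
// Each in/out buffer holds 16 32-bit words; keep them 16-byte aligned
// for the SSE2 loads and stores.
void shabal512_x4_sketch( uint32_t *out0, uint32_t *out1, uint32_t *out2,
            uint32_t *out3, const uint32_t *in0, const uint32_t *in1,
            const uint32_t *in2, const uint32_t *in3 )
{
   uint32_t vin [16*4] __attribute__ ((aligned (64)));
   uint32_t vout[16*4] __attribute__ ((aligned (64)));
   shabal512_4way_context ctx;

   mm_interleave_4x32( vin, in0, in1, in2, in3, 512 );  // 512 bits per lane
   shabal512_4way_init( &ctx );
   shabal512_4way( &ctx, vin, 64 );                     // 64 bytes per lane
   shabal512_4way_close( &ctx, vout );
   mm_deinterleave_4x32( out0, out1, out2, out3, vout, 512 );
}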


@@ -0,0 +1,82 @@
/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */
/**
 * Shabal interface. Shabal is a family of functions which differ by
 * their output size; this 4-way implementation defines Shabal for
 * output sizes 256 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
 * @file shabal-hash-4way.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SHABAL_HASH_4WAY_H__
#define SHABAL_HASH_4WAY_H__ 1
#ifdef __AVX2__
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#ifdef __cplusplus
extern "C"{
#endif
#define SPH_SIZE_shabal256 256
#define SPH_SIZE_shabal512 512
typedef struct {
__m128i buf[16] __attribute__ ((aligned (64)));
__m128i A[12], B[16], C[16];
sph_u32 Whigh, Wlow;
size_t ptr;
} shabal_4way_context;
typedef shabal_4way_context shabal256_4way_context;
typedef shabal_4way_context shabal512_4way_context;
void shabal256_4way_init( void *cc );
void shabal256_4way( void *cc, const void *data, size_t len );
void shabal256_4way_close( void *cc, void *dst );
void shabal256_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void *dst );
void shabal512_4way_init( void *cc );
void shabal512_4way( void *cc, const void *data, size_t len );
void shabal512_4way_close( void *cc, void *dst );
void shabal512_4way_addbits_and_close( void *cc, unsigned ub, unsigned n,
void *dst );
#ifdef __cplusplus
}
#endif
#endif
#endif


@@ -7,7 +7,7 @@
#include <stdint.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -20,7 +20,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -37,7 +37,7 @@ c11_4way_ctx_holder c11_4way_ctx;
void init_c11_4way_ctx()
{
blake512_4way_init( &c11_4way_ctx.blake );
sph_bmw512_init( &c11_4way_ctx.bmw );
bmw512_4way_init( &c11_4way_ctx.bmw );
init_groestl( &c11_4way_ctx.groestl, 64 );
skein512_4way_init( &c11_4way_ctx.skein );
jh512_4way_init( &c11_4way_ctx.jh );
@@ -63,22 +63,13 @@ void c11_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 2 Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &c11_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &c11_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &c11_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &c11_4way_ctx.groestl, sizeof(hashState_groestl) );
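
The recurring edit above — a single bmw512_4way call on the interleaved
buffer plus one mm256_deinterleave_4x64, replacing four serial sph_bmw512
invocations — depends on the 4x64 lane layout. A scalar model of that layout
(illustrative only, assuming the avxdefs.h helpers order lanes as they are
used throughout this commit):

#include <stdint.h>

// 4x64 interleaving stores 64-bit word i of lane L at v[ 4*i + L ],
// so deinterleaving is a strided copy; 512 bits is 8 words per lane.
static void deinterleave_4x64_model( uint64_t *h0, uint64_t *h1,
             uint64_t *h2, uint64_t *h3, const uint64_t *v, int bit_len )
{
   for ( int i = 0; i < bit_len / 64; i++ )
   {
      h0[i] = v[ 4*i     ];
      h1[i] = v[ 4*i + 1 ];
      h2[i] = v[ 4*i + 2 ];
      h3[i] = v[ 4*i + 3 ];
   }
}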


@@ -11,7 +11,7 @@ bool register_c11_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_c11;
gate->hash = (void*)&c11_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};


@@ -7,7 +7,7 @@
#include <stdint.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -20,7 +20,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -37,7 +37,7 @@ x11_4way_ctx_holder x11_4way_ctx;
void init_x11_4way_ctx()
{
blake512_4way_init( &x11_4way_ctx.blake );
sph_bmw512_init( &x11_4way_ctx.bmw );
bmw512_4way_init( &x11_4way_ctx.bmw );
init_groestl( &x11_4way_ctx.groestl, 64 );
skein512_4way_init( &x11_4way_ctx.skein );
jh512_4way_init( &x11_4way_ctx.jh );
@@ -63,22 +63,13 @@ void x11_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 2 Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x11_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x11_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x11_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x11_4way_ctx.groestl, sizeof(hashState_groestl) );


@@ -11,7 +11,7 @@ bool register_x11_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11;
gate->hash = (void*)&x11_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};


@@ -7,7 +7,7 @@
#include <stdint.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -21,7 +21,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -39,7 +39,7 @@ x11gost_4way_ctx_holder x11gost_4way_ctx;
void init_x11gost_4way_ctx()
{
blake512_4way_init( &x11gost_4way_ctx.blake );
sph_bmw512_init( &x11gost_4way_ctx.bmw );
bmw512_4way_init( &x11gost_4way_ctx.bmw );
init_groestl( &x11gost_4way_ctx.groestl, 64 );
skein512_4way_init( &x11gost_4way_ctx.skein );
jh512_4way_init( &x11gost_4way_ctx.jh );
@@ -65,21 +65,12 @@ void x11gost_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x11gost_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x11gost_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x11gost_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x11gost_4way_ctx.groestl,
sizeof(hashState_groestl) );
@@ -110,8 +101,8 @@ void x11gost_4way_hash( void *state, const void *input )
sph_gost512_close( &ctx.gost, hash0 );
memcpy( &ctx.gost, &x11gost_4way_ctx.gost, sizeof(sph_gost512_context) );
sph_gost512( &ctx.gost, hash1, 64 );
memcpy( &ctx.gost, &x11gost_4way_ctx.gost, sizeof(sph_gost512_context) );
sph_gost512_close( &ctx.gost, hash1 );
memcpy( &ctx.gost, &x11gost_4way_ctx.gost, sizeof(sph_gost512_context) );
sph_gost512( &ctx.gost, hash2, 64 );
sph_gost512_close( &ctx.gost, hash2 );
memcpy( &ctx.gost, &x11gost_4way_ctx.gost, sizeof(sph_gost512_context) );


@@ -11,7 +11,7 @@ bool register_x11gost_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x11gost;
gate->hash = (void*)&x11gost_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

algo/x13/skunk-4way.c Normal file

@@ -0,0 +1,158 @@
#include "skunk-gate.h"
#ifdef __AVX2__
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/skein/skein-hash-4way.h"
#include "algo/gost/sph_gost.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
typedef struct {
skein512_4way_context skein;
cubehashParam cube;
sph_fugue512_context fugue;
sph_gost512_context gost;
} skunk_4way_ctx_holder;
static __thread skunk_4way_ctx_holder skunk_4way_ctx;
void skunk_4way_hash( void *output, const void *input )
{
uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64)));
uint64_t hash2[8] __attribute__ ((aligned (64)));
uint64_t hash3[8] __attribute__ ((aligned (64)));
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );
skein512_4way( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*)hash0, 64 );
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*) hash1, 64 );
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*) hash2, 64 );
memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
sph_fugue512_init( &ctx.fugue );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_gost512( &ctx.gost, hash0, 64 );
sph_gost512_close( &ctx.gost, hash0 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash1, 64 );
sph_gost512_close( &ctx.gost, hash1 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash2, 64 );
sph_gost512_close( &ctx.gost, hash2 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash3, 64 );
sph_gost512_close( &ctx.gost, hash3 );
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
}
int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
const uint32_t Htarg = ptarget[7];
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( opt_benchmark )
((uint32_t*)ptarget)[7] = 0x0cff;
for ( int k = 0; k < 19; k++ )
be32enc( &endiandata[k], pdata[k] );
uint64_t *edata = (uint64_t*)endiandata;
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
skunk_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
*hashes_done = n - first_nonce + 1;
return num_found;
}
bool skunk_4way_thread_init()
{
skein512_4way_init( &skunk_4way_ctx.skein );
cubehashInit( &skunk_4way_ctx.cube, 512, 16, 32 );
sph_fugue512_init( &skunk_4way_ctx.fugue );
sph_gost512_init( &skunk_4way_ctx.gost );
return true;
}
#endif
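
A note on the magic offset 73 above: the nonce is 32-bit word 19 of the
80-byte block header, i.e. the high half of 64-bit word 9. After 4x64
interleaving, lane L's 64-bit word 9 sits at 64-bit index 9*4 + L, which in
little-endian 32-bit words puts the nonce at 2*(9*4 + L) + 1, hence 73, 75,
77, 79. A self-contained check:

#include <stdio.h>

int main()
{
   for ( int lane = 0; lane < 4; lane++ )   // prints 73, 75, 77, 79
      printf( "lane %d nonce at vdata + %d\n", lane, 2*( 9*4 + lane ) + 1 );
   return 0;
}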

algo/x13/skunk-gate.c Normal file

@@ -0,0 +1,18 @@
#include "skunk-gate.h"
bool register_skunk_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
#if defined (SKUNK_4WAY)
gate->miner_thread_init = (void*)&skunk_4way_thread_init;
gate->scanhash = (void*)&scanhash_skunk_4way;
gate->hash = (void*)&skunk_4way_hash;
// init_skunk_4way_ctx();
#else
gate->miner_thread_init = (void*)&skunk_thread_init;
gate->scanhash = (void*)&scanhash_skunk;
gate->hash = (void*)&skunkhash;
#endif
return true;
}

algo/x13/skunk-gate.h Normal file

@@ -0,0 +1,33 @@
#ifndef SKUNK_GATE_H__
#define SKUNK_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(HASH_4WAY)
#define SKUNK_4WAY
#endif
bool register_skunk_algo( algo_gate_t* gate );
#if defined(SKUNK_4WAY)
void skunk_4way_hash( void *state, const void *input );
int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
bool skunk_4way_thread_init();
//void init_skunk_4way_ctx();
#endif
void skunkhash( void *state, const void *input );
int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
bool skunk_thread_init();
#endif
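
For context, the gate pattern the new skunk files follow: register_skunk_algo
fills an algo_gate_t with either the 4-way or the scalar entry points, and
the miner core only ever calls through the gate. A rough sketch (illustrative;
the real call sites in cpu-miner.c differ in detail):

#include "skunk-gate.h"

// Resolve skunk through the algo gate. After registration, gate->scanhash
// and gate->hash point at scanhash_skunk_4way / skunk_4way_hash when
// HASH_4WAY is defined, or at the scalar skunk versions otherwise.
static bool setup_skunk( algo_gate_t *gate )
{
   return register_skunk_algo( gate );
}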


@@ -1,10 +1,8 @@
#include "algo-gate-api.h"
#include "skunk-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/gost/sph_gost.h"
#include "algo/skein/sph_skein.h"
#include "algo/fugue/sph_fugue.h"
@@ -90,12 +88,3 @@ bool skunk_thread_init()
sph_gost512_init( &skunk_ctx.gost );
return true;
}
bool register_skunk_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->miner_thread_init = (void*)&skunk_thread_init;
gate->scanhash = (void*)&scanhash_skunk;
gate->hash = (void*)&skunkhash;
return true;
}


@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -22,7 +22,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -41,7 +41,7 @@ x13_4way_ctx_holder x13_4way_ctx __attribute__ ((aligned (64)));
void init_x13_4way_ctx()
{
blake512_4way_init( &x13_4way_ctx.blake );
sph_bmw512_init( &x13_4way_ctx.bmw );
bmw512_4way_init( &x13_4way_ctx.bmw );
init_groestl( &x13_4way_ctx.groestl, 64 );
skein512_4way_init( &x13_4way_ctx.skein );
jh512_4way_init( &x13_4way_ctx.jh );
@@ -69,22 +69,13 @@ void x13_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 2 Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x13_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x13_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x13_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x13_4way_ctx.groestl, sizeof(hashState_groestl) );


@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -23,7 +23,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -44,7 +44,7 @@ static __thread blake512_4way_context x13sm3_ctx_mid;
void init_x13sm3_4way_ctx()
{
blake512_4way_init( &x13sm3_4way_ctx.blake );
sph_bmw512_init( &x13sm3_4way_ctx.bmw );
bmw512_4way_init( &x13sm3_4way_ctx.bmw );
init_groestl( &x13sm3_4way_ctx.groestl, 64 );
skein512_4way_init( &x13sm3_4way_ctx.skein );
jh512_4way_init( &x13sm3_4way_ctx.jh );
@@ -76,22 +76,13 @@ void x13sm3_4way_hash( void *state, const void *input )
// blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x13sm3_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x13sm3_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x13sm3_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x13sm3_4way_ctx.groestl,

algo/x14/polytimos-4way.c Normal file

@@ -0,0 +1,185 @@
#include "polytimos-gate.h"
#if defined(__AVX2__) && defined(__AES__)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/skein/skein-hash-4way.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/fugue//sph_fugue.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
//#include "algo/shabal/sph_shabal.h"
#include "algo/gost/sph_gost.h"
#include "algo/echo/aes_ni/hash_api.h"
typedef struct {
skein512_4way_context skein;
shabal512_4way_context shabal;
hashState_echo echo;
hashState_luffa luffa;
sph_fugue512_context fugue;
sph_gost512_context gost;
} poly_4way_ctx_holder;
poly_4way_ctx_holder poly_4way_ctx;
void init_polytimos_4way_ctx()
{
skein512_4way_init( &poly_4way_ctx.skein );
shabal512_4way_init( &poly_4way_ctx.shabal );
init_echo( &poly_4way_ctx.echo, 512 );
init_luffa( &poly_4way_ctx.luffa, 512 );
sph_fugue512_init( &poly_4way_ctx.fugue );
sph_gost512_init( &poly_4way_ctx.gost );
}
void polytimos_4way_hash( void *output, const void *input )
{
uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64)));
uint64_t hash2[8] __attribute__ ((aligned (64)));
uint64_t hash3[8] __attribute__ ((aligned (64)));
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
poly_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &poly_4way_ctx, sizeof(poly_4way_ctx) );
skein512_4way( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash );
// Need to convert from 64 bit interleaved to 32 bit interleaved.
uint32_t vhash32[16*4] __attribute__ ((aligned (64)));
mm256_reinterleave_4x32( vhash32, vhash, 512 );
shabal512_4way( &ctx.shabal, vhash32, 64 );
shabal512_4way_close( &ctx.shabal, vhash32 );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash32, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash0,
(const BitSequence *)hash0, 512 );
memcpy( &ctx.echo, &poly_4way_ctx.echo, sizeof(hashState_echo) );
update_final_echo( &ctx.echo, (BitSequence *)hash1,
(const BitSequence *) hash1, 512 );
memcpy( &ctx.echo, &poly_4way_ctx.echo, sizeof(hashState_echo) );
update_final_echo( &ctx.echo, (BitSequence *)hash2,
(const BitSequence *) hash2, 512 );
memcpy( &ctx.echo, &poly_4way_ctx.echo, sizeof(hashState_echo) );
update_final_echo( &ctx.echo, (BitSequence *)hash3,
(const BitSequence *) hash3, 512 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0,
(const BitSequence*)hash0, 64 );
memcpy( &ctx.luffa, &poly_4way_ctx.luffa, sizeof(hashState_luffa) );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash1,
(const BitSequence*)hash1, 64 );
memcpy( &ctx.luffa, &poly_4way_ctx.luffa, sizeof(hashState_luffa) );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash2,
(const BitSequence*)hash2, 64 );
memcpy( &ctx.luffa, &poly_4way_ctx.luffa, sizeof(hashState_luffa) );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash3,
(const BitSequence*)hash3, 64 );
sph_fugue512( &ctx.fugue, hash0, 64 );
sph_fugue512_close( &ctx.fugue, hash0 );
memcpy( &ctx.fugue, &poly_4way_ctx.fugue, sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, hash1, 64 );
sph_fugue512_close( &ctx.fugue, hash1 );
memcpy( &ctx.fugue, &poly_4way_ctx.fugue, sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, hash2, 64 );
sph_fugue512_close( &ctx.fugue, hash2 );
memcpy( &ctx.fugue, &poly_4way_ctx.fugue, sizeof(sph_fugue512_context) );
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_gost512( &ctx.gost, hash0, 64 );
sph_gost512_close( &ctx.gost, hash0 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash1, 64 );
sph_gost512_close( &ctx.gost, hash1 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash2, 64 );
sph_gost512_close( &ctx.gost, hash2 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash3, 64 );
sph_gost512_close( &ctx.gost, hash3 );
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
}
int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
const uint32_t Htarg = ptarget[7];
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( opt_benchmark )
ptarget[7] = 0x0cff;
for ( int i=0; i < 19; i++ )
be32enc( &endiandata[i], pdata[i] );
uint64_t *edata = (uint64_t*)endiandata;
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
polytimos_4way_hash(hash, vdata);
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart));
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif
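
polytimos is the first algo in this diff that feeds a 64-bit-interleaved
stage (skein512_4way) directly into a 32-bit-interleaved one
(shabal512_4way), hence the mm256_reinterleave_4x32 call above. A scalar
model of that conversion (illustrative, assuming the same lane ordering as
the other interleave helpers):

#include <stdint.h>

// Convert 4x64 interleave ( v64[ 4*i + L ] = lane L's 64-bit word i ) to
// 4x32 interleave ( v32[ 4*j + L ] = lane L's 32-bit word j ), little-endian.
static void reinterleave_4x32_model( uint32_t *v32, const uint64_t *v64,
                                     int bit_len )
{
   for ( int i = 0; i < bit_len / 64; i++ )
      for ( int lane = 0; lane < 4; lane++ )
      {
         uint64_t w = v64[ 4*i + lane ];
         v32[ 4*( 2*i )     + lane ] = (uint32_t) w;           // low 32 bits
         v32[ 4*( 2*i + 1 ) + lane ] = (uint32_t)( w >> 32 );  // high 32 bits
      }
}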


@@ -2,10 +2,16 @@
bool register_polytimos_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
#ifdef POLYTIMOS_4WAY
init_polytimos_4way_ctx();
gate->scanhash = (void*)&scanhash_polytimos_4way;
gate->hash = (void*)&polytimos_4way_hash;
#else
init_polytimos_context();
gate->scanhash = (void*)&scanhash_polytimos;
gate->hash = (void*)&polytimos_hash;
#endif
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

algo/x14/polytimos-gate.h Normal file

@@ -0,0 +1,32 @@
#ifndef POLYTIMOS_GATE_H__
#define POLYTIMOS_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(HASH_4WAY) && defined(__AES__)
#define POLYTIMOS_4WAY
#endif
bool register_polytimos_algo( algo_gate_t* gate );
#if defined(POLYTIMOS_4WAY)
void polytimos_4way_hash( void *state, const void *input );
int scanhash_polytimos_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_polytimos_4way_ctx();
#endif
void polytimos_hash( void *state, const void *input );
int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_polytimos_context();
#endif

algo/x14/veltor-4way.c Normal file

@@ -0,0 +1,154 @@
#include "veltor-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#if defined(__AVX2__) && defined(__AES__)
#include "algo/skein/skein-hash-4way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/gost/sph_gost.h"
typedef struct {
skein512_4way_context skein;
sph_shavite512_context shavite;
shabal512_4way_context shabal;
sph_gost512_context gost;
} veltor_4way_ctx_holder;
veltor_4way_ctx_holder veltor_4way_ctx __attribute__ ((aligned (64)));
void init_veltor_4way_ctx()
{
skein512_4way_init( &veltor_4way_ctx.skein );
sph_shavite512_init( &veltor_4way_ctx.shavite );
shabal512_4way_init( &veltor_4way_ctx.shabal );
sph_gost512_init( &veltor_4way_ctx.gost );
}
void veltor_4way_hash( void *output, const void *input )
{
uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64)));
uint64_t hash2[8] __attribute__ ((aligned (64)));
uint64_t hash3[8] __attribute__ ((aligned (64)));
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
veltor_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &veltor_4way_ctx, sizeof(veltor_4way_ctx) );
skein512_4way( &ctx.skein, input, 80 );
skein512_4way_close( &ctx.skein, vhash );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
sph_shavite512( &ctx.shavite, hash0, 64 );
sph_shavite512_close( &ctx.shavite, hash0 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash3, 64 );
sph_shavite512_close( &ctx.shavite, hash3 );
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
sph_gost512( &ctx.gost, hash0, 64 );
sph_gost512_close( &ctx.gost, hash0 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash1, 64 );
sph_gost512_close( &ctx.gost, hash1 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash2, 64 );
sph_gost512_close( &ctx.gost, hash2 );
sph_gost512_init( &ctx.gost );
sph_gost512( &ctx.gost, hash3, 64 );
sph_gost512_close( &ctx.gost, hash3 );
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
}
int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
volatile uint8_t *restart = &(work_restart[thr_id].restart);
if ( opt_benchmark )
ptarget[7] = 0x0cff;
for ( int i=0; i < 19; i++ )
{
be32enc( &endiandata[i], pdata[i] );
}
uint64_t *edata = (uint64_t*)endiandata;
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
do
{
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
veltor_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif

algo/x14/veltor-gate.c Normal file

@@ -0,0 +1,18 @@
#include "veltor-gate.h"
bool register_veltor_algo( algo_gate_t* gate )
{
#if defined (VELTOR_4WAY)
init_veltor_4way_ctx();
gate->scanhash = (void*)&scanhash_veltor_4way;
gate->hash = (void*)&veltor_4way_hash;
#else
init_veltor_ctx();
gate->scanhash = (void*)&scanhash_veltor;
gate->hash = (void*)&veltor_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

algo/x14/veltor-gate.h Normal file

@@ -0,0 +1,32 @@
#ifndef VELTOR_GATE_H__
#define VELTOR_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(HASH_4WAY) && defined(__AES__)
#define VELTOR_4WAY
#endif
bool register_veltor_algo( algo_gate_t* gate );
#if defined(VELTOR_4WAY)
void veltor_4way_hash( void *state, const void *input );
int scanhash_veltor_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_veltor_4way_ctx();
#endif
void veltor_hash( void *state, const void *input );
int scanhash_veltor( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_veltor_ctx();
#endif


@@ -1,4 +1,4 @@
#include "algo-gate-api.h"
#include "veltor-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -34,7 +34,7 @@ void veltor_skein512_midstate( const void* input )
sph_skein512( &veltor_skein_mid, input, 64 );
}
void veltorhash(void *output, const void *input)
void veltor_hash(void *output, const void *input)
{
uint32_t _ALIGN(64) hashA[16], hashB[16];
@@ -85,7 +85,7 @@ int scanhash_veltor(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
do {
be32enc(&endiandata[19], nonce);
veltorhash(hash, endiandata);
veltor_hash(hash, endiandata);
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
work_set_target_ratio(work, hash);
@@ -101,14 +101,3 @@ int scanhash_veltor(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
bool register_veltor_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT;
init_veltor_ctx();
gate->scanhash = (void*)&scanhash_veltor;
gate->hash = (void*)&veltorhash;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
}


@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -20,11 +20,11 @@
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/shabal/shabal-hash-4way.h"
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -36,7 +36,7 @@ typedef struct {
hashState_echo echo;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
shabal512_4way_context shabal;
} x14_4way_ctx_holder;
x14_4way_ctx_holder x14_4way_ctx __attribute__ ((aligned (64)));
@@ -44,7 +44,7 @@ x14_4way_ctx_holder x14_4way_ctx __attribute__ ((aligned (64)));
void init_x14_4way_ctx()
{
blake512_4way_init( &x14_4way_ctx.blake );
sph_bmw512_init( &x14_4way_ctx.bmw );
bmw512_4way_init( &x14_4way_ctx.bmw );
init_groestl( &x14_4way_ctx.groestl, 64 );
skein512_4way_init( &x14_4way_ctx.skein );
@@ -56,7 +57,7 @@ void init_x14_4way_ctx()
init_echo( &x14_4way_ctx.echo, 512 );
sph_hamsi512_init( &x14_4way_ctx.hamsi );
sph_fugue512_init( &x14_4way_ctx.fugue );
sph_shabal512_init( &x14_4way_ctx.shabal );
shabal512_4way_init( &x14_4way_ctx.shabal );
};
void x14_4way_hash( void *state, const void *input )
@@ -73,22 +74,13 @@ void x14_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 2 Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x14_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x14_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x14_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x14_4way_ctx.groestl, sizeof(hashState_groestl) );
@@ -113,7 +105,7 @@ void x14_4way_hash( void *state, const void *input )
keccak512_4way( &ctx.keccak, vhash, 64 );
keccak512_4way_close( &ctx.keccak, vhash );
// Serial to the end
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 7 Luffa
@@ -144,9 +136,9 @@ void x14_4way_hash( void *state, const void *input )
memcpy( &ctx.shavite, &x14_4way_ctx.shavite,
sizeof(sph_shavite512_context) );
sph_shavite512( &ctx.shavite, hash1, 64 );
sph_shavite512_close( &ctx.shavite, hash1 );
memcpy( &ctx.shavite, &x14_4way_ctx.shavite,
sizeof(sph_shavite512_context) );
sph_shavite512_close( &ctx.shavite, hash1 );
sph_shavite512( &ctx.shavite, hash2, 64 );
sph_shavite512_close( &ctx.shavite, hash2 );
memcpy( &ctx.shavite, &x14_4way_ctx.shavite,
@@ -206,19 +198,12 @@ void x14_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal
sph_shabal512( &ctx.shabal, hash0, 64 );
sph_shabal512_close( &ctx.shabal, hash0 );
memcpy( &ctx.shabal, &x14_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash1, 64 );
sph_shabal512_close( &ctx.shabal, hash1 );
memcpy( &ctx.shabal, &x14_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash2, 64 );
sph_shabal512_close( &ctx.shabal, hash2 );
memcpy( &ctx.shabal, &x14_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash3, 64 );
sph_shabal512_close( &ctx.shabal, hash3 );
// 14 Shabal, parallel 32 bit
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
memcpy( state, hash0, 32 );
memcpy( state+32, hash1, 32 );
memcpy( state+64, hash2, 32 );


@@ -11,7 +11,7 @@ bool register_x14_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x14;
gate->hash = (void*)&x14hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};


@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -20,12 +20,12 @@
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -37,7 +37,7 @@ typedef struct {
hashState_echo echo;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
} x15_4way_ctx_holder;
@@ -46,7 +46,7 @@ x15_4way_ctx_holder x15_4way_ctx __attribute__ ((aligned (64)));
void init_x15_4way_ctx()
{
blake512_4way_init( &x15_4way_ctx.blake );
sph_bmw512_init( &x15_4way_ctx.bmw );
bmw512_4way_init( &x15_4way_ctx.bmw );
init_groestl( &x15_4way_ctx.groestl, 64 );
skein512_4way_init( &x15_4way_ctx.skein );
@@ -58,7 +59,7 @@ void init_x15_4way_ctx()
init_echo( &x15_4way_ctx.echo, 512 );
sph_hamsi512_init( &x15_4way_ctx.hamsi );
sph_fugue512_init( &x15_4way_ctx.fugue );
sph_shabal512_init( &x15_4way_ctx.shabal );
shabal512_4way_init( &x15_4way_ctx.shabal );
sph_whirlpool_init( &x15_4way_ctx.whirlpool );
};
@@ -76,22 +77,13 @@ void x15_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 2 Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x15_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x15_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x15_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x15_4way_ctx.groestl, sizeof(hashState_groestl) );
@@ -209,18 +201,11 @@ void x15_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal
sph_shabal512( &ctx.shabal, hash0, 64 );
sph_shabal512_close( &ctx.shabal, hash0 );
memcpy( &ctx.shabal, &x15_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash1, 64 );
sph_shabal512_close( &ctx.shabal, hash1 );
memcpy( &ctx.shabal, &x15_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash2, 64 );
sph_shabal512_close( &ctx.shabal, hash2 );
memcpy( &ctx.shabal, &x15_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash3, 64 );
sph_shabal512_close( &ctx.shabal, hash3 );
// 14 Shabal, parallel 32 bit
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
// 15 Whirlpool
sph_whirlpool( &ctx.whirlpool, hash0, 64 );


@@ -11,7 +11,7 @@ bool register_x15_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x15;
gate->hash = (void*)&x15hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
return true;
};


@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -17,17 +17,16 @@
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sse2/nist.h"
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/haval/sph-haval.h"
#include <openssl/sha.h>
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -39,7 +38,7 @@ typedef struct {
hashState_echo echo;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
sph_haval256_5_context haval;
@@ -50,7 +49,7 @@ x17_4way_ctx_holder x17_4way_ctx __attribute__ ((aligned (64)));
void init_x17_4way_ctx()
{
blake512_4way_init( &x17_4way_ctx.blake );
sph_bmw512_init( &x17_4way_ctx.bmw );
bmw512_4way_init( &x17_4way_ctx.bmw );
init_groestl( &x17_4way_ctx.groestl, 64 );
skein512_4way_init( &x17_4way_ctx.skein );
jh512_4way_init( &x17_4way_ctx.jh );
@@ -62,8 +61,7 @@ void init_x17_4way_ctx()
init_echo( &x17_4way_ctx.echo, 512 );
sph_hamsi512_init( &x17_4way_ctx.hamsi );
sph_fugue512_init( &x17_4way_ctx.fugue );
sph_shabal512_init( &x17_4way_ctx.shabal );
sph_whirlpool_init( &x17_4way_ctx.whirlpool );
shabal512_4way_init( &x17_4way_ctx.shabal );
SHA512_Init( &x17_4way_ctx.sha512 );
sph_haval256_5_init( &x17_4way_ctx.haval );
};
@@ -82,22 +80,13 @@ void x17_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 2 Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x17_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x17_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x17_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x17_4way_ctx.groestl, sizeof(hashState_groestl) );
@@ -215,18 +204,11 @@ void x17_4way_hash( void *state, const void *input )
sph_fugue512( &ctx.fugue, hash3, 64 );
sph_fugue512_close( &ctx.fugue, hash3 );
// 14 Shabal
sph_shabal512( &ctx.shabal, hash0, 64 );
sph_shabal512_close( &ctx.shabal, hash0 );
memcpy( &ctx.shabal, &x17_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash1, 64 );
sph_shabal512_close( &ctx.shabal, hash1 );
memcpy( &ctx.shabal, &x17_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash2, 64 );
sph_shabal512_close( &ctx.shabal, hash2 );
memcpy( &ctx.shabal, &x17_4way_ctx.shabal, sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash3, 64 );
sph_shabal512_close( &ctx.shabal, hash3 );
// 14 Shabal, parallel 32 bit
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
shabal512_4way( &ctx.shabal, vhash, 64 );
shabal512_4way_close( &ctx.shabal, vhash );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
// 15 Whirlpool
sph_whirlpool( &ctx.whirlpool, hash0, 64 );


@@ -11,7 +11,7 @@ bool register_x17_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_x17;
gate->hash = (void*)&x17_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
return true;
};


@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
@@ -19,7 +19,7 @@
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/shabal/shabal-hash-4way.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sph_sha2.h"
#include "algo/haval/sph-haval.h"
@@ -27,7 +27,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -39,7 +39,7 @@ typedef struct {
hashState_echo echo;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
sph_shabal512_context shabal;
shabal512_4way_context shabal;
sph_whirlpool_context whirlpool;
SHA512_CTX sha512;
sph_haval256_5_context haval;
@@ -52,7 +52,7 @@ static __thread blake512_4way_context xevan_blake_4way_mid
void init_xevan_4way_ctx()
{
blake512_4way_init(&xevan_4way_ctx.blake);
sph_bmw512_init(&xevan_4way_ctx.bmw);
bmw512_4way_init( &xevan_4way_ctx.bmw );
init_groestl( &xevan_4way_ctx.groestl, 64 );
skein512_4way_init(&xevan_4way_ctx.skein);
jh512_4way_init(&xevan_4way_ctx.jh);
@@ -64,7 +64,7 @@ void init_xevan_4way_ctx()
init_echo( &xevan_4way_ctx.echo, 512 );
sph_hamsi512_init( &xevan_4way_ctx.hamsi );
sph_fugue512_init( &xevan_4way_ctx.fugue );
sph_shabal512_init( &xevan_4way_ctx.shabal );
shabal512_4way_init( &xevan_4way_ctx.shabal );
sph_whirlpool_init( &xevan_4way_ctx.whirlpool );
SHA512_Init( &xevan_4way_ctx.sha512 );
sph_haval256_5_init( &xevan_4way_ctx.haval );
@@ -90,25 +90,18 @@ void xevan_4way_hash( void *output, const void *input )
xevan_4way_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &xevan_4way_ctx, sizeof(xevan_4way_ctx) );
// parallel way
memcpy( &ctx.blake, &xevan_blake_4way_mid,
sizeof(xevan_blake_4way_mid) );
blake512_4way( &ctx.blake, input + (midlen<<2), tail );
blake512_4way_close(&ctx.blake, vhash);
memset( &vhash[8<<2], 0, 64<<2 );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
sph_bmw512( &ctx.bmw, hash0, dataLen );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &xevan_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, dataLen );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &xevan_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, dataLen );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &xevan_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, dataLen );
sph_bmw512_close( &ctx.bmw, hash3 );
bmw512_4way( &ctx.bmw, vhash, dataLen );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
dataLen<<3 );
@@ -122,6 +115,7 @@ void xevan_4way_hash( void *output, const void *input )
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3,
dataLen<<3 );
// Parallel 4way
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
skein512_4way( &ctx.skein, vhash, dataLen );
@@ -133,6 +127,7 @@ void xevan_4way_hash( void *output, const void *input )
keccak512_4way( &ctx.keccak, vhash, dataLen );
keccak512_4way_close( &ctx.keccak, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0,
@@ -222,21 +217,13 @@ void xevan_4way_hash( void *output, const void *input )
sph_fugue512( &ctx.fugue, hash3, dataLen );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_shabal512( &ctx.shabal, hash0, dataLen );
sph_shabal512_close( &ctx.shabal, hash0 );
memcpy( &ctx.shabal, &xevan_4way_ctx.shabal,
sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash1, dataLen );
sph_shabal512_close( &ctx.shabal, hash1 );
memcpy( &ctx.shabal, &xevan_4way_ctx.shabal,
sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash2, dataLen );
sph_shabal512_close( &ctx.shabal, hash2 );
memcpy( &ctx.shabal, &xevan_4way_ctx.shabal,
sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash3, dataLen );
sph_shabal512_close( &ctx.shabal, hash3 );
// Parallel 4way
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
shabal512_4way( &ctx.shabal, vhash, dataLen );
shabal512_4way_close( &ctx.shabal, vhash );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
// Serial
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
memcpy( &ctx.whirlpool, &xevan_4way_ctx.whirlpool,
@@ -286,19 +273,10 @@ void xevan_4way_hash( void *output, const void *input )
blake512_4way( &ctx.blake, vhash, dataLen );
blake512_4way_close(&ctx.blake, vhash);
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
bmw512_4way( &ctx.bmw, vhash, dataLen );
bmw512_4way_close( &ctx.bmw, vhash );
sph_bmw512( &ctx.bmw, hash0, dataLen );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &xevan_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, dataLen );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &xevan_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, dataLen );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &xevan_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, dataLen );
sph_bmw512_close( &ctx.bmw, hash3 );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0,
dataLen<<3 );
@@ -412,20 +390,10 @@ void xevan_4way_hash( void *output, const void *input )
sph_fugue512( &ctx.fugue, hash3, dataLen );
sph_fugue512_close( &ctx.fugue, hash3 );
sph_shabal512( &ctx.shabal, hash0, dataLen );
sph_shabal512_close( &ctx.shabal, hash0 );
memcpy( &ctx.shabal, &xevan_4way_ctx.shabal,
sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash1, dataLen );
sph_shabal512_close( &ctx.shabal, hash1 );
memcpy( &ctx.shabal, &xevan_4way_ctx.shabal,
sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash2, dataLen );
sph_shabal512_close( &ctx.shabal, hash2 );
memcpy( &ctx.shabal, &xevan_4way_ctx.shabal,
sizeof(sph_shabal512_context) );
sph_shabal512( &ctx.shabal, hash3, dataLen );
sph_shabal512_close( &ctx.shabal, hash3 );
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
shabal512_4way( &ctx.shabal, vhash, dataLen );
shabal512_4way_close( &ctx.shabal, vhash );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
sph_whirlpool( &ctx.whirlpool, hash0, dataLen );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
@@ -480,7 +448,6 @@ int scanhash_xevan_4way( int thr_id, struct work *work, uint32_t max_nonce,
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
// uint32_t _ALIGN(64) hash[8];
uint32_t _ALIGN(64) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
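The 4-way scanhash routines test four nonces per pass: vdata above holds the 80-byte header replicated into four interleaved 64-bit lanes. A hedged sketch of building that buffer with this commit's helpers (the function is illustrative; per-lane nonce patching is omitted):

// Illustrative, not the exact source: replicate the byte-swapped header
// into all four 64-bit lanes of the interleaved work buffer.
#include <stdint.h>
#include "avxdefs.h"   // mm256_interleave_4x64

static void build_vdata( uint32_t *vdata, const uint32_t *endiandata )
{
   // 640 bits = the 80-byte block header; each lane gets an identical copy
   // until distinct nonces are written in.
   mm256_interleave_4x64( vdata, endiandata, endiandata,
                          endiandata, endiandata, 640 );
}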

View File

@@ -16,7 +16,7 @@ bool register_xevan_algo( algo_gate_t* gate )
gate->scanhash = (void*)&scanhash_xevan;
gate->hash = (void*)&xevan_hash;
#endif
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;
gate->set_target = (void*)&xevan_set_target;
gate->get_max64 = (void*)&get_max64_0xffffLL;
return true;

View File

@@ -438,6 +438,20 @@ bool register_yescrypt_algo( algo_gate_t* gate )
return true;
}
bool register_yescryptr8_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_yescrypt;
gate->hash = (void*)&yescrypt_hash;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&yescrypt_get_max64;
client_key_hack = false;
YESCRYPT_N = 2048;
YESCRYPT_R = 8;
YESCRYPT_P = 1;
return true;
}
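All yescrypt variants share scanhash_yescrypt and yescrypt_hash; registration differs only in the globals set above. A hedged sketch of that design (the helper is hypothetical, the extern declarations are assumptions; the values are the ones from register_yescryptr8_algo):

// Hypothetical helper, not in the source: one yescrypt implementation,
// parameterized entirely by globals written once at algo registration.
#include <stdbool.h>
#include <stdint.h>

extern bool client_key_hack;                          // assumed declarations
extern uint32_t YESCRYPT_N, YESCRYPT_R, YESCRYPT_P;   // from the yescrypt code

static void set_yescryptr8_params( void )
{
   client_key_hack = false;   // BitZeny omits the Globalboost-Y client-key quirk
   YESCRYPT_N = 2048;
   YESCRYPT_R = 8;
   YESCRYPT_P = 1;
}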
bool register_yescryptr16_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;

avxdefs.h
View File

@@ -37,7 +37,7 @@
#define mm_one_16 _mm_set1_epi16( 1U )
// Constant minus 1
#define mm_neg1 _mm_set1_epi64x( 0xFFFFFFFFUL )
#define mm_neg1 _mm_set1_epi64x( 0xFFFFFFFFFFFFFFFFULL )
//
// Basic operations without equivalent SIMD intrinsic
@@ -55,11 +55,11 @@
// Return bit n in position, all other bits zeroed.
#define mm_bitextract_64( x, n ) \
_mm_and_si128( _mm_set1_epi64x( 1ULL << (n) ), x )
_mm_and_si128( _mm_slli_epi64( mm_one_64, n ), x )
#define mm_bitextract_32( x, n ) \
_mm_and_si128( _mm_set1_epi32( 1UL << (n) ), x )
_mm_and_si128( _mm_slli_epi32( mm_one_32, n ), x )
#define mm_bitextract_16( x, n ) \
_mm_and_si128( _mm_set1_epi16( 1U << (n) ), x )
_mm_and_si128( _mm_slli_epi16( mm_one_16, n ), x )
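The rewritten macros build the mask by shifting a constant-one vector, so each 64-bit lane should equal the scalar x & (1ULL << n). A quick stand-alone cross-check (illustrative test, not part of the tree; the expansion below mirrors the macro body):

// Compile with SSE2 (default on x86_64). Verifies lane 0 of the vector
// bit-extract against its scalar equivalent.
#include <immintrin.h>
#include <stdint.h>
#include <assert.h>

int main(void)
{
   uint64_t a = 0xdeadbeefcafef00dULL;
   int n = 13;
   __m128i x = _mm_set1_epi64x( a );
   __m128i r = _mm_and_si128( _mm_slli_epi64( _mm_set1_epi64x( 1 ), n ), x );
   uint64_t lane0 = (uint64_t)_mm_cvtsi128_si64( r );
   assert( lane0 == ( a & ( 1ULL << n ) ) );
   return 0;
}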
// Return bit n as bool
#define mm_bittest_64( x, n ) \
@@ -343,11 +343,11 @@ inline __m128i mm_byteswap_16( __m128i x )
// return bit n in position, all other bits cleared
#define mm256_bitextract_64( x, n ) \
_mm256_and_si128( _mm256_set1_epi64x( 0ULL << (n) ), x )
_mm256_and_si256( _mm256_slli_epi64( mm256_one_64, n ), x )
#define mm256_bitextract_32( x, n ) \
_mm256_and_si128( _mm256_set1_epi32( 0UL << (n) ), x )
_mm256_and_si256( _mm256_slli_epi32( mm256_one_32, n ), x )
#define mm256_bitextract_16( x, n ) \
_mm256_and_si128( _mm256_set1_epi16( 0U << (n) ), x )
_mm256_and_si256( _mm256_slli_epi16( mm256_one_16, n ), x )
// Return bit n as bool (bit 0)
#define mm256_bittest_64( x, n ) \
@@ -359,17 +359,17 @@ inline __m128i mm_byteswap_16( __m128i x )
// Return x with bit n set/cleared in all elements
#define mm256_bitset_64( x, n ) \
_mm256_or_si256( _mm256_set1_epi64x( 1ULL << (n) ), x )
_mm256_or_si256( _mm256_slli_epi64( mm256_one_64, n ), x )
#define mm256_bitclr_64( x, n ) \
_mm256_andnot_si256( _mm256_set1_epi64x( 1ULL << (n) ), x )
_mm256_andnot_si256( _mm256_slli_epi64( mm256_one_64, n ), x )
#define mm256_bitset_32( x, n ) \
_mm256_or_si256( _mm256_set1_epi32( 1UL << (n) ), x )
_mm256_or_si256( _mm256_slli_epi32( mm256_one_32, n ), x )
#define mm256_bitclr_32( x, n ) \
_mm256_andnot_si256( mm256_not( _mm256_set1_epi32( 1UL << (n) ), x )
_mm256_andnot_si256( _mm256_slli_epi32( mm256_one_32, n ), x )
#define mm256_bitset_16( x, n ) \
_mm256_or_si256( _mm256_set1_epi16( 1U << (n) ), x )
_mm256_or_si256( _mm256_slli_epi16( mm256_one_16, n ), x )
#define mm256_bitclr_16( x, n ) \
_mm256_andnot_si256( _mm256_set1_epi16( 1U << (n) ), x )
_mm256_andnot_si256( _mm256_slli_epi16( mm256_one_16, n ), x )
// Return x with bit n toggled
#define mm256_bitflip_64( x, n ) \
@@ -448,22 +448,21 @@ inline bool memcmp_256( __m256i src1, __m256i src2, int n )
// shift, a little more work is needed.
// Optimized 64 bit permutations
// Swap 128, aka rotate 2x64, 4x32, 8x16, 16x8
// Swap 128 bit elements in 256 bit vector
#define mm256_swap_128( w ) _mm256_permute4x64_epi64( w, 0x4e )
//#define mm256_swap_128( w ) _mm256_permute2x128_si256( w, w, 1 )
// Rotate 256 bit vector by one 64 bit element, aka 2x32, 4x16, 8x8
// Rotate 256 bit vector by one 64 bit element
#define mm256_rotl256_1x64( w ) _mm256_permute4x64_epi64( w, 0x93 )
#define mm256_rotr256_1x64( w ) _mm256_permute4x64_epi64( w, 0x39 )
// Swap hi/lo 64 bits in each 128 bit element
// Swap 64 bits in each 128 bit element of 256 bit vector
#define mm256_swap128_64( x ) _mm256_shuffle_epi32( x, 0x4e )
// Rotate 128 bit elements by 32 bits
// Rotate 128 bit elements in 256 bit vector by 32 bits
#define mm256_rotr128_1x32( x ) _mm256_shuffle_epi32( x, 0x39 )
#define mm256_rotl128_1x32( x ) _mm256_shuffle_epi32( x, 0x93 )
// Swap hi/lo 32 bits in each 64 bit element
// Swap 32 bits in each 64 bit element of 256 bit vector
#define mm256_swap64_32( x ) _mm256_shuffle_epi32( x, 0xb1 )
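As a sanity check on the shuffle controls: 0xb1 selects elements 1,0,3,2 within each 128-bit lane, which is exactly a 32-bit swap inside every 64-bit element. A small stand-alone demo (illustrative, not in the source; build with -mavx2):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   // Memory order low-to-high is 0,1,2,...,7 (note _mm256_set_epi32 takes
   // its arguments high-to-low).
   __m256i x = _mm256_set_epi32( 7, 6, 5, 4, 3, 2, 1, 0 );
   __m256i y = _mm256_shuffle_epi32( x, 0xb1 );   // mm256_swap64_32
   uint32_t out[8];
   _mm256_storeu_si256( (__m256i*)out, y );
   for ( int i = 0; i < 8; i++ ) printf( "%u ", out[i] );   // 1 0 3 2 5 4 7 6
   printf( "\n" );
   return 0;
}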
// Less efficient but more versatile. Use only for rotations that are not
@@ -487,9 +486,9 @@ inline bool memcmp_256( __m256i src1, __m256i src2, int n )
// Rotate two 256 bit vectors as one 512 bit vector
// Fast but limited to 128 bit granularity
#define mm256_swap512_256(a, b) _mm256_permute2x128_si256( a, b, 0x1032 )
#define mm256_rotr512_1x128(a, b) _mm256_permute2x128_si256( a, b, 0x0321 )
#define mm256_rotl512_1x128(a, b) _mm256_permute2x128_si256( a, b, 0x2103 )
#define mm256_swap512_256(a, b) _mm256_permute2x128_si256( a, b, 0x4e )
#define mm256_rotr512_1x128(a, b) _mm256_permute2x128_si256( a, b, 0x39 )
#define mm256_rotl512_1x128(a, b) _mm256_permute2x128_si256( a, b, 0x93 )
// Much slower, for 64 and 32 bit granularity
#define mm256_rotr512_1x64(a, b) \
@@ -677,6 +676,23 @@ inline void mm_interleave_4x32( void *dst, const void *src0, const void *src1,
d[17] = _mm_set_epi32( s3[17], s2[17], s1[17], s0[17] );
d[18] = _mm_set_epi32( s3[18], s2[18], s1[18], s0[18] );
d[19] = _mm_set_epi32( s3[19], s2[19], s1[19], s0[19] );
if ( bit_len <= 640 ) return;
d[20] = _mm_set_epi32( s3[20], s2[20], s1[20], s0[20] );
d[21] = _mm_set_epi32( s3[21], s2[21], s1[21], s0[21] );
d[22] = _mm_set_epi32( s3[22], s2[22], s1[22], s0[22] );
d[23] = _mm_set_epi32( s3[23], s2[23], s1[23], s0[23] );
d[24] = _mm_set_epi32( s3[24], s2[24], s1[24], s0[24] );
d[25] = _mm_set_epi32( s3[25], s2[25], s1[25], s0[25] );
d[26] = _mm_set_epi32( s3[26], s2[26], s1[26], s0[26] );
d[27] = _mm_set_epi32( s3[27], s2[27], s1[27], s0[27] );
d[28] = _mm_set_epi32( s3[28], s2[28], s1[28], s0[28] );
d[29] = _mm_set_epi32( s3[29], s2[29], s1[29], s0[29] );
d[30] = _mm_set_epi32( s3[30], s2[30], s1[30], s0[30] );
d[31] = _mm_set_epi32( s3[31], s2[31], s1[31], s0[31] );
// bit_len == 1024
}
// bit_len must be multiple of 32
@@ -735,6 +751,24 @@ inline void mm_deinterleave_4x32( void *dst0, void *dst1, void *dst2,
d1[4] = _mm_set_epi32( s[77], s[73], s[69], s[65] );
d2[4] = _mm_set_epi32( s[78], s[74], s[70], s[66] );
d3[4] = _mm_set_epi32( s[79], s[75], s[71], s[67] );
if ( bit_len <= 640 ) return;
d0[5] = _mm_set_epi32( s[92], s[88], s[84], s[80] );
d1[5] = _mm_set_epi32( s[93], s[89], s[85], s[81] );
d2[5] = _mm_set_epi32( s[94], s[90], s[86], s[82] );
d3[5] = _mm_set_epi32( s[95], s[91], s[87], s[83] );
d0[6] = _mm_set_epi32( s[108], s[104], s[100], s[ 96] );
d1[6] = _mm_set_epi32( s[109], s[105], s[101], s[ 97] );
d2[6] = _mm_set_epi32( s[110], s[106], s[102], s[ 98] );
d3[6] = _mm_set_epi32( s[111], s[107], s[103], s[ 99] );
d0[7] = _mm_set_epi32( s[124], s[120], s[116], s[112] );
d1[7] = _mm_set_epi32( s[125], s[121], s[117], s[113] );
d2[7] = _mm_set_epi32( s[126], s[122], s[118], s[114] );
d3[7] = _mm_set_epi32( s[127], s[123], s[119], s[115] );
// bit_len == 1024
}
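Both the interleave and deinterleave above implement the same mapping: word j of lane k lives at interleaved offset 4*j + k. A scalar reference (illustrative, not in the source) that the unrolled SSE versions must match:

#include <stdint.h>

// Scalar reference for mm_interleave_4x32: transpose four word-streams
// into one lane-interleaved stream.
void interleave_4x32_ref( uint32_t *d, const uint32_t *s0, const uint32_t *s1,
                          const uint32_t *s2, const uint32_t *s3, int bit_len )
{
   const uint32_t *s[4] = { s0, s1, s2, s3 };
   for ( int j = 0; j < bit_len/32; j++ )
      for ( int k = 0; k < 4; k++ )
         d[ 4*j + k ] = s[k][j];
}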
// deinterleave 4 arrays into individual buffers for scalar processing
@@ -1074,6 +1108,41 @@ inline void mm256_deinterleave_8x32x( uint32_t *dst0, uint32_t *dst1,
}
}
// Can't do it in place
inline void mm256_reinterleave_4x64x( void *dst, void *src, int bit_len )
{
__m256i* d = (__m256i*)dst;
uint32_t *s = (uint32_t*)src;
d[0] = _mm256_set_epi32( s[7], s[3], s[6], s[2], s[5], s[1], s[4], s[0] );
d[1] = _mm256_set_epi32( s[15],s[11],s[14],s[10],s[13],s[9],s[12], s[8] );
d[2] = _mm256_set_epi32( s[23],s[19],s[22],s[18],s[21],s[17],s[20],s[16] );
d[3] = _mm256_set_epi32( s[31],s[27],s[30],s[26],s[29],s[25],s[28],s[24] );
if ( bit_len <= 256 ) return;
d[4] = _mm256_set_epi32( s[39],s[35],s[38],s[34],s[37],s[33],s[36],s[32] );
d[5] = _mm256_set_epi32( s[47],s[43],s[46],s[42],s[45],s[41],s[44],s[40] );
d[6] = _mm256_set_epi32( s[55],s[51],s[54],s[50],s[53],s[49],s[52],s[48] );
d[7] = _mm256_set_epi32( s[63],s[59],s[62],s[58],s[61],s[57],s[60],s[56] );
if ( bit_len <= 512 ) return;
d[8] = _mm256_set_epi32( s[71],s[67],s[70],s[66],s[69],s[65],s[68],s[64] );
d[9] = _mm256_set_epi32( s[79],s[75],s[78],s[74],s[77],s[73],s[76],s[72] );
if ( bit_len <= 640 ) return;
d[10] = _mm256_set_epi32(s[87],s[83],s[86],s[82],s[85],s[81],s[84],s[80]);
d[11] = _mm256_set_epi32(s[95],s[91],s[94],s[90],s[93],s[89],s[92],s[88]);
d[12] = _mm256_set_epi32(s[103],s[99],s[102],s[98],s[101],s[97],s[100],s[96]);
d[13] = _mm256_set_epi32(s[111],s[107],s[110],s[106],s[109],s[105],s[108],s[104]);
d[14] = _mm256_set_epi32(s[119],s[115],s[118],s[114],s[117],s[113],s[116],s[112]);
d[15] = _mm256_set_epi32(s[127],s[123],s[126],s[122],s[125],s[121],s[124],s[120]);
// bit_len == 1024
}
// likely of no use.
// convert 4x32 byte (128 bit) vectors to 4x64 (256 bit) vectors for AVX2
// bit_len must be multiple of 64
@@ -1081,35 +1150,70 @@ inline void mm256_reinterleave_4x64( uint64_t *dst, uint32_t *src,
int bit_len )
{
uint32_t *d = (uint32_t*)dst;
uint32_t *s = (uint32_t*)src;
for ( int i = 0; i < bit_len >> 5; i += 8 )
{
*( d + i ) = *( src + i ); // 0 <- 0 8 <- 8
*( d + i + 1 ) = *( src + i + 4 ); // 1 <- 4 9 <- 12
*( d + i + 2 ) = *( src + i + 1 ); // 2 <- 1 10 <- 9
*( d + i + 3 ) = *( src + i + 5 ); // 3 <- 5 11 <- 13
*( d + i + 4 ) = *( src + i + 2 ); // 4 <- 2 12 <- 10
*( d + i + 5 ) = *( src + i + 6 ); // 5 <- 6 13 <- 14
*( d + i + 6 ) = *( src + i + 3 ); // 6 <- 3 14 <- 11
*( d + i + 7 ) = *( src + i + 7 ); // 7 <- 7 15 <- 15
*( d + i ) = *( s + i ); // 0 <- 0 8 <- 8
*( d + i + 1 ) = *( s + i + 4 ); // 1 <- 4 9 <- 12
*( d + i + 2 ) = *( s + i + 1 ); // 2 <- 1 10 <- 9
*( d + i + 3 ) = *( s + i + 5 ); // 3 <- 5 11 <- 13
*( d + i + 4 ) = *( s + i + 2 ); // 4 <- 2 12 <- 10
*( d + i + 5 ) = *( s + i + 6 ); // 5 <- 6 13 <- 14
*( d + i + 6 ) = *( s + i + 3 ); // 6 <- 3 14 <- 11
*( d + i + 7 ) = *( s + i + 7 ); // 7 <- 7 15 <- 15
}
}
// convert 4x64 (256 bit) vectors to 4x32 (128 bit) vectors for AVX
// bit_len must be multiple of 64
inline void mm_reinterleave_4x32( uint32_t *dst, uint64_t *src,
int bit_len )
inline void mm256_reinterleave_4x32( void *dst, void *src, int bit_len )
{
__m256i *d = (__m256i*)dst;
uint32_t *s = (uint32_t*)src;
d[0] = _mm256_set_epi32( s[ 7],s[ 5],s[ 3],s[ 1],s[ 6],s[ 4],s[ 2],s[ 0] );
d[1] = _mm256_set_epi32( s[15],s[13],s[11],s[ 9],s[14],s[12],s[10],s[ 8] );
d[2] = _mm256_set_epi32( s[23],s[21],s[19],s[17],s[22],s[20],s[18],s[16] );
d[3] = _mm256_set_epi32( s[31],s[29],s[27],s[25],s[30],s[28],s[26],s[24] );
if ( bit_len <= 256 ) return;
d[4] = _mm256_set_epi32( s[39],s[37],s[35],s[33],s[38],s[36],s[34],s[32] );
d[5] = _mm256_set_epi32( s[47],s[45],s[43],s[41],s[46],s[44],s[42],s[40] );
d[6] = _mm256_set_epi32( s[55],s[53],s[51],s[49],s[54],s[52],s[50],s[48] );
d[7] = _mm256_set_epi32( s[63],s[61],s[59],s[57],s[62],s[60],s[58],s[56] );
if ( bit_len <= 512 ) return;
d[8] = _mm256_set_epi32( s[71],s[69],s[67],s[65],s[70],s[68],s[66],s[64] );
d[9] = _mm256_set_epi32( s[79],s[77],s[75],s[73],s[78],s[76],s[74],s[72] );
if ( bit_len <= 640 ) return;
d[10] = _mm256_set_epi32( s[87],s[85],s[83],s[81],s[86],s[84],s[82],s[80] );
d[11] = _mm256_set_epi32( s[95],s[93],s[91],s[89],s[94],s[92],s[90],s[88] );
d[12] = _mm256_set_epi32( s[103],s[101],s[99],s[97],s[102],s[100],s[98],s[96] );
d[13] = _mm256_set_epi32( s[111],s[109],s[107],s[105],s[110],s[108],s[106],s[104] );
d[14] = _mm256_set_epi32( s[119],s[117],s[115],s[113],s[118],s[116],s[114],s[112] );
d[15] = _mm256_set_epi32( s[127],s[125],s[123],s[121],s[126],s[124],s[122],s[120] );
// bit_len == 1024
}
inline void mm_reinterleave_4x32( void *dst, void *src, int bit_len )
{
uint32_t *d = (uint32_t*)dst;
uint32_t *s = (uint32_t*)src;
for ( int i = 0; i < bit_len >> 5; i +=8 )
{
*( dst + i ) = *( s + i );
*( dst + i + 1 ) = *( s + i + 2 );
*( dst + i + 2 ) = *( s + i + 4 );
*( dst + i + 3 ) = *( s + i + 6 );
*( dst + i + 4 ) = *( s + i + 1 );
*( dst + i + 5 ) = *( s + i + 3 );
*( dst + i + 6 ) = *( s + i + 5 );
*( dst + i + 7 ) = *( s + i + 7 );
*( d + i ) = *( s + i );
*( d + i + 1 ) = *( s + i + 2 );
*( d + i + 2 ) = *( s + i + 4 );
*( d + i + 3 ) = *( s + i + 6 );
*( d + i + 4 ) = *( s + i + 1 );
*( d + i + 5 ) = *( s + i + 3 );
*( d + i + 6 ) = *( s + i + 5 );
*( d + i + 7 ) = *( s + i + 7 );
}
}

configure
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.8.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.9.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.7.8'
PACKAGE_STRING='cpuminer-opt 3.7.8'
PACKAGE_VERSION='3.7.9'
PACKAGE_STRING='cpuminer-opt 3.7.9'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.7.8 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.7.9 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.7.8:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.7.9:";;
esac
cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.7.8
cpuminer-opt configure 3.7.9
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.7.8, which was
It was created by cpuminer-opt $as_me 3.7.9, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.7.8'
VERSION='3.7.9'
cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.7.8, which was
This file was extended by cpuminer-opt $as_me 3.7.9, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.7.8
cpuminer-opt config.status 3.7.9
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.7.8])
AC_INIT([cpuminer-opt], [3.7.9])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -546,6 +546,7 @@ enum algos {
ALGO_X17,
ALGO_XEVAN,
ALGO_YESCRYPT,
ALGO_YESCRYPTR8,
ALGO_YESCRYPTR16,
ALGO_ZR5,
ALGO_COUNT
@@ -617,6 +618,7 @@ static const char* const algo_names[] = {
"x17",
"xevan",
"yescrypt",
"yescryptr8",
"yescryptr16",
"zr5",
"\0"
@@ -741,8 +743,9 @@ Options:\n\
x14 X14\n\
x15 X15\n\
x17\n\
xevan Bitsend\n\
xevan Bitsend (BSD)\n\
yescrypt Globalboost-Y (BSTY)\n\
yescryptr8 BitZeny (ZNY)\n\
yescryptr16 Yenten (YTN)\n\
zr5 Ziftr\n\
-o, --url=URL URL of mining server\n\

View File

@@ -31,6 +31,7 @@ CFLAGS="-O3 -march=core-avx2 -Wall -DFOUR_WAY" ./configure $F
make
mv cpuminer.exe release/cpuminer-4way.exe
make clean || echo clean
CFLAGS="-O3 -march=corei7-avx -msha -Wall" ./configure $F
make
strip -s cpuminer.exe