mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v23.8
This commit is contained in:
@@ -21,112 +21,92 @@
|
||||
#include "hash_api.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
/* Constant tables, each a MYALIGN-qualified array of 4 or 8 unsigned int
 * words. _k_s0F and the _k_h* tables repeat one byte value (0x0F, 0x63,
 * 0xc6, 0x5b, 0x4e, 0x0e, 0x15) in every byte position. None of these
 * tables is referenced in the visible part of this listing.
 * NOTE(review): the names suggest shuffle-based AES tables (input/output
 * transforms, inverse, S-box pieces, mix permutations) and MYALIGN is
 * presumably an alignment attribute - confirm against their definitions
 * and users, which are not shown here. */
MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
|
||||
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
|
||||
MYALIGN const unsigned int _k_opt[] = {0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121, 0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1};
|
||||
MYALIGN const unsigned int _k_inv[] = {0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309, 0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C};
|
||||
MYALIGN const unsigned int _k_sb1[] = {0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E, 0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1};
|
||||
MYALIGN const unsigned int _k_sb2[] = {0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955, 0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8};
|
||||
MYALIGN const unsigned int _k_sb3[] = {0xC0211A00, 0x53E17249, 0xA8B2DA89, 0xFB68933B, 0xF0030A00, 0x5FF35C55, 0xA6ACFAA5, 0xF956AF09};
|
||||
MYALIGN const unsigned int _k_sb4[] = {0x3FD64100, 0xE1E937A0, 0x49087E9F, 0xA876DE97, 0xC393EA00, 0x3D50AED7, 0x876D2914, 0xBA44FE79};
|
||||
MYALIGN const unsigned int _k_sb5[] = {0xF4867F00, 0x5072D62F, 0x5D228BDB, 0x0DA9A4F9, 0x3971C900, 0x0B487AC2, 0x8A43F0FB, 0x81B332B8};
|
||||
MYALIGN const unsigned int _k_sb7[] = {0xFFF75B00, 0xB20845E9, 0xE1BAA416, 0x531E4DAC, 0x3390E000, 0x62A3F282, 0x21C1D3B1, 0x43125170};
|
||||
MYALIGN const unsigned int _k_sbo[] = {0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A, 0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1};
|
||||
MYALIGN const unsigned int _k_h63[] = {0x63636363, 0x63636363, 0x63636363, 0x63636363};
|
||||
MYALIGN const unsigned int _k_hc6[] = {0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6, 0xc6c6c6c6};
|
||||
MYALIGN const unsigned int _k_h5b[] = {0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b};
|
||||
MYALIGN const unsigned int _k_h4e[] = {0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e, 0x4e4e4e4e};
|
||||
MYALIGN const unsigned int _k_h0e[] = {0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e};
|
||||
MYALIGN const unsigned int _k_h15[] = {0x15151515, 0x15151515, 0x15151515, 0x15151515};
|
||||
MYALIGN const unsigned int _k_aesmix1[] = {0x0f0a0500, 0x030e0904, 0x07020d08, 0x0b06010c};
|
||||
MYALIGN const unsigned int _k_aesmix2[] = {0x000f0a05, 0x04030e09, 0x0807020d, 0x0c0b0601};
|
||||
MYALIGN const unsigned int _k_aesmix3[] = {0x05000f0a, 0x0904030e, 0x0d080702, 0x010c0b06};
|
||||
MYALIGN const unsigned int _k_aesmix4[] = {0x0a05000f, 0x0e090403, 0x020d0807, 0x06010c0b};
|
||||
/* Four-word uint32_t constants used by the ECHO macros in this listing.
 *   const1       - {1,0,0,0}; added to the running key k1 with v128_add32
 *                  after every keyed v128_aesenc step. Aligned to 32 bytes;
 *                  the other three are aligned to 16.
 *   lsbmask      - 0x01 in every byte; ANDed with the shifted input in
 *                  ECHO_MIXBYTES.
 *   mul2mask     - table operand of v128_shuffle8 in ECHO_MIXBYTES, indexed
 *                  by the masked bits. NOTE(review): presumably yields 0x00
 *                  or 0x1b per byte (the GF(2^8) reduction constant) on a
 *                  little-endian target - confirm v128_shuffle8 semantics.
 *   invshiftrows - not referenced in the visible part of this listing. */
const uint32_t const1[] __attribute__ ((aligned (32))) =
|
||||
{ 0x00000001, 0x00000000, 0x00000000, 0x00000000 };
|
||||
const uint32_t mul2mask[] __attribute__ ((aligned (16))) =
|
||||
{ 0x00001b00, 0x00000000, 0x00000000, 0x00000000 };
|
||||
const uint32_t lsbmask[] __attribute__ ((aligned (16))) =
|
||||
{ 0x01010101, 0x01010101, 0x01010101, 0x01010101 };
|
||||
const uint32_t invshiftrows[] __attribute__ ((aligned (16))) =
|
||||
{ 0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c };
|
||||
|
||||
/* ECHO_SUBBYTES4( state, j ): processes the four entries state[0..3][j].
 * Pass 1: each entry goes through v128_aesenc with the running key k1, and
 * k1 is advanced by const1 (v128_add32) after every entry, so k1 leaves the
 * macro advanced four times. Pass 2: each entry goes through
 * v128_aesenc_nokey. Requires a variable named k1 in the expanding scope.
 * Expands to several statements without enclosing braces; the last one has
 * no trailing semicolon, so the caller supplies it. */
#define ECHO_SUBBYTES4( state, j ) \
|
||||
state[0][j] = v128_aesenc( state[0][j], k1 ); \
|
||||
k1 = v128_add32( k1, cast_v128(const1) ); \
|
||||
state[1][j] = v128_aesenc( state[1][j], k1 ); \
|
||||
k1 = v128_add32( k1, cast_v128(const1) ); \
|
||||
state[2][j] = v128_aesenc( state[2][j], k1 ); \
|
||||
k1 = v128_add32( k1, cast_v128(const1) ); \
|
||||
state[3][j] = v128_aesenc( state[3][j], k1 ); \
|
||||
k1 = v128_add32( k1, cast_v128(const1) ); \
|
||||
state[0][j] = v128_aesenc_nokey( state[0][j] ); \
|
||||
state[1][j] = v128_aesenc_nokey( state[1][j] ); \
|
||||
state[2][j] = v128_aesenc_nokey( state[2][j] ); \
|
||||
state[3][j] = v128_aesenc_nokey( state[3][j] )
|
||||
|
||||
/* MYALIGN / unsigned int definitions of const1, mul2mask, lsbmask and
 * invshiftrows, plus an all-zero vector (zero) and mul2ipt. zero is used as
 * the key operand in the cast_v128(zero) form of ECHO_SUBBYTES; mul2ipt is
 * not referenced in the visible part of this listing.
 * NOTE(review): const1, mul2mask, lsbmask and invshiftrows are also defined
 * earlier in this listing (uint32_t with an aligned attribute) with the same
 * initializers. Both sets cannot be compiled together; they look like the
 * two sides of a diff - confirm which set is current. */
MYALIGN const unsigned int const1[] = {0x00000001, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int mul2mask[] = {0x00001b00, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int lsbmask[] = {0x01010101, 0x01010101, 0x01010101, 0x01010101};
|
||||
MYALIGN const unsigned int invshiftrows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
|
||||
MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};
|
||||
/* ECHO_SUBBYTES( state, i, j ): single-entry form. state[i][j] goes through
 * v128_aesenc with the running key k1, k1 is advanced once by const1
 * (v128_add32), then state[i][j] goes through v128_aesenc_nokey. Requires a
 * variable named k1 in the expanding scope. Expands to three statements
 * without braces; the last has no trailing semicolon. */
#define ECHO_SUBBYTES( state, i, j ) \
|
||||
state[i][j] = v128_aesenc( state[i][j], k1 ); \
|
||||
k1 = v128_add32( k1, cast_v128(const1) ); \
|
||||
state[i][j] = v128_aesenc_nokey( state[i][j] )
|
||||
|
||||
|
||||
/* ECHO_SUBBYTES4(state, j): processes the four entries state[0..3][j].
 * Pass 1: each entry goes through v128_aesenc with the running key k1, and
 * k1 is advanced by const1 (v128_add32) after every entry. Pass 2: each
 * entry goes through v128_aesenc again with v128_zero as the key operand.
 * Requires a variable named k1 in the expanding scope. Expands to several
 * statements without braces; the last has no trailing semicolon.
 * NOTE(review): a macro of the same name, using v128_aesenc_nokey for the
 * second pass, also appears earlier in this listing - the two look like the
 * two sides of a diff; confirm which is current. */
#define ECHO_SUBBYTES4(state, j) \
|
||||
state[0][j] = v128_aesenc(state[0][j], k1);\
|
||||
k1 = v128_add32(k1, cast_v128(const1));\
|
||||
state[1][j] = v128_aesenc(state[1][j], k1);\
|
||||
k1 = v128_add32(k1, cast_v128(const1));\
|
||||
state[2][j] = v128_aesenc(state[2][j], k1);\
|
||||
k1 = v128_add32(k1, cast_v128(const1));\
|
||||
state[3][j] = v128_aesenc(state[3][j], k1);\
|
||||
k1 = v128_add32(k1, cast_v128(const1));\
|
||||
state[0][j] = v128_aesenc(state[0][j], v128_zero ); \
|
||||
state[1][j] = v128_aesenc(state[1][j], v128_zero ); \
|
||||
state[2][j] = v128_aesenc(state[2][j], v128_zero ); \
|
||||
state[3][j] = v128_aesenc(state[3][j], v128_zero )
|
||||
|
||||
/* ECHO_SUBBYTES(state, i, j): single-entry form. state[i][j] goes through
 * v128_aesenc with the running key k1, k1 is advanced once by const1
 * (v128_add32), then state[i][j] goes through v128_aesenc again with the
 * all-zero array `zero` (via cast_v128) as the key operand. Requires a
 * variable named k1 in the expanding scope. Expands to three statements
 * without braces; the last has no trailing semicolon.
 * NOTE(review): a macro of the same name, using v128_aesenc_nokey for the
 * second step, also appears earlier in this listing - confirm which is
 * current. */
#define ECHO_SUBBYTES(state, i, j) \
|
||||
state[i][j] = v128_aesenc(state[i][j], k1);\
|
||||
k1 = v128_add32(k1, cast_v128(const1));\
|
||||
state[i][j] = v128_aesenc(state[i][j], cast_v128(zero))
|
||||
|
||||
/* ECHO_MIXBYTES(state1, state2, j, t1, t2, s2): writes state2[0..3][j] as a
 * mix of four state1 entries taken along a diagonal:
 *   a = state1[0][j]            b = state1[1][(j + 1) & 3]
 *   c = state1[2][(j + 2) & 3]  d = state1[3][(j + 3) & 3]
 * For each input x the macro forms D(x) = v128_add8(x, x) XOR t2, where
 * t1 = v128_sr16(x, 7) AND lsbmask and t2 = v128_shuffle8(mul2mask, t1).
 * The accumulated results are:
 *   state2[0][j] = D(a) ^ D(b) ^ b ^ c ^ d
 *   state2[1][j] = a ^ D(b) ^ D(c) ^ c ^ d
 *   state2[2][j] = a ^ b ^ D(c) ^ D(d) ^ d
 *   state2[3][j] = D(a) ^ a ^ b ^ c ^ D(d)
 * t1, t2 and s2 are caller-supplied scratch variables and are overwritten.
 * state1 is only read. Expands to statements without braces; the last has
 * no trailing semicolon.
 * NOTE(review): D(x) looks like bytewise doubling in GF(2^8) with the 0x1b
 * reduction, which would make this the 2-3-1-1 circulant mix - confirm
 * against the v128_* helper definitions. */
#define ECHO_MIXBYTES(state1, state2, j, t1, t2, s2) \
|
||||
s2 = v128_add8(state1[0][j], state1[0][j]);\
|
||||
t1 = v128_sr16(state1[0][j], 7);\
|
||||
t1 = v128_and(t1, cast_v128(lsbmask));\
|
||||
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
|
||||
s2 = v128_xor(s2, t2);\
|
||||
state2[0][j] = s2;\
|
||||
state2[1][j] = state1[0][j];\
|
||||
state2[2][j] = state1[0][j];\
|
||||
state2[3][j] = v128_xor(s2, state1[0][j]);\
|
||||
s2 = v128_add8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
|
||||
t1 = v128_sr16(state1[1][(j + 1) & 3], 7);\
|
||||
t1 = v128_and(t1, cast_v128(lsbmask));\
|
||||
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
|
||||
s2 = v128_xor(s2, t2);\
|
||||
state2[0][j] = v128_xor3(state2[0][j], s2, state1[1][(j + 1) & 3] );\
|
||||
state2[1][j] = v128_xor(state2[1][j], s2);\
|
||||
state2[2][j] = v128_xor(state2[2][j], state1[1][(j + 1) & 3]);\
|
||||
state2[3][j] = v128_xor(state2[3][j], state1[1][(j + 1) & 3]);\
|
||||
s2 = v128_add8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
|
||||
t1 = v128_sr16(state1[2][(j + 2) & 3], 7);\
|
||||
t1 = v128_and(t1, cast_v128(lsbmask));\
|
||||
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
|
||||
s2 = v128_xor(s2, t2);\
|
||||
state2[0][j] = v128_xor(state2[0][j], state1[2][(j + 2) & 3]);\
|
||||
state2[1][j] = v128_xor3(state2[1][j], s2, state1[2][(j + 2) & 3] );\
|
||||
state2[2][j] = v128_xor(state2[2][j], s2);\
|
||||
state2[3][j] = v128_xor(state2[3][j], state1[2][(j + 2) & 3]);\
|
||||
s2 = v128_add8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
|
||||
t1 = v128_sr16(state1[3][(j + 3) & 3], 7);\
|
||||
t1 = v128_and(t1, cast_v128(lsbmask));\
|
||||
t2 = v128_shuffle8(cast_v128(mul2mask), t1);\
|
||||
s2 = v128_xor(s2, t2);\
|
||||
state2[0][j] = v128_xor(state2[0][j], state1[3][(j + 3) & 3]);\
|
||||
state2[1][j] = v128_xor(state2[1][j], state1[3][(j + 3) & 3]);\
|
||||
state2[2][j] = v128_xor3(state2[2][j], s2, state1[3][(j + 3) & 3] );\
|
||||
state2[3][j] = v128_xor(state2[3][j], s2)
|
||||
/* ECHO_MIXBYTES( state1, state2, j, t1, t2, s2 ): writes state2[0..3][j] as
 * a mix of four state1 entries taken along a diagonal:
 *   a = state1[0][j]            b = state1[1][(j + 1) & 3]
 *   c = state1[2][(j + 2) & 3]  d = state1[3][(j + 3) & 3]
 * For each input x the macro forms D(x) = v128_add8(x, x) XOR t2, where
 * t1 = v128_sr16(x, 7) AND lsbmask and t2 = v128_shuffle8(mul2mask, t1).
 * The accumulated results are:
 *   state2[0][j] = D(a) ^ D(b) ^ b ^ c ^ d
 *   state2[1][j] = a ^ D(b) ^ D(c) ^ c ^ d
 *   state2[2][j] = a ^ b ^ D(c) ^ D(d) ^ d
 *   state2[3][j] = D(a) ^ a ^ b ^ c ^ D(d)
 * t1, t2 and s2 are caller-supplied scratch variables and are overwritten.
 * state1 is only read. Expands to statements without braces; the last has
 * no trailing semicolon.
 * NOTE(review): D(x) looks like bytewise doubling in GF(2^8) with the 0x1b
 * reduction, which would make this the 2-3-1-1 circulant mix - confirm
 * against the v128_* helper definitions. */
#define ECHO_MIXBYTES( state1, state2, j, t1, t2, s2 ) \
|
||||
s2 = v128_add8( state1[0][j], state1[0][j] ); \
|
||||
t1 = v128_sr16( state1[0][j], 7 ); \
|
||||
t1 = v128_and( t1, cast_v128(lsbmask) ); \
|
||||
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
|
||||
s2 = v128_xor( s2, t2 ); \
|
||||
state2[0][j] = s2; \
|
||||
state2[1][j] = state1[0][j]; \
|
||||
state2[2][j] = state1[0][j]; \
|
||||
state2[3][j] = v128_xor(s2, state1[0][j] ); \
|
||||
s2 = v128_add8( state1[1][(j + 1) & 3], state1[1][(j + 1) & 3] ); \
|
||||
t1 = v128_sr16( state1[1][(j + 1) & 3], 7 ); \
|
||||
t1 = v128_and( t1, cast_v128(lsbmask) ); \
|
||||
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
|
||||
s2 = v128_xor( s2, t2 ); \
|
||||
state2[0][j] = v128_xor3( state2[0][j], s2, state1[1][(j + 1) & 3] );\
|
||||
state2[1][j] = v128_xor( state2[1][j], s2 ); \
|
||||
state2[2][j] = v128_xor( state2[2][j], state1[1][(j + 1) & 3] ); \
|
||||
state2[3][j] = v128_xor( state2[3][j], state1[1][(j + 1) & 3] ); \
|
||||
s2 = v128_add8( state1[2][(j + 2) & 3], state1[2][(j + 2) & 3] ); \
|
||||
t1 = v128_sr16( state1[2][(j + 2) & 3], 7 ); \
|
||||
t1 = v128_and( t1, cast_v128(lsbmask) ); \
|
||||
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
|
||||
s2 = v128_xor( s2, t2 ); \
|
||||
state2[0][j] = v128_xor( state2[0][j], state1[2][(j + 2) & 3] ); \
|
||||
state2[1][j] = v128_xor3( state2[1][j], s2, state1[2][(j + 2) & 3] ); \
|
||||
state2[2][j] = v128_xor( state2[2][j], s2 ); \
|
||||
state2[3][j] = v128_xor( state2[3][j], state1[2][(j + 2) & 3] ); \
|
||||
s2 = v128_add8( state1[3][(j + 3) & 3], state1[3][(j + 3) & 3] ); \
|
||||
t1 = v128_sr16( state1[3][(j + 3) & 3], 7 ); \
|
||||
t1 = v128_and( t1, cast_v128(lsbmask) ); \
|
||||
t2 = v128_shuffle8( cast_v128(mul2mask), t1 ); \
|
||||
s2 = v128_xor( s2, t2 ); \
|
||||
state2[0][j] = v128_xor( state2[0][j], state1[3][(j + 3) & 3] ); \
|
||||
state2[1][j] = v128_xor( state2[1][j], state1[3][(j + 3) & 3] ); \
|
||||
state2[2][j] = v128_xor3( state2[2][j], s2, state1[3][(j + 3) & 3] ); \
|
||||
state2[3][j] = v128_xor( state2[3][j], s2 )
|
||||
|
||||
|
||||
/* ECHO_ROUND_UNROLL2: two back-to-back rounds that ping-pong between two
 * state arrays. First half: ECHO_SUBBYTES4 on columns 0..3 of _state, then
 * ECHO_MIXBYTES from _state into _state2 for j = 0..3. Second half: the same
 * with the roles swapped, so the result ends up back in _state. Takes no
 * arguments; requires _state, _state2, k1, t1, t2 and s2 in the expanding
 * scope.
 * NOTE(review): the sequence appears twice below under a single #define -
 * once bare and once wrapped in { } with every statement terminated. These
 * look like the two sides of a diff rather than one macro body; confirm
 * which form is current before relying on either. */
#define ECHO_ROUND_UNROLL2 \
|
||||
ECHO_SUBBYTES4(_state, 0);\
|
||||
ECHO_SUBBYTES4(_state, 1);\
|
||||
ECHO_SUBBYTES4(_state, 2);\
|
||||
ECHO_SUBBYTES4(_state, 3);\
|
||||
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES4(_state2, 0);\
|
||||
ECHO_SUBBYTES4(_state2, 1);\
|
||||
ECHO_SUBBYTES4(_state2, 2);\
|
||||
ECHO_SUBBYTES4(_state2, 3);\
|
||||
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
|
||||
{ \
|
||||
ECHO_SUBBYTES4( _state, 0 ); \
|
||||
ECHO_SUBBYTES4( _state, 1 ); \
|
||||
ECHO_SUBBYTES4( _state, 2 ); \
|
||||
ECHO_SUBBYTES4( _state, 3 ); \
|
||||
ECHO_MIXBYTES( _state, _state2, 0, t1, t2, s2 ); \
|
||||
ECHO_MIXBYTES( _state, _state2, 1, t1, t2, s2 ); \
|
||||
ECHO_MIXBYTES( _state, _state2, 2, t1, t2, s2 ); \
|
||||
ECHO_MIXBYTES( _state, _state2, 3, t1, t2, s2 ); \
|
||||
ECHO_SUBBYTES4( _state2, 0 ); \
|
||||
ECHO_SUBBYTES4( _state2, 1 ); \
|
||||
ECHO_SUBBYTES4( _state2, 2 ); \
|
||||
ECHO_SUBBYTES4( _state2, 3 ); \
|
||||
ECHO_MIXBYTES( _state2, _state, 0, t1, t2, s2 ); \
|
||||
ECHO_MIXBYTES( _state2, _state, 1, t1, t2, s2 ); \
|
||||
ECHO_MIXBYTES( _state2, _state, 2, t1, t2, s2 ); \
|
||||
ECHO_MIXBYTES( _state2, _state, 3, t1, t2, s2 ); \
|
||||
}
|
||||
|
||||
/*
|
||||
#define ECHO_ROUND_UNROLL2 \
|
||||
|
Reference in New Issue
Block a user