mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
90137b391e | ||
![]() |
8727d79182 | ||
![]() |
17ccbc328f | ||
![]() |
0e3945ddb5 | ||
![]() |
7d2ef7973d | ||
![]() |
e6fd9b1d69 | ||
![]() |
1a234cbe53 |
@@ -40,7 +40,7 @@ $ mkdir $HOME/usr/lib
|
|||||||
version available in the repositories.
|
version available in the repositories.
|
||||||
|
|
||||||
Download the following source code packages from their respective and
|
Download the following source code packages from their respective and
|
||||||
respected download locations, copy them to ~/usr/lib/ and uncompress them.
|
respected download locations, copy them to $HOME/usr/lib/ and uncompress them.
|
||||||
|
|
||||||
openssl: https://github.com/openssl/openssl/releases
|
openssl: https://github.com/openssl/openssl/releases
|
||||||
|
|
||||||
@@ -149,85 +149,10 @@ Copy cpuminer.exe to the release directory, compress and copy the release direct
|
|||||||
|
|
||||||
Run cpuminer
|
Run cpuminer
|
||||||
|
|
||||||
In a command window, change directories to the unzipped release folder. to get a list of all options:
|
In a command window, change directories to the unzipped release folder. To get a list of all options:
|
||||||
|
|
||||||
cpuminer.exe --help
|
cpuminer.exe --help
|
||||||
|
|
||||||
Command options are specific to where you mine. Refer to the pool's instructions on how to set them.
|
Command options are specific to where you mine. Refer to the pool's instructions on how to set them.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Create a link to the locally compiled version of gmp.h
|
|
||||||
|
|
||||||
$ ln -s $LOCAL_LIB/gmp-version/gmp.h ./gmp.h
|
|
||||||
|
|
||||||
Edit configure.ac to fix libpthread package name.
|
|
||||||
|
|
||||||
sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
|
|
||||||
|
|
||||||
|
|
||||||
7. Compile
|
|
||||||
|
|
||||||
you can use the default compile if you intend to use cpuminer-opt on the
|
|
||||||
same CPU and the virtual machine supports that architecture.
|
|
||||||
|
|
||||||
./build.sh
|
|
||||||
|
|
||||||
Otherwise you can compile manually while setting options in CFLAGS.
|
|
||||||
|
|
||||||
Some common options:
|
|
||||||
|
|
||||||
To compile for a specific CPU architecture:
|
|
||||||
|
|
||||||
CFLAGS="-O3 -march=znver1 -Wall" ./configure --with-curl
|
|
||||||
|
|
||||||
This will compile for AMD Ryzen.
|
|
||||||
|
|
||||||
You can compile more generically for a set of specific CPU features
|
|
||||||
if you know what features you want:
|
|
||||||
|
|
||||||
CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure --with-curl
|
|
||||||
|
|
||||||
This will compile for an older CPU that does not have AVX.
|
|
||||||
|
|
||||||
You can find several examples in build-allarch.sh
|
|
||||||
|
|
||||||
If you have a CPU with more than 64 threads and Windows 7 or higher you
|
|
||||||
can enable the CPU Groups feature:
|
|
||||||
|
|
||||||
-D_WIN32_WINNT==0x0601
|
|
||||||
|
|
||||||
Once you have run configure successfully run make with n CPU threads:
|
|
||||||
|
|
||||||
make -j n
|
|
||||||
|
|
||||||
Copy cpuminer.exe to the release directory, compress and copy the release
|
|
||||||
directory to a Windows system and run cpuminer.exe from the command line.
|
|
||||||
|
|
||||||
Run cpuminer
|
|
||||||
|
|
||||||
In a command window, change directories to the unzipped release folder.
|
|
||||||
to get a list of all options:
|
|
||||||
|
|
||||||
cpuminer.exe --help
|
|
||||||
|
|
||||||
Command options are specific to where you mine. Refer to the pool's
|
|
||||||
instructions on how to set them.
|
|
||||||
|
@@ -21,6 +21,7 @@ cpuminer_SOURCES = \
|
|||||||
api.c \
|
api.c \
|
||||||
sysinfos.c \
|
sysinfos.c \
|
||||||
algo-gate-api.c\
|
algo-gate-api.c\
|
||||||
|
malloc-huge.c \
|
||||||
algo/argon2/argon2a/argon2a.c \
|
algo/argon2/argon2a/argon2a.c \
|
||||||
algo/argon2/argon2a/ar2/argon2.c \
|
algo/argon2/argon2a/ar2/argon2.c \
|
||||||
algo/argon2/argon2a/ar2/opt.c \
|
algo/argon2/argon2a/ar2/opt.c \
|
||||||
@@ -171,6 +172,7 @@ cpuminer_SOURCES = \
|
|||||||
algo/sha/hmac-sha256-hash-4way.c \
|
algo/sha/hmac-sha256-hash-4way.c \
|
||||||
algo/sha/sha256d.c \
|
algo/sha/sha256d.c \
|
||||||
algo/sha/sha2.c \
|
algo/sha/sha2.c \
|
||||||
|
algo/sha/sha256d-4way.c \
|
||||||
algo/sha/sha256t-gate.c \
|
algo/sha/sha256t-gate.c \
|
||||||
algo/sha/sha256t-4way.c \
|
algo/sha/sha256t-4way.c \
|
||||||
algo/sha/sha256t.c \
|
algo/sha/sha256t.c \
|
||||||
|
48
README.txt
48
README.txt
@@ -18,14 +18,14 @@ error to find the fastest one that works. Pay attention to
|
|||||||
the features listed at cpuminer startup to ensure you are mining at
|
the features listed at cpuminer startup to ensure you are mining at
|
||||||
optimum speed using the best available features.
|
optimum speed using the best available features.
|
||||||
|
|
||||||
Architecture names and compile options used are only provided for Intel
|
Architecture names and compile options used are only provided for
|
||||||
Core series. Budget CPUs like Pentium and Celeron are often missing some
|
mainstream desktop CPUs. Budget CPUs like Pentium and Celeron are often
|
||||||
features.
|
missing some features. Check your CPU.
|
||||||
|
|
||||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
Support for AMD CPUs older than Ryzen is incomplete and without specific
|
||||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
recommendations. Find the best fit. CPUs older than Piledriver, including
|
||||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
Athlon x2 and Phenom II x4, are not supported by cpuminer-opt due to an
|
||||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
incompatible implementation of SSE2 on these CPUs.
|
||||||
|
|
||||||
More information for Intel and AMD CPU architectures and their features
|
More information for Intel and AMD CPU architectures and their features
|
||||||
can be found on Wikipedia.
|
can be found on Wikipedia.
|
||||||
@@ -34,26 +34,21 @@ https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures
|
|||||||
|
|
||||||
https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
|
https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
|
||||||
|
|
||||||
|
File name Architecture name
|
||||||
|
|
||||||
Exe file name Compile flags Arch name
|
cpuminer-sse2.exe Core2, Nehalem, generic x86_64 with SSE2
|
||||||
|
cpuminer-aes-sse42.exe Westmere
|
||||||
|
cpuminer-avx.exe Sandybridge, Ivybridge
|
||||||
|
cpuminer-avx2.exe Haswell, Skylake, Kabylake, Coffeelake, Cometlake
|
||||||
|
cpuminer-avx2-sha.exe AMD Zen1, Zen2
|
||||||
|
cpuminer-avx2-sha-vaes.exe Intel Alderlake*, AMD Zen3
|
||||||
|
cpuminer-avx512.exe Intel HEDT Skylake-X, Cascadelake
|
||||||
|
cpuminer-avx512-sha-vaes.exe Icelake, Tigerlake, Rocketlake
|
||||||
|
|
||||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
* Alderlake is a hybrid architecture. With the E-cores disabled it may be
|
||||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
possible to enable AVX512 on the P-cores and use the avx512-sha-vaes
|
||||||
cpuminer-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
build. This is not officially supported by Intel at time of writing.
|
||||||
cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell(1)
|
Check for current information.
|
||||||
cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake
|
|
||||||
cpuminer-avx512-sha.exe "-march=cascadelake -msha" Rocketlake(2)
|
|
||||||
cpuminer-avx512-sha-vaes.exe "-march=icelake-client" Icelake, Tigerlake(3)
|
|
||||||
cpuminer-zen.exe "-march=znver1" AMD Zen1, Zen2
|
|
||||||
cpuminer-zen3.exe "-march=znver2 -mvaes" Zen3(4)
|
|
||||||
|
|
||||||
(1) Haswell includes Broadwell, Skylake, Kabylake, Coffeelake & Cometlake.
|
|
||||||
(2) Rocketlake build uses cascadelake+sha as a workaround until Rocketlake
|
|
||||||
compiler support is available.
|
|
||||||
(3) Icelake & Tigerlake are only available on some laptops. Mining with a
|
|
||||||
laptop is not recommended.
|
|
||||||
(4) Zen3 build uses zen2+vaes as a workaround until Zen3 compiler support is
|
|
||||||
available. Zen2 CPUs should use Zen1 build.
|
|
||||||
|
|
||||||
Notes about included DLL files:
|
Notes about included DLL files:
|
||||||
|
|
||||||
@@ -66,8 +61,7 @@ https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
|
|||||||
|
|
||||||
Some DLL files may already be installed on the system by Windows or third
|
Some DLL files may already be installed on the system by Windows or third
|
||||||
party packages. They often will work and may be used instead of the included
|
party packages. They often will work and may be used instead of the included
|
||||||
file. Without a compelling reason to do so it's recommended to use the included
|
file.
|
||||||
files as they are packaged.
|
|
||||||
|
|
||||||
If you like this software feel free to donate:
|
If you like this software feel free to donate:
|
||||||
|
|
||||||
|
@@ -65,6 +65,88 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v3.19.5
|
||||||
|
|
||||||
|
Enhanced stratum-keepalive preemptively resets the stratum connection
|
||||||
|
before the server to avoid lost shares.
|
||||||
|
|
||||||
|
Added build-msys2.sh script for easier compiling on Windows, see Wiki for details.
|
||||||
|
|
||||||
|
X16RT: eliminate unnecessary recalculations of the hash order.
|
||||||
|
|
||||||
|
Fix a few compiler warnings.
|
||||||
|
|
||||||
|
Fixed log colour error when a block is solved.
|
||||||
|
|
||||||
|
v3.19.4
|
||||||
|
|
||||||
|
#359: Fix verthash memory allocation for non-hugepages, broken in v3.19.3.
|
||||||
|
|
||||||
|
New option stratum-keepalive prevents stratum timeouts when no shares are
|
||||||
|
submitted for several minutes due to high difficulty.
|
||||||
|
|
||||||
|
Fixed a bug displaying optimizations for some algos.
|
||||||
|
|
||||||
|
v3.19.3
|
||||||
|
|
||||||
|
Linux: Faster verthash (+25%), scryptn2 (+2%) when huge pages are available.
|
||||||
|
|
||||||
|
Small speed up for Hamsi AVX2 & AVX512, Keccak AVX512.
|
||||||
|
|
||||||
|
v3.19.2
|
||||||
|
|
||||||
|
Fixed log displaying incorrect memory usage for scrypt, broken in v3.19.1.
|
||||||
|
|
||||||
|
Reduce log noise when replies to submitted shares are lost due to stratum errors.
|
||||||
|
|
||||||
|
Fugue prehash optimization for X16r family AVX2 & AVX512.
|
||||||
|
|
||||||
|
Small speed improvement for Hamsi AVX2 & AVX512.
|
||||||
|
|
||||||
|
Win: With CPU groups enabled the number of CPUs displayed in the ASCII art
|
||||||
|
affinity map is the number of CPUs in a CPU group, was number of CPUs up to 64.
|
||||||
|
|
||||||
|
v3.19.1
|
||||||
|
|
||||||
|
Changes to Windows binaries package:
|
||||||
|
- builds for CPUs with AVX or lower have CPU groups disabled,
|
||||||
|
- zen3 build renamed to avx2-sha-vaes to support Alderlake as well as Zen3,
|
||||||
|
- zen build renamed to avx2-sha, supports Zen1 & Zen2,
|
||||||
|
- avx512-sha build removed, Rocketlake CPUs can use avx512-sha-vaes,
|
||||||
|
- see README.txt for compatibility details.
|
||||||
|
|
||||||
|
Fixed a few compiler warnings that are new in GCC 11.
|
||||||
|
Other minor fixes.
|
||||||
|
|
||||||
|
v3.19.0
|
||||||
|
|
||||||
|
Windows binaries now built with support for CPU groups, requires Windows 7.
|
||||||
|
|
||||||
|
Changes to cpu-affinity:
|
||||||
|
- PR#346: Fixed incorrect CPU affinity on Windows built for CPU groups,
|
||||||
|
- added support for CPU affinity for up to 256 threads or CPUs,
|
||||||
|
- streamlined code for more efficient initialization of miner threads,
|
||||||
|
- precise affining of each miner thread to a specific CPU,
|
||||||
|
- added an option to disable CPU affinity with "--cpu-affinity 0"
|
||||||
|
|
||||||
|
Faster sha256t with AVX512 & AVX2.
|
||||||
|
|
||||||
|
Added stratum error count to stats log, reported only when non-zero.
|
||||||
|
|
||||||
|
v3.18.2
|
||||||
|
|
||||||
|
Issue #342, fixed Groestl AES on Windows, broken in v3.18.0.
|
||||||
|
|
||||||
|
AVX512 for sha256d.
|
||||||
|
|
||||||
|
SSE42 and AVX may now be displayed as mining features at startup.
|
||||||
|
This is hard coded for each algo, and is only implemented for scrypt
|
||||||
|
at this time as it is the only algo with significant performance differences
|
||||||
|
with those features.
|
||||||
|
|
||||||
|
Fixed an issue where a high hashrate algo could cause excessive invalid hash
|
||||||
|
rate log reports when starting up in benchmark mode.
|
||||||
|
|
||||||
v3.18.1
|
v3.18.1
|
||||||
|
|
||||||
More speed for scrypt:
|
More speed for scrypt:
|
||||||
|
@@ -97,7 +97,6 @@ typedef uint32_t set_t;
|
|||||||
#define SHA_OPT 0x20 // Zen1, Icelake (sha256)
|
#define SHA_OPT 0x20 // Zen1, Icelake (sha256)
|
||||||
#define AVX512_OPT 0x40 // Skylake-X (AVX512[F,VL,DQ,BW])
|
#define AVX512_OPT 0x40 // Skylake-X (AVX512[F,VL,DQ,BW])
|
||||||
#define VAES_OPT 0x80 // Icelake (VAES & AVX512)
|
#define VAES_OPT 0x80 // Icelake (VAES & AVX512)
|
||||||
#define VAES256_OPT 0x100 // Zen3 (VAES without AVX512)
|
|
||||||
|
|
||||||
|
|
||||||
// return set containing all elements from sets a & b
|
// return set containing all elements from sets a & b
|
||||||
|
@@ -344,7 +344,7 @@ static size_t
|
|||||||
detect_cpu(void) {
|
detect_cpu(void) {
|
||||||
//union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
|
//union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
|
||||||
//cpu_vendors_x86 vendor = cpu_nobody;
|
//cpu_vendors_x86 vendor = cpu_nobody;
|
||||||
x86_regs regs;
|
x86_regs regs; regs.eax = regs.ebx = regs.ecx = 0;
|
||||||
uint32_t max_level, max_ext_level;
|
uint32_t max_level, max_ext_level;
|
||||||
size_t cpu_flags = 0;
|
size_t cpu_flags = 0;
|
||||||
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
|
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
|
||||||
|
@@ -4,11 +4,12 @@ typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, sc
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* romix pre/post nop function */
|
/* romix pre/post nop function */
|
||||||
|
/*
|
||||||
static void asm_calling_convention
|
static void asm_calling_convention
|
||||||
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
|
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
|
||||||
(void)blocks; (void)nblocks;
|
(void)blocks; (void)nblocks;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
/* romix pre/post endian conversion function */
|
/* romix pre/post endian conversion function */
|
||||||
static void asm_calling_convention
|
static void asm_calling_convention
|
||||||
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
|
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
|
||||||
|
@@ -37,6 +37,13 @@
|
|||||||
|
|
||||||
#if defined(__AVX512F__)
|
#if defined(__AVX512F__)
|
||||||
|
|
||||||
|
static inline __m512i blamka( __m512i x, __m512i y )
|
||||||
|
{
|
||||||
|
__m512i xy = _mm512_mul_epu32( x, y );
|
||||||
|
return _mm512_add_epi64( _mm512_add_epi64( x, y ),
|
||||||
|
_mm512_add_epi64( xy, xy ) );
|
||||||
|
}
|
||||||
|
|
||||||
static void fill_block( __m512i *state, const block *ref_block,
|
static void fill_block( __m512i *state, const block *ref_block,
|
||||||
block *next_block, int with_xor )
|
block *next_block, int with_xor )
|
||||||
{
|
{
|
||||||
|
@@ -328,9 +328,7 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
|||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
#define ROR64(x, n) _mm512_ror_epi64((x), (n))
|
static inline __m512i muladd(__m512i x, __m512i y)
|
||||||
|
|
||||||
static __m512i muladd(__m512i x, __m512i y)
|
|
||||||
{
|
{
|
||||||
__m512i z = _mm512_mul_epu32(x, y);
|
__m512i z = _mm512_mul_epu32(x, y);
|
||||||
return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
|
return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
|
||||||
@@ -344,8 +342,8 @@ static __m512i muladd(__m512i x, __m512i y)
|
|||||||
D0 = _mm512_xor_si512(D0, A0); \
|
D0 = _mm512_xor_si512(D0, A0); \
|
||||||
D1 = _mm512_xor_si512(D1, A1); \
|
D1 = _mm512_xor_si512(D1, A1); \
|
||||||
\
|
\
|
||||||
D0 = ROR64(D0, 32); \
|
D0 = _mm512_ror_epi64(D0, 32); \
|
||||||
D1 = ROR64(D1, 32); \
|
D1 = _mm512_ror_epi64(D1, 32); \
|
||||||
\
|
\
|
||||||
C0 = muladd(C0, D0); \
|
C0 = muladd(C0, D0); \
|
||||||
C1 = muladd(C1, D1); \
|
C1 = muladd(C1, D1); \
|
||||||
@@ -353,8 +351,8 @@ static __m512i muladd(__m512i x, __m512i y)
|
|||||||
B0 = _mm512_xor_si512(B0, C0); \
|
B0 = _mm512_xor_si512(B0, C0); \
|
||||||
B1 = _mm512_xor_si512(B1, C1); \
|
B1 = _mm512_xor_si512(B1, C1); \
|
||||||
\
|
\
|
||||||
B0 = ROR64(B0, 24); \
|
B0 = _mm512_ror_epi64(B0, 24); \
|
||||||
B1 = ROR64(B1, 24); \
|
B1 = _mm512_ror_epi64(B1, 24); \
|
||||||
} while ((void)0, 0)
|
} while ((void)0, 0)
|
||||||
|
|
||||||
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||||
@@ -365,8 +363,8 @@ static __m512i muladd(__m512i x, __m512i y)
|
|||||||
D0 = _mm512_xor_si512(D0, A0); \
|
D0 = _mm512_xor_si512(D0, A0); \
|
||||||
D1 = _mm512_xor_si512(D1, A1); \
|
D1 = _mm512_xor_si512(D1, A1); \
|
||||||
\
|
\
|
||||||
D0 = ROR64(D0, 16); \
|
D0 = _mm512_ror_epi64(D0, 16); \
|
||||||
D1 = ROR64(D1, 16); \
|
D1 = _mm512_ror_epi64(D1, 16); \
|
||||||
\
|
\
|
||||||
C0 = muladd(C0, D0); \
|
C0 = muladd(C0, D0); \
|
||||||
C1 = muladd(C1, D1); \
|
C1 = muladd(C1, D1); \
|
||||||
@@ -374,8 +372,8 @@ static __m512i muladd(__m512i x, __m512i y)
|
|||||||
B0 = _mm512_xor_si512(B0, C0); \
|
B0 = _mm512_xor_si512(B0, C0); \
|
||||||
B1 = _mm512_xor_si512(B1, C1); \
|
B1 = _mm512_xor_si512(B1, C1); \
|
||||||
\
|
\
|
||||||
B0 = ROR64(B0, 63); \
|
B0 = _mm512_ror_epi64(B0, 63); \
|
||||||
B1 = ROR64(B1, 63); \
|
B1 = _mm512_ror_epi64(B1, 63); \
|
||||||
} while ((void)0, 0)
|
} while ((void)0, 0)
|
||||||
|
|
||||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||||
@@ -417,11 +415,10 @@ static __m512i muladd(__m512i x, __m512i y)
|
|||||||
|
|
||||||
#define SWAP_HALVES(A0, A1) \
|
#define SWAP_HALVES(A0, A1) \
|
||||||
do { \
|
do { \
|
||||||
__m512i t0, t1; \
|
__m512i t; \
|
||||||
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
|
t = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
|
||||||
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
|
A1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
|
||||||
A0 = t0; \
|
A0 = t; \
|
||||||
A1 = t1; \
|
|
||||||
} while((void)0, 0)
|
} while((void)0, 0)
|
||||||
|
|
||||||
#define SWAP_QUARTERS(A0, A1) \
|
#define SWAP_QUARTERS(A0, A1) \
|
||||||
|
@@ -8,7 +8,7 @@ uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
|||||||
return &work_data[ DECRED_NONCE_INDEX ];
|
return &work_data[ DECRED_NONCE_INDEX ];
|
||||||
}
|
}
|
||||||
|
|
||||||
double decred_calc_network_diff( struct work* work )
|
long double decred_calc_network_diff( struct work* work )
|
||||||
{
|
{
|
||||||
// sample for diff 43.281 : 1c05ea29
|
// sample for diff 43.281 : 1c05ea29
|
||||||
// todo: endian reversed on longpoll could be zr5 specific...
|
// todo: endian reversed on longpoll could be zr5 specific...
|
||||||
@@ -16,7 +16,7 @@ double decred_calc_network_diff( struct work* work )
|
|||||||
uint32_t bits = ( nbits & 0xffffff );
|
uint32_t bits = ( nbits & 0xffffff );
|
||||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||||
int m;
|
int m;
|
||||||
double d = (double)0x0000ffff / (double)bits;
|
long double d = (long double)0x0000ffff / (long double)bits;
|
||||||
|
|
||||||
for ( m = shift; m < 29; m++ )
|
for ( m = shift; m < 29; m++ )
|
||||||
d *= 256.0;
|
d *= 256.0;
|
||||||
@@ -25,7 +25,7 @@ double decred_calc_network_diff( struct work* work )
|
|||||||
if ( shift == 28 )
|
if ( shift == 28 )
|
||||||
d *= 256.0; // testnet
|
d *= 256.0; // testnet
|
||||||
if ( opt_debug_diff )
|
if ( opt_debug_diff )
|
||||||
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d,
|
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", (double)d,
|
||||||
shift, bits );
|
shift, bits );
|
||||||
return net_diff;
|
return net_diff;
|
||||||
}
|
}
|
||||||
@@ -70,7 +70,10 @@ void decred_be_build_stratum_request( char *req, struct work *work,
|
|||||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||||
free(xnonce2str);
|
free(xnonce2str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !defined(min)
|
||||||
#define min(a,b) (a>b ? (b) :(a))
|
#define min(a,b) (a>b ? (b) :(a))
|
||||||
|
#endif
|
||||||
|
|
||||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||||
{
|
{
|
||||||
|
@@ -37,12 +37,23 @@ typedef struct
|
|||||||
|
|
||||||
} hashState_fugue __attribute__ ((aligned (64)));
|
} hashState_fugue __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
|
||||||
|
// These functions are deprecated, use the lower case macro aliases that use
|
||||||
|
// the standard interface. This will be cleaned up at a later date.
|
||||||
HashReturn fugue512_Init(hashState_fugue *state, int hashbitlen);
|
HashReturn fugue512_Init(hashState_fugue *state, int hashbitlen);
|
||||||
|
|
||||||
HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen);
|
HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen);
|
||||||
|
|
||||||
HashReturn fugue512_Final(hashState_fugue *state, void *hashval);
|
HashReturn fugue512_Final(hashState_fugue *state, void *hashval);
|
||||||
|
|
||||||
|
#define fugue512_init( state ) \
|
||||||
|
fugue512_Init( state, 512 )
|
||||||
|
#define fugue512_update( state, data, len ) \
|
||||||
|
fugue512_Update( state, data, (len)<<3 )
|
||||||
|
#define fugue512_final \
|
||||||
|
fugue512_Final
|
||||||
|
|
||||||
|
|
||||||
HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen);
|
HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen);
|
||||||
|
|
||||||
#endif // AES
|
#endif // AES
|
||||||
|
@@ -545,31 +545,33 @@ static const sph_u32 T512[64][16] = {
|
|||||||
#define sE c7
|
#define sE c7
|
||||||
#define sF m7
|
#define sF m7
|
||||||
|
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
// Hamsi 8 way AVX512
|
// Hamsi 8 way AVX512
|
||||||
|
|
||||||
|
// Intel says _mm512_movepi64_mask has (1L/1T) timing while
|
||||||
|
// _mm512_cmplt_epi64_mask has (3L/1T) timing, however, when tested hashing X13
|
||||||
|
// on i9-9940x cmplt with zero was 3% faster than movepi.
|
||||||
|
|
||||||
#define INPUT_BIG8 \
|
#define INPUT_BIG8 \
|
||||||
do { \
|
do { \
|
||||||
__m512i db = *buf; \
|
__m512i db = _mm512_ror_epi64( *buf, 1 ); \
|
||||||
const uint64_t *tp = (uint64_t*)&T512[0][0]; \
|
const __m512i zero = m512_zero; \
|
||||||
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m512_zero; \
|
const uint64_t *tp = (const uint64_t*)T512; \
|
||||||
|
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = zero; \
|
||||||
for ( int u = 0; u < 64; u++ ) \
|
for ( int u = 0; u < 64; u++ ) \
|
||||||
{ \
|
{ \
|
||||||
__m512i dm = _mm512_and_si512( db, m512_one_64 ) ; \
|
const __mmask8 dm = _mm512_cmplt_epi64_mask( db, zero ); \
|
||||||
dm = mm512_negate_32( _mm512_or_si512( dm, \
|
m0 = _mm512_mask_xor_epi64( m0, dm, m0, m512_const1_64( tp[0] ) ); \
|
||||||
_mm512_slli_epi64( dm, 32 ) ) ); \
|
m1 = _mm512_mask_xor_epi64( m1, dm, m1, m512_const1_64( tp[1] ) ); \
|
||||||
m0 = mm512_xorand( m0, dm, m512_const1_64( tp[0] ) ); \
|
m2 = _mm512_mask_xor_epi64( m2, dm, m2, m512_const1_64( tp[2] ) ); \
|
||||||
m1 = mm512_xorand( m1, dm, m512_const1_64( tp[1] ) ); \
|
m3 = _mm512_mask_xor_epi64( m3, dm, m3, m512_const1_64( tp[3] ) ); \
|
||||||
m2 = mm512_xorand( m2, dm, m512_const1_64( tp[2] ) ); \
|
m4 = _mm512_mask_xor_epi64( m4, dm, m4, m512_const1_64( tp[4] ) ); \
|
||||||
m3 = mm512_xorand( m3, dm, m512_const1_64( tp[3] ) ); \
|
m5 = _mm512_mask_xor_epi64( m5, dm, m5, m512_const1_64( tp[5] ) ); \
|
||||||
m4 = mm512_xorand( m4, dm, m512_const1_64( tp[4] ) ); \
|
m6 = _mm512_mask_xor_epi64( m6, dm, m6, m512_const1_64( tp[6] ) ); \
|
||||||
m5 = mm512_xorand( m5, dm, m512_const1_64( tp[5] ) ); \
|
m7 = _mm512_mask_xor_epi64( m7, dm, m7, m512_const1_64( tp[7] ) ); \
|
||||||
m6 = mm512_xorand( m6, dm, m512_const1_64( tp[6] ) ); \
|
db = _mm512_ror_epi64( db, 1 ); \
|
||||||
m7 = mm512_xorand( m7, dm, m512_const1_64( tp[7] ) ); \
|
|
||||||
tp += 8; \
|
tp += 8; \
|
||||||
db = _mm512_srli_epi64( db, 1 ); \
|
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
@@ -609,199 +611,192 @@ do { \
|
|||||||
|
|
||||||
#define READ_STATE_BIG8(sc) \
|
#define READ_STATE_BIG8(sc) \
|
||||||
do { \
|
do { \
|
||||||
c0 = sc->h[0x0]; \
|
c0 = sc->h[0]; \
|
||||||
c1 = sc->h[0x1]; \
|
c1 = sc->h[1]; \
|
||||||
c2 = sc->h[0x2]; \
|
c2 = sc->h[2]; \
|
||||||
c3 = sc->h[0x3]; \
|
c3 = sc->h[3]; \
|
||||||
c4 = sc->h[0x4]; \
|
c4 = sc->h[4]; \
|
||||||
c5 = sc->h[0x5]; \
|
c5 = sc->h[5]; \
|
||||||
c6 = sc->h[0x6]; \
|
c6 = sc->h[6]; \
|
||||||
c7 = sc->h[0x7]; \
|
c7 = sc->h[7]; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define WRITE_STATE_BIG8(sc) \
|
#define WRITE_STATE_BIG8(sc) \
|
||||||
do { \
|
do { \
|
||||||
sc->h[0x0] = c0; \
|
sc->h[0] = c0; \
|
||||||
sc->h[0x1] = c1; \
|
sc->h[1] = c1; \
|
||||||
sc->h[0x2] = c2; \
|
sc->h[2] = c2; \
|
||||||
sc->h[0x3] = c3; \
|
sc->h[3] = c3; \
|
||||||
sc->h[0x4] = c4; \
|
sc->h[4] = c4; \
|
||||||
sc->h[0x5] = c5; \
|
sc->h[5] = c5; \
|
||||||
sc->h[0x6] = c6; \
|
sc->h[6] = c6; \
|
||||||
sc->h[0x7] = c7; \
|
sc->h[7] = c7; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
#define ROUND_BIG8( alpha ) \
|
#define ROUND_BIG8( alpha ) \
|
||||||
do { \
|
do { \
|
||||||
__m512i t0, t1, t2, t3; \
|
__m512i t0, t1, t2, t3; \
|
||||||
s0 = _mm512_xor_si512( s0, alpha[ 0] ); \
|
s0 = _mm512_xor_si512( s0, alpha[ 0] ); /* m0 */ \
|
||||||
s1 = _mm512_xor_si512( s1, alpha[ 1] ); \
|
s1 = _mm512_xor_si512( s1, alpha[ 1] ); /* c0 */ \
|
||||||
s2 = _mm512_xor_si512( s2, alpha[ 2] ); \
|
s2 = _mm512_xor_si512( s2, alpha[ 2] ); /* m1 */ \
|
||||||
s3 = _mm512_xor_si512( s3, alpha[ 3] ); \
|
s3 = _mm512_xor_si512( s3, alpha[ 3] ); /* c1 */ \
|
||||||
s4 = _mm512_xor_si512( s4, alpha[ 4] ); \
|
s4 = _mm512_xor_si512( s4, alpha[ 4] ); /* c2 */ \
|
||||||
s5 = _mm512_xor_si512( s5, alpha[ 5] ); \
|
s5 = _mm512_xor_si512( s5, alpha[ 5] ); /* m2 */ \
|
||||||
s6 = _mm512_xor_si512( s6, alpha[ 6] ); \
|
s6 = _mm512_xor_si512( s6, alpha[ 6] ); /* c3 */ \
|
||||||
s7 = _mm512_xor_si512( s7, alpha[ 7] ); \
|
s7 = _mm512_xor_si512( s7, alpha[ 7] ); /* m3 */ \
|
||||||
s8 = _mm512_xor_si512( s8, alpha[ 8] ); \
|
s8 = _mm512_xor_si512( s8, alpha[ 8] ); /* m4 */ \
|
||||||
s9 = _mm512_xor_si512( s9, alpha[ 9] ); \
|
s9 = _mm512_xor_si512( s9, alpha[ 9] ); /* c4 */ \
|
||||||
sA = _mm512_xor_si512( sA, alpha[10] ); \
|
sA = _mm512_xor_si512( sA, alpha[10] ); /* m5 */ \
|
||||||
sB = _mm512_xor_si512( sB, alpha[11] ); \
|
sB = _mm512_xor_si512( sB, alpha[11] ); /* c5 */ \
|
||||||
sC = _mm512_xor_si512( sC, alpha[12] ); \
|
sC = _mm512_xor_si512( sC, alpha[12] ); /* c6 */ \
|
||||||
sD = _mm512_xor_si512( sD, alpha[13] ); \
|
sD = _mm512_xor_si512( sD, alpha[13] ); /* m6 */ \
|
||||||
sE = _mm512_xor_si512( sE, alpha[14] ); \
|
sE = _mm512_xor_si512( sE, alpha[14] ); /* c7 */ \
|
||||||
sF = _mm512_xor_si512( sF, alpha[15] ); \
|
sF = _mm512_xor_si512( sF, alpha[15] ); /* m7 */ \
|
||||||
\
|
\
|
||||||
SBOX8( s0, s4, s8, sC ); \
|
SBOX8( s0, s4, s8, sC ); /* ( m0, c2, m4, c6 ) */ \
|
||||||
SBOX8( s1, s5, s9, sD ); \
|
SBOX8( s1, s5, s9, sD ); /* ( c0, m2, c4, m6 ) */ \
|
||||||
SBOX8( s2, s6, sA, sE ); \
|
SBOX8( s2, s6, sA, sE ); /* ( m1, c3, m5, c7 ) */ \
|
||||||
SBOX8( s3, s7, sB, sF ); \
|
SBOX8( s3, s7, sB, sF ); /* ( c1, m3, c5, m7 ) */ \
|
||||||
\
|
\
|
||||||
t1 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s4, 4 ), \
|
s4 = mm512_swap64_32( s4 ); \
|
||||||
_mm512_bslli_epi128( s5, 4 ) ); \
|
s5 = mm512_swap64_32( s5 ); \
|
||||||
t3 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( sD, 4 ), \
|
sD = mm512_swap64_32( sD ); \
|
||||||
_mm512_bslli_epi128( sE, 4 ) ); \
|
sE = mm512_swap64_32( sE ); \
|
||||||
|
t1 = _mm512_mask_blend_epi32( 0xaaaa, s4, s5 ); \
|
||||||
|
t3 = _mm512_mask_blend_epi32( 0xaaaa, sD, sE ); \
|
||||||
L8( s0, t1, s9, t3 ); \
|
L8( s0, t1, s9, t3 ); \
|
||||||
s4 = _mm512_mask_blend_epi32( 0xaaaa, s4, _mm512_bslli_epi128( t1, 4 ) ); \
|
s4 = _mm512_mask_blend_epi32( 0x5555, s4, t1 ); \
|
||||||
s5 = _mm512_mask_blend_epi32( 0x5555, s5, _mm512_bsrli_epi128( t1, 4 ) ); \
|
s5 = _mm512_mask_blend_epi32( 0xaaaa, s5, t1 ); \
|
||||||
sD = _mm512_mask_blend_epi32( 0xaaaa, sD, _mm512_bslli_epi128( t3, 4 ) ); \
|
sD = _mm512_mask_blend_epi32( 0x5555, sD, t3 ); \
|
||||||
sE = _mm512_mask_blend_epi32( 0x5555, sE, _mm512_bsrli_epi128( t3, 4 ) ); \
|
sE = _mm512_mask_blend_epi32( 0xaaaa, sE, t3 ); \
|
||||||
\
|
\
|
||||||
t1 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s5, 4 ), \
|
s6 = mm512_swap64_32( s6 ); \
|
||||||
_mm512_bslli_epi128( s6, 4 ) ); \
|
sF = mm512_swap64_32( sF ); \
|
||||||
t3 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( sE, 4 ), \
|
t1 = _mm512_mask_blend_epi32( 0xaaaa, s5, s6 ); \
|
||||||
_mm512_bslli_epi128( sF, 4 ) ); \
|
t3 = _mm512_mask_blend_epi32( 0xaaaa, sE, sF ); \
|
||||||
L8( s1, t1, sA, t3 ); \
|
L8( s1, t1, sA, t3 ); \
|
||||||
s5 = _mm512_mask_blend_epi32( 0xaaaa, s5, _mm512_bslli_epi128( t1, 4 ) ); \
|
s5 = _mm512_mask_blend_epi32( 0x5555, s5, t1 ); \
|
||||||
s6 = _mm512_mask_blend_epi32( 0x5555, s6, _mm512_bsrli_epi128( t1, 4 ) ); \
|
s6 = _mm512_mask_blend_epi32( 0xaaaa, s6, t1 ); \
|
||||||
sE = _mm512_mask_blend_epi32( 0xaaaa, sE, _mm512_bslli_epi128( t3, 4 ) ); \
|
sE = _mm512_mask_blend_epi32( 0x5555, sE, t3 ); \
|
||||||
sF = _mm512_mask_blend_epi32( 0x5555, sF, _mm512_bsrli_epi128( t3, 4 ) ); \
|
sF = _mm512_mask_blend_epi32( 0xaaaa, sF, t3 ); \
|
||||||
\
|
\
|
||||||
t1 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s6, 4 ), \
|
s7 = mm512_swap64_32( s7 ); \
|
||||||
_mm512_bslli_epi128( s7, 4 ) ); \
|
sC = mm512_swap64_32( sC ); \
|
||||||
t3 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( sF, 4 ), \
|
t1 = _mm512_mask_blend_epi32( 0xaaaa, s6, s7 ); \
|
||||||
_mm512_bslli_epi128( sC, 4 ) ); \
|
t3 = _mm512_mask_blend_epi32( 0xaaaa, sF, sC ); \
|
||||||
L8( s2, t1, sB, t3 ); \
|
L8( s2, t1, sB, t3 ); \
|
||||||
s6 = _mm512_mask_blend_epi32( 0xaaaa, s6, _mm512_bslli_epi128( t1, 4 ) ); \
|
s6 = _mm512_mask_blend_epi32( 0x5555, s6, t1 ); \
|
||||||
s7 = _mm512_mask_blend_epi32( 0x5555, s7, _mm512_bsrli_epi128( t1, 4 ) ); \
|
s7 = _mm512_mask_blend_epi32( 0xaaaa, s7, t1 ); \
|
||||||
sF = _mm512_mask_blend_epi32( 0xaaaa, sF, _mm512_bslli_epi128( t3, 4 ) ); \
|
sF = _mm512_mask_blend_epi32( 0x5555, sF, t3 ); \
|
||||||
sC = _mm512_mask_blend_epi32( 0x5555, sC, _mm512_bsrli_epi128( t3, 4 ) ); \
|
sC = _mm512_mask_blend_epi32( 0xaaaa, sC, t3 ); \
|
||||||
|
s6 = mm512_swap64_32( s6 ); \
|
||||||
|
sF = mm512_swap64_32( sF ); \
|
||||||
\
|
\
|
||||||
t1 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s7, 4 ), \
|
t1 = _mm512_mask_blend_epi32( 0xaaaa, s7, s4 ); \
|
||||||
_mm512_bslli_epi128( s4, 4 ) ); \
|
t3 = _mm512_mask_blend_epi32( 0xaaaa, sC, sD ); \
|
||||||
t3 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( sC, 4 ), \
|
|
||||||
_mm512_bslli_epi128( sD, 4 ) ); \
|
|
||||||
L8( s3, t1, s8, t3 ); \
|
L8( s3, t1, s8, t3 ); \
|
||||||
s7 = _mm512_mask_blend_epi32( 0xaaaa, s7, _mm512_bslli_epi128( t1, 4 ) ); \
|
s7 = _mm512_mask_blend_epi32( 0x5555, s7, t1 ); \
|
||||||
s4 = _mm512_mask_blend_epi32( 0x5555, s4, _mm512_bsrli_epi128( t1, 4 ) ); \
|
s4 = _mm512_mask_blend_epi32( 0xaaaa, s4, t1 ); \
|
||||||
sC = _mm512_mask_blend_epi32( 0xaaaa, sC, _mm512_bslli_epi128( t3, 4 ) ); \
|
sC = _mm512_mask_blend_epi32( 0x5555, sC, t3 ); \
|
||||||
sD = _mm512_mask_blend_epi32( 0x5555, sD, _mm512_bsrli_epi128( t3, 4 ) ); \
|
sD = _mm512_mask_blend_epi32( 0xaaaa, sD, t3 ); \
|
||||||
|
s7 = mm512_swap64_32( s7 ); \
|
||||||
|
sC = mm512_swap64_32( sC ); \
|
||||||
\
|
\
|
||||||
t0 = _mm512_mask_blend_epi32( 0xaaaa, s0, _mm512_bslli_epi128( s8, 4 ) ); \
|
t0 = _mm512_mask_blend_epi32( 0xaaaa, s0, mm512_swap64_32( s8 ) ); \
|
||||||
t1 = _mm512_mask_blend_epi32( 0xaaaa, s1, s9 ); \
|
t1 = _mm512_mask_blend_epi32( 0xaaaa, s1, s9 ); \
|
||||||
t2 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s2, 4 ), sA ); \
|
t2 = _mm512_mask_blend_epi32( 0xaaaa, mm512_swap64_32( s2 ), sA ); \
|
||||||
t3 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s3, 4 ), \
|
t3 = _mm512_mask_blend_epi32( 0x5555, s3, sB ); \
|
||||||
_mm512_bslli_epi128( sB, 4 ) ); \
|
t3 = mm512_swap64_32( t3 ); \
|
||||||
L8( t0, t1, t2, t3 ); \
|
L8( t0, t1, t2, t3 ); \
|
||||||
|
t3 = mm512_swap64_32( t3 ); \
|
||||||
s0 = _mm512_mask_blend_epi32( 0x5555, s0, t0 ); \
|
s0 = _mm512_mask_blend_epi32( 0x5555, s0, t0 ); \
|
||||||
s8 = _mm512_mask_blend_epi32( 0x5555, s8, _mm512_bsrli_epi128( t0, 4 ) ); \
|
s8 = _mm512_mask_blend_epi32( 0x5555, s8, mm512_swap64_32( t0 ) ); \
|
||||||
s1 = _mm512_mask_blend_epi32( 0x5555, s1, t1 ); \
|
s1 = _mm512_mask_blend_epi32( 0x5555, s1, t1 ); \
|
||||||
s9 = _mm512_mask_blend_epi32( 0xaaaa, s9, t1 ); \
|
s9 = _mm512_mask_blend_epi32( 0xaaaa, s9, t1 ); \
|
||||||
s2 = _mm512_mask_blend_epi32( 0xaaaa, s2, _mm512_bslli_epi128( t2, 4 ) ); \
|
s2 = _mm512_mask_blend_epi32( 0xaaaa, s2, mm512_swap64_32( t2 ) ); \
|
||||||
sA = _mm512_mask_blend_epi32( 0xaaaa, sA, t2 ); \
|
sA = _mm512_mask_blend_epi32( 0xaaaa, sA, t2 ); \
|
||||||
s3 = _mm512_mask_blend_epi32( 0xaaaa, s3, _mm512_bslli_epi128( t3, 4 ) ); \
|
s3 = _mm512_mask_blend_epi32( 0xaaaa, s3, t3 ); \
|
||||||
sB = _mm512_mask_blend_epi32( 0x5555, sB, _mm512_bsrli_epi128( t3, 4 ) ); \
|
sB = _mm512_mask_blend_epi32( 0x5555, sB, t3 ); \
|
||||||
\
|
\
|
||||||
t0 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s4, 4 ), sC ); \
|
t0 = _mm512_mask_blend_epi32( 0xaaaa, s4, sC ); \
|
||||||
t1 = _mm512_mask_blend_epi32( 0xaaaa, _mm512_bsrli_epi128( s5, 4 ), \
|
t1 = _mm512_mask_blend_epi32( 0xaaaa, s5, sD ); \
|
||||||
_mm512_bslli_epi128( sD, 4 ) ); \
|
t2 = _mm512_mask_blend_epi32( 0xaaaa, s6, sE ); \
|
||||||
t2 = _mm512_mask_blend_epi32( 0xaaaa, s6, _mm512_bslli_epi128( sE, 4 ) ); \
|
|
||||||
t3 = _mm512_mask_blend_epi32( 0xaaaa, s7, sF ); \
|
t3 = _mm512_mask_blend_epi32( 0xaaaa, s7, sF ); \
|
||||||
L8( t0, t1, t2, t3 ); \
|
L8( t0, t1, t2, t3 ); \
|
||||||
s4 = _mm512_mask_blend_epi32( 0xaaaa, s4, _mm512_bslli_epi128( t0, 4 ) ); \
|
s4 = _mm512_mask_blend_epi32( 0x5555, s4, t0 ); \
|
||||||
sC = _mm512_mask_blend_epi32( 0xaaaa, sC, t0 ); \
|
sC = _mm512_mask_blend_epi32( 0xaaaa, sC, t0 ); \
|
||||||
s5 = _mm512_mask_blend_epi32( 0xaaaa, s5, _mm512_bslli_epi128( t1, 4 ) ); \
|
s5 = _mm512_mask_blend_epi32( 0x5555, s5, t1 ); \
|
||||||
sD = _mm512_mask_blend_epi32( 0x5555, sD, _mm512_bsrli_epi128( t1, 4 ) ); \
|
sD = _mm512_mask_blend_epi32( 0xaaaa, sD, t1 ); \
|
||||||
s6 = _mm512_mask_blend_epi32( 0x5555, s6, t2 ); \
|
s6 = _mm512_mask_blend_epi32( 0x5555, s6, t2 ); \
|
||||||
sE = _mm512_mask_blend_epi32( 0x5555, sE, _mm512_bsrli_epi128( t2, 4 ) ); \
|
sE = _mm512_mask_blend_epi32( 0xaaaa, sE, t2 ); \
|
||||||
s7 = _mm512_mask_blend_epi32( 0x5555, s7, t3 ); \
|
s7 = _mm512_mask_blend_epi32( 0x5555, s7, t3 ); \
|
||||||
sF = _mm512_mask_blend_epi32( 0xaaaa, sF, t3 ); \
|
sF = _mm512_mask_blend_epi32( 0xaaaa, sF, t3 ); \
|
||||||
|
s4 = mm512_swap64_32( s4 ); \
|
||||||
|
s5 = mm512_swap64_32( s5 ); \
|
||||||
|
sD = mm512_swap64_32( sD ); \
|
||||||
|
sE = mm512_swap64_32( sE ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define P_BIG8 \
|
#define P_BIG8 \
|
||||||
do { \
|
do { \
|
||||||
__m512i alpha[16]; \
|
__m512i alpha[16]; \
|
||||||
|
const uint64_t A0 = ( (uint64_t*)alpha_n )[0]; \
|
||||||
for( int i = 0; i < 16; i++ ) \
|
for( int i = 0; i < 16; i++ ) \
|
||||||
alpha[i] = m512_const1_64( ( (uint64_t*)alpha_n )[i] ); \
|
alpha[i] = m512_const1_64( ( (uint64_t*)alpha_n )[i] ); \
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)1 << 32 ) \
|
alpha[0] = m512_const1_64( (1ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)2 << 32 ) \
|
alpha[0] = m512_const1_64( (2ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)3 << 32 ) \
|
alpha[0] = m512_const1_64( (3ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)4 << 32 ) \
|
alpha[0] = m512_const1_64( (4ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)5 << 32 ) \
|
alpha[0] = m512_const1_64( (5ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PF_BIG8 \
|
#define PF_BIG8 \
|
||||||
do { \
|
do { \
|
||||||
__m512i alpha[16]; \
|
__m512i alpha[16]; \
|
||||||
|
const uint64_t A0 = ( (uint64_t*)alpha_f )[0]; \
|
||||||
for( int i = 0; i < 16; i++ ) \
|
for( int i = 0; i < 16; i++ ) \
|
||||||
alpha[i] = m512_const1_64( ( (uint64_t*)alpha_f )[i] ); \
|
alpha[i] = m512_const1_64( ( (uint64_t*)alpha_f )[i] ); \
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)1 << 32 ) \
|
alpha[0] = m512_const1_64( ( 1ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)2 << 32 ) \
|
alpha[0] = m512_const1_64( ( 2ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)3 << 32 ) \
|
alpha[0] = m512_const1_64( ( 3ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)4 << 32 ) \
|
alpha[0] = m512_const1_64( ( 4ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)5 << 32 ) \
|
alpha[0] = m512_const1_64( ( 5ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)6 << 32 ) \
|
alpha[0] = m512_const1_64( ( 6ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)7 << 32 ) \
|
alpha[0] = m512_const1_64( ( 7ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)8 << 32 ) \
|
alpha[0] = m512_const1_64( ( 8ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)9 << 32 ) \
|
alpha[0] = m512_const1_64( ( 9ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)10 << 32 ) \
|
alpha[0] = m512_const1_64( (10ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
alpha[0] = m512_const1_64( ( (uint64_t)11 << 32 ) \
|
alpha[0] = m512_const1_64( (11ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG8( alpha ); \
|
ROUND_BIG8( alpha ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define T_BIG8 \
|
#define T_BIG8 \
|
||||||
do { /* order is important */ \
|
do { /* order is important */ \
|
||||||
c7 = sc->h[ 0x7 ] = _mm512_xor_si512( sc->h[ 0x7 ], sB ); \
|
c7 = sc->h[ 7 ] = _mm512_xor_si512( sc->h[ 7 ], sB ); /* c5 */ \
|
||||||
c6 = sc->h[ 0x6 ] = _mm512_xor_si512( sc->h[ 0x6 ], sA ); \
|
c6 = sc->h[ 6 ] = _mm512_xor_si512( sc->h[ 6 ], sA ); /* m5 */ \
|
||||||
c5 = sc->h[ 0x5 ] = _mm512_xor_si512( sc->h[ 0x5 ], s9 ); \
|
c5 = sc->h[ 5 ] = _mm512_xor_si512( sc->h[ 5 ], s9 ); /* c4 */ \
|
||||||
c4 = sc->h[ 0x4 ] = _mm512_xor_si512( sc->h[ 0x4 ], s8 ); \
|
c4 = sc->h[ 4 ] = _mm512_xor_si512( sc->h[ 4 ], s8 ); /* m4 */ \
|
||||||
c3 = sc->h[ 0x3 ] = _mm512_xor_si512( sc->h[ 0x3 ], s3 ); \
|
c3 = sc->h[ 3 ] = _mm512_xor_si512( sc->h[ 3 ], s3 ); /* c1 */ \
|
||||||
c2 = sc->h[ 0x2 ] = _mm512_xor_si512( sc->h[ 0x2 ], s2 ); \
|
c2 = sc->h[ 2 ] = _mm512_xor_si512( sc->h[ 2 ], s2 ); /* m1 */ \
|
||||||
c1 = sc->h[ 0x1 ] = _mm512_xor_si512( sc->h[ 0x1 ], s1 ); \
|
c1 = sc->h[ 1 ] = _mm512_xor_si512( sc->h[ 1 ], s1 ); /* c0 */ \
|
||||||
c0 = sc->h[ 0x0 ] = _mm512_xor_si512( sc->h[ 0x0 ], s0 ); \
|
c0 = sc->h[ 0 ] = _mm512_xor_si512( sc->h[ 0 ], s0 ); /* m0 */ \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
void hamsi_8way_big( hamsi_8way_big_context *sc, __m512i *buf, size_t num )
|
void hamsi_8way_big( hamsi_8way_big_context *sc, __m512i *buf, size_t num )
|
||||||
@@ -838,7 +833,6 @@ void hamsi_8way_big_final( hamsi_8way_big_context *sc, __m512i *buf )
|
|||||||
WRITE_STATE_BIG8( sc );
|
WRITE_STATE_BIG8( sc );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void hamsi512_8way_init( hamsi_8way_big_context *sc )
|
void hamsi512_8way_init( hamsi_8way_big_context *sc )
|
||||||
{
|
{
|
||||||
sc->partial_len = 0;
|
sc->partial_len = 0;
|
||||||
@@ -888,13 +882,12 @@ void hamsi512_8way_close( hamsi_8way_big_context *sc, void *dst )
|
|||||||
#define INPUT_BIG \
|
#define INPUT_BIG \
|
||||||
do { \
|
do { \
|
||||||
__m256i db = *buf; \
|
__m256i db = *buf; \
|
||||||
const uint64_t *tp = (uint64_t*)&T512[0][0]; \
|
const __m256i zero = m256_zero; \
|
||||||
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m256_zero; \
|
const uint64_t *tp = (const uint64_t*)T512; \
|
||||||
for ( int u = 0; u < 64; u++ ) \
|
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = zero; \
|
||||||
|
for ( int u = 63; u >= 0; u-- ) \
|
||||||
{ \
|
{ \
|
||||||
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
|
__m256i dm = _mm256_cmpgt_epi64( zero, _mm256_slli_epi64( db, u ) ); \
|
||||||
dm = mm256_negate_32( _mm256_or_si256( dm, \
|
|
||||||
_mm256_slli_epi64( dm, 32 ) ) ); \
|
|
||||||
m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
|
m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
|
||||||
m256_const1_64( tp[0] ) ) ); \
|
m256_const1_64( tp[0] ) ) ); \
|
||||||
m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
|
m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
|
||||||
@@ -912,7 +905,6 @@ do { \
|
|||||||
m7 = _mm256_xor_si256( m7, _mm256_and_si256( dm, \
|
m7 = _mm256_xor_si256( m7, _mm256_and_si256( dm, \
|
||||||
m256_const1_64( tp[7] ) ) ); \
|
m256_const1_64( tp[7] ) ) ); \
|
||||||
tp += 8; \
|
tp += 8; \
|
||||||
db = _mm256_srli_epi64( db, 1 ); \
|
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
@@ -961,47 +953,28 @@ do { \
|
|||||||
|
|
||||||
#define READ_STATE_BIG(sc) \
|
#define READ_STATE_BIG(sc) \
|
||||||
do { \
|
do { \
|
||||||
c0 = sc->h[0x0]; \
|
c0 = sc->h[0]; \
|
||||||
c1 = sc->h[0x1]; \
|
c1 = sc->h[1]; \
|
||||||
c2 = sc->h[0x2]; \
|
c2 = sc->h[2]; \
|
||||||
c3 = sc->h[0x3]; \
|
c3 = sc->h[3]; \
|
||||||
c4 = sc->h[0x4]; \
|
c4 = sc->h[4]; \
|
||||||
c5 = sc->h[0x5]; \
|
c5 = sc->h[5]; \
|
||||||
c6 = sc->h[0x6]; \
|
c6 = sc->h[6]; \
|
||||||
c7 = sc->h[0x7]; \
|
c7 = sc->h[7]; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define WRITE_STATE_BIG(sc) \
|
#define WRITE_STATE_BIG(sc) \
|
||||||
do { \
|
do { \
|
||||||
sc->h[0x0] = c0; \
|
sc->h[0] = c0; \
|
||||||
sc->h[0x1] = c1; \
|
sc->h[1] = c1; \
|
||||||
sc->h[0x2] = c2; \
|
sc->h[2] = c2; \
|
||||||
sc->h[0x3] = c3; \
|
sc->h[3] = c3; \
|
||||||
sc->h[0x4] = c4; \
|
sc->h[4] = c4; \
|
||||||
sc->h[0x5] = c5; \
|
sc->h[5] = c5; \
|
||||||
sc->h[0x6] = c6; \
|
sc->h[6] = c6; \
|
||||||
sc->h[0x7] = c7; \
|
sc->h[7] = c7; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
/*
|
|
||||||
#define s0 m0
|
|
||||||
#define s1 c0
|
|
||||||
#define s2 m1
|
|
||||||
#define s3 c1
|
|
||||||
#define s4 c2
|
|
||||||
#define s5 m2
|
|
||||||
#define s6 c3
|
|
||||||
#define s7 m3
|
|
||||||
#define s8 m4
|
|
||||||
#define s9 c4
|
|
||||||
#define sA m5
|
|
||||||
#define sB c5
|
|
||||||
#define sC c6
|
|
||||||
#define sD m6
|
|
||||||
#define sE c7
|
|
||||||
#define sF m7
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define ROUND_BIG( alpha ) \
|
#define ROUND_BIG( alpha ) \
|
||||||
do { \
|
do { \
|
||||||
__m256i t0, t1, t2, t3; \
|
__m256i t0, t1, t2, t3; \
|
||||||
@@ -1027,151 +1000,145 @@ do { \
|
|||||||
SBOX( s2, s6, sA, sE ); \
|
SBOX( s2, s6, sA, sE ); \
|
||||||
SBOX( s3, s7, sB, sF ); \
|
SBOX( s3, s7, sB, sF ); \
|
||||||
\
|
\
|
||||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s4, 4 ), \
|
s4 = mm256_swap64_32( s4 ); \
|
||||||
_mm256_bslli_epi128( s5, 4 ), 0xAA ); \
|
s5 = mm256_swap64_32( s5 ); \
|
||||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sD, 4 ), \
|
sD = mm256_swap64_32( sD ); \
|
||||||
_mm256_bslli_epi128( sE, 4 ), 0xAA ); \
|
sE = mm256_swap64_32( sE ); \
|
||||||
|
t1 = _mm256_blend_epi32( s4, s5, 0xaa ); \
|
||||||
|
t3 = _mm256_blend_epi32( sD, sE, 0xaa ); \
|
||||||
L( s0, t1, s9, t3 ); \
|
L( s0, t1, s9, t3 ); \
|
||||||
s4 = _mm256_blend_epi32( s4, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
s4 = _mm256_blend_epi32( s4, t1, 0x55 ); \
|
||||||
s5 = _mm256_blend_epi32( s5, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
s5 = _mm256_blend_epi32( s5, t1, 0xaa ); \
|
||||||
sD = _mm256_blend_epi32( sD, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
sD = _mm256_blend_epi32( sD, t3, 0x55 ); \
|
||||||
sE = _mm256_blend_epi32( sE, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
sE = _mm256_blend_epi32( sE, t3, 0xaa ); \
|
||||||
\
|
\
|
||||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s5, 4 ), \
|
s6 = mm256_swap64_32( s6 ); \
|
||||||
_mm256_bslli_epi128( s6, 4 ), 0xAA ); \
|
sF = mm256_swap64_32( sF ); \
|
||||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sE, 4 ), \
|
t1 = _mm256_blend_epi32( s5, s6, 0xaa ); \
|
||||||
_mm256_bslli_epi128( sF, 4 ), 0xAA ); \
|
t3 = _mm256_blend_epi32( sE, sF, 0xaa ); \
|
||||||
L( s1, t1, sA, t3 ); \
|
L( s1, t1, sA, t3 ); \
|
||||||
s5 = _mm256_blend_epi32( s5, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
s5 = _mm256_blend_epi32( s5, t1, 0x55 ); \
|
||||||
s6 = _mm256_blend_epi32( s6, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
s6 = _mm256_blend_epi32( s6, t1, 0xaa ); \
|
||||||
sE = _mm256_blend_epi32( sE, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
sE = _mm256_blend_epi32( sE, t3, 0x55 ); \
|
||||||
sF = _mm256_blend_epi32( sF, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
sF = _mm256_blend_epi32( sF, t3, 0xaa ); \
|
||||||
\
|
\
|
||||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s6, 4 ), \
|
s7 = mm256_swap64_32( s7 ); \
|
||||||
_mm256_bslli_epi128( s7, 4 ), 0xAA ); \
|
sC = mm256_swap64_32( sC ); \
|
||||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sF, 4 ), \
|
t1 = _mm256_blend_epi32( s6, s7, 0xaa ); \
|
||||||
_mm256_bslli_epi128( sC, 4 ), 0xAA ); \
|
t3 = _mm256_blend_epi32( sF, sC, 0xaa ); \
|
||||||
L( s2, t1, sB, t3 ); \
|
L( s2, t1, sB, t3 ); \
|
||||||
s6 = _mm256_blend_epi32( s6, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
s6 = _mm256_blend_epi32( s6, t1, 0x55 ); \
|
||||||
s7 = _mm256_blend_epi32( s7, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
s7 = _mm256_blend_epi32( s7, t1, 0xaa ); \
|
||||||
sF = _mm256_blend_epi32( sF, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
sF = _mm256_blend_epi32( sF, t3, 0x55 ); \
|
||||||
sC = _mm256_blend_epi32( sC, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
sC = _mm256_blend_epi32( sC, t3, 0xaa ); \
|
||||||
|
s6 = mm256_swap64_32( s6 ); \
|
||||||
|
sF = mm256_swap64_32( sF ); \
|
||||||
\
|
\
|
||||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s7, 4 ), \
|
t1 = _mm256_blend_epi32( s7, s4, 0xaa ); \
|
||||||
_mm256_bslli_epi128( s4, 4 ), 0xAA ); \
|
t3 = _mm256_blend_epi32( sC, sD, 0xaa ); \
|
||||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sC, 4 ), \
|
|
||||||
_mm256_bslli_epi128( sD, 4 ), 0xAA ); \
|
|
||||||
L( s3, t1, s8, t3 ); \
|
L( s3, t1, s8, t3 ); \
|
||||||
s7 = _mm256_blend_epi32( s7, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
s7 = _mm256_blend_epi32( s7, t1, 0x55 ); \
|
||||||
s4 = _mm256_blend_epi32( s4, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
s4 = _mm256_blend_epi32( s4, t1, 0xaa ); \
|
||||||
sC = _mm256_blend_epi32( sC, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
sC = _mm256_blend_epi32( sC, t3, 0x55 ); \
|
||||||
sD = _mm256_blend_epi32( sD, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
sD = _mm256_blend_epi32( sD, t3, 0xaa ); \
|
||||||
|
s7 = mm256_swap64_32( s7 ); \
|
||||||
|
sC = mm256_swap64_32( sC ); \
|
||||||
\
|
\
|
||||||
t0 = _mm256_blend_epi32( s0, _mm256_bslli_epi128( s8, 4 ), 0xAA ); \
|
t0 = _mm256_blend_epi32( s0, mm256_swap64_32( s8 ), 0xaa ); \
|
||||||
t1 = _mm256_blend_epi32( s1, s9, 0xAA ); \
|
t1 = _mm256_blend_epi32( s1, s9, 0xaa ); \
|
||||||
t2 = _mm256_blend_epi32( _mm256_bsrli_epi128( s2, 4 ), sA, 0xAA ); \
|
t2 = _mm256_blend_epi32( mm256_swap64_32( s2 ), sA, 0xaa ); \
|
||||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( s3, 4 ), \
|
t3 = _mm256_blend_epi32( s3, sB, 0x55 ); \
|
||||||
_mm256_bslli_epi128( sB, 4 ), 0xAA ); \
|
t3 = mm256_swap64_32( t3 ); \
|
||||||
L( t0, t1, t2, t3 ); \
|
L( t0, t1, t2, t3 ); \
|
||||||
|
t3 = mm256_swap64_32( t3 ); \
|
||||||
s0 = _mm256_blend_epi32( s0, t0, 0x55 ); \
|
s0 = _mm256_blend_epi32( s0, t0, 0x55 ); \
|
||||||
s8 = _mm256_blend_epi32( s8, _mm256_bsrli_epi128( t0, 4 ), 0x55 ); \
|
s8 = _mm256_blend_epi32( s8, mm256_swap64_32( t0 ), 0x55 ); \
|
||||||
s1 = _mm256_blend_epi32( s1, t1, 0x55 ); \
|
s1 = _mm256_blend_epi32( s1, t1, 0x55 ); \
|
||||||
s9 = _mm256_blend_epi32( s9, t1, 0xAA ); \
|
s9 = _mm256_blend_epi32( s9, t1, 0xaa ); \
|
||||||
s2 = _mm256_blend_epi32( s2, _mm256_bslli_epi128( t2, 4 ), 0xAA ); \
|
s2 = _mm256_blend_epi32( s2, mm256_swap64_32( t2 ), 0xaa ); \
|
||||||
sA = _mm256_blend_epi32( sA, t2, 0xAA ); \
|
sA = _mm256_blend_epi32( sA, t2, 0xaa ); \
|
||||||
s3 = _mm256_blend_epi32( s3, _mm256_bslli_epi128( t3, 4 ), 0xAA ); \
|
s3 = _mm256_blend_epi32( s3, t3, 0xaa ); \
|
||||||
sB = _mm256_blend_epi32( sB, _mm256_bsrli_epi128( t3, 4 ), 0x55 ); \
|
sB = _mm256_blend_epi32( sB, t3, 0x55 ); \
|
||||||
\
|
\
|
||||||
t0 = _mm256_blend_epi32( _mm256_bsrli_epi128( s4, 4 ), sC, 0xAA ); \
|
t0 = _mm256_blend_epi32( s4, sC, 0xaa ); \
|
||||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s5, 4 ), \
|
t1 = _mm256_blend_epi32( s5, sD, 0xaa ); \
|
||||||
_mm256_bslli_epi128( sD, 4 ), 0xAA ); \
|
t2 = _mm256_blend_epi32( s6, sE, 0xaa ); \
|
||||||
t2 = _mm256_blend_epi32( s6, _mm256_bslli_epi128( sE, 4 ), 0xAA ); \
|
t3 = _mm256_blend_epi32( s7, sF, 0xaa ); \
|
||||||
t3 = _mm256_blend_epi32( s7, sF, 0xAA ); \
|
|
||||||
L( t0, t1, t2, t3 ); \
|
L( t0, t1, t2, t3 ); \
|
||||||
s4 = _mm256_blend_epi32( s4, _mm256_bslli_epi128( t0, 4 ), 0xAA ); \
|
s4 = _mm256_blend_epi32( s4, t0, 0x55 ); \
|
||||||
sC = _mm256_blend_epi32( sC, t0, 0xAA ); \
|
sC = _mm256_blend_epi32( sC, t0, 0xaa ); \
|
||||||
s5 = _mm256_blend_epi32( s5, _mm256_bslli_epi128( t1, 4 ), 0xAA ); \
|
s5 = _mm256_blend_epi32( s5, t1, 0x55 ); \
|
||||||
sD = _mm256_blend_epi32( sD, _mm256_bsrli_epi128( t1, 4 ), 0x55 ); \
|
sD = _mm256_blend_epi32( sD, t1, 0xaa ); \
|
||||||
s6 = _mm256_blend_epi32( s6, t2, 0x55 ); \
|
s6 = _mm256_blend_epi32( s6, t2, 0x55 ); \
|
||||||
sE = _mm256_blend_epi32( sE, _mm256_bsrli_epi128( t2, 4 ), 0x55 ); \
|
sE = _mm256_blend_epi32( sE, t2, 0xaa ); \
|
||||||
s7 = _mm256_blend_epi32( s7, t3, 0x55 ); \
|
s7 = _mm256_blend_epi32( s7, t3, 0x55 ); \
|
||||||
sF = _mm256_blend_epi32( sF, t3, 0xAA ); \
|
sF = _mm256_blend_epi32( sF, t3, 0xaa ); \
|
||||||
|
s4 = mm256_swap64_32( s4 ); \
|
||||||
|
s5 = mm256_swap64_32( s5 ); \
|
||||||
|
sD = mm256_swap64_32( sD ); \
|
||||||
|
sE = mm256_swap64_32( sE ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define P_BIG \
|
#define P_BIG \
|
||||||
do { \
|
do { \
|
||||||
__m256i alpha[16]; \
|
__m256i alpha[16]; \
|
||||||
|
const uint64_t A0 = ( (uint64_t*)alpha_n )[0]; \
|
||||||
for( int i = 0; i < 16; i++ ) \
|
for( int i = 0; i < 16; i++ ) \
|
||||||
alpha[i] = m256_const1_64( ( (uint64_t*)alpha_n )[i] ); \
|
alpha[i] = m256_const1_64( ( (uint64_t*)alpha_n )[i] ); \
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)1 << 32 ) \
|
alpha[0] = m256_const1_64( (1ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)2 << 32 ) \
|
alpha[0] = m256_const1_64( (2ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)3 << 32 ) \
|
alpha[0] = m256_const1_64( (3ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)4 << 32 ) \
|
alpha[0] = m256_const1_64( (4ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)5 << 32 ) \
|
alpha[0] = m256_const1_64( (5ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_n )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PF_BIG \
|
#define PF_BIG \
|
||||||
do { \
|
do { \
|
||||||
__m256i alpha[16]; \
|
__m256i alpha[16]; \
|
||||||
|
const uint64_t A0 = ( (uint64_t*)alpha_f )[0]; \
|
||||||
for( int i = 0; i < 16; i++ ) \
|
for( int i = 0; i < 16; i++ ) \
|
||||||
alpha[i] = m256_const1_64( ( (uint64_t*)alpha_f )[i] ); \
|
alpha[i] = m256_const1_64( ( (uint64_t*)alpha_f )[i] ); \
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)1 << 32 ) \
|
alpha[0] = m256_const1_64( ( 1ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)2 << 32 ) \
|
alpha[0] = m256_const1_64( ( 2ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)3 << 32 ) \
|
alpha[0] = m256_const1_64( ( 3ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)4 << 32 ) \
|
alpha[0] = m256_const1_64( ( 4ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)5 << 32 ) \
|
alpha[0] = m256_const1_64( ( 5ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)6 << 32 ) \
|
alpha[0] = m256_const1_64( ( 6ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)7 << 32 ) \
|
alpha[0] = m256_const1_64( ( 7ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)8 << 32 ) \
|
alpha[0] = m256_const1_64( ( 8ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)9 << 32 ) \
|
alpha[0] = m256_const1_64( ( 9ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)10 << 32 ) \
|
alpha[0] = m256_const1_64( (10ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
alpha[0] = m256_const1_64( ( (uint64_t)11 << 32 ) \
|
alpha[0] = m256_const1_64( (11ULL << 32) ^ A0 ); \
|
||||||
^ ( (uint64_t*)alpha_f )[0] ); \
|
|
||||||
ROUND_BIG( alpha ); \
|
ROUND_BIG( alpha ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define T_BIG \
|
#define T_BIG \
|
||||||
do { /* order is important */ \
|
do { /* order is important */ \
|
||||||
c7 = sc->h[ 0x7 ] = _mm256_xor_si256( sc->h[ 0x7 ], sB ); \
|
c7 = sc->h[ 7 ] = _mm256_xor_si256( sc->h[ 7 ], sB ); \
|
||||||
c6 = sc->h[ 0x6 ] = _mm256_xor_si256( sc->h[ 0x6 ], sA ); \
|
c6 = sc->h[ 6 ] = _mm256_xor_si256( sc->h[ 6 ], sA ); \
|
||||||
c5 = sc->h[ 0x5 ] = _mm256_xor_si256( sc->h[ 0x5 ], s9 ); \
|
c5 = sc->h[ 5 ] = _mm256_xor_si256( sc->h[ 5 ], s9 ); \
|
||||||
c4 = sc->h[ 0x4 ] = _mm256_xor_si256( sc->h[ 0x4 ], s8 ); \
|
c4 = sc->h[ 4 ] = _mm256_xor_si256( sc->h[ 4 ], s8 ); \
|
||||||
c3 = sc->h[ 0x3 ] = _mm256_xor_si256( sc->h[ 0x3 ], s3 ); \
|
c3 = sc->h[ 3 ] = _mm256_xor_si256( sc->h[ 3 ], s3 ); \
|
||||||
c2 = sc->h[ 0x2 ] = _mm256_xor_si256( sc->h[ 0x2 ], s2 ); \
|
c2 = sc->h[ 2 ] = _mm256_xor_si256( sc->h[ 2 ], s2 ); \
|
||||||
c1 = sc->h[ 0x1 ] = _mm256_xor_si256( sc->h[ 0x1 ], s1 ); \
|
c1 = sc->h[ 1 ] = _mm256_xor_si256( sc->h[ 1 ], s1 ); \
|
||||||
c0 = sc->h[ 0x0 ] = _mm256_xor_si256( sc->h[ 0x0 ], s0 ); \
|
c0 = sc->h[ 0 ] = _mm256_xor_si256( sc->h[ 0 ], s0 ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
void hamsi_big( hamsi_4way_big_context *sc, __m256i *buf, size_t num )
|
void hamsi_big( hamsi_4way_big_context *sc, __m256i *buf, size_t num )
|
||||||
|
@@ -45,6 +45,6 @@ void sha512Compute32b_parallel(
|
|||||||
uint64_t *data[SHA512_PARALLEL_N],
|
uint64_t *data[SHA512_PARALLEL_N],
|
||||||
uint64_t *digest[SHA512_PARALLEL_N]);
|
uint64_t *digest[SHA512_PARALLEL_N]);
|
||||||
|
|
||||||
void sha512ProcessBlock(Sha512Context *context);
|
void sha512ProcessBlock(Sha512Context contexti[2] );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -53,7 +53,8 @@ static const uint64_t RC[] = {
|
|||||||
#define WRITE_STATE(sc)
|
#define WRITE_STATE(sc)
|
||||||
|
|
||||||
#define MOV64(d, s) (d = s)
|
#define MOV64(d, s) (d = s)
|
||||||
#define XOR64_IOTA XOR64
|
#define XOR64_IOTA XOR
|
||||||
|
|
||||||
|
|
||||||
#define LPAR (
|
#define LPAR (
|
||||||
#define RPAR )
|
#define RPAR )
|
||||||
@@ -71,14 +72,15 @@ static const uint64_t RC[] = {
|
|||||||
// Targetted macros, keccak-macros.h is included for each target.
|
// Targetted macros, keccak-macros.h is included for each target.
|
||||||
|
|
||||||
#define DECL64(x) __m512i x
|
#define DECL64(x) __m512i x
|
||||||
#define XOR64(d, a, b) (d = _mm512_xor_si512(a,b))
|
#define XOR(d, a, b) (d = _mm512_xor_si512(a,b))
|
||||||
|
#define XOR64 XOR
|
||||||
#define AND64(d, a, b) (d = _mm512_and_si512(a,b))
|
#define AND64(d, a, b) (d = _mm512_and_si512(a,b))
|
||||||
#define OR64(d, a, b) (d = _mm512_or_si512(a,b))
|
#define OR64(d, a, b) (d = _mm512_or_si512(a,b))
|
||||||
#define NOT64(d, s) (d = _mm512_xor_si512(s,m512_neg1))
|
#define NOT64(d, s) (d = _mm512_xor_si512(s,m512_neg1))
|
||||||
#define ROL64(d, v, n) (d = mm512_rol_64(v, n))
|
#define ROL64(d, v, n) (d = mm512_rol_64(v, n))
|
||||||
#define XOROR(d, a, b, c) (d = mm512_xoror(a, b, c))
|
#define XOROR(d, a, b, c) (d = mm512_xoror(a, b, c))
|
||||||
#define XORAND(d, a, b, c) (d = mm512_xorand(a, b, c))
|
#define XORAND(d, a, b, c) (d = mm512_xorand(a, b, c))
|
||||||
|
#define XOR3( d, a, b, c ) (d = mm512_xor3( a, b, c ))
|
||||||
|
|
||||||
#include "keccak-macros.c"
|
#include "keccak-macros.c"
|
||||||
|
|
||||||
@@ -236,6 +238,7 @@ keccak512_8way_close(void *cc, void *dst)
|
|||||||
#undef INPUT_BUF
|
#undef INPUT_BUF
|
||||||
#undef DECL64
|
#undef DECL64
|
||||||
#undef XOR64
|
#undef XOR64
|
||||||
|
#undef XOR
|
||||||
#undef AND64
|
#undef AND64
|
||||||
#undef OR64
|
#undef OR64
|
||||||
#undef NOT64
|
#undef NOT64
|
||||||
@@ -243,7 +246,7 @@ keccak512_8way_close(void *cc, void *dst)
|
|||||||
#undef KECCAK_F_1600
|
#undef KECCAK_F_1600
|
||||||
#undef XOROR
|
#undef XOROR
|
||||||
#undef XORAND
|
#undef XORAND
|
||||||
|
#undef XOR3
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
// AVX2
|
// AVX2
|
||||||
@@ -255,13 +258,15 @@ keccak512_8way_close(void *cc, void *dst)
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define DECL64(x) __m256i x
|
#define DECL64(x) __m256i x
|
||||||
#define XOR64(d, a, b) (d = _mm256_xor_si256(a,b))
|
#define XOR(d, a, b) (d = _mm256_xor_si256(a,b))
|
||||||
|
#define XOR64 XOR
|
||||||
#define AND64(d, a, b) (d = _mm256_and_si256(a,b))
|
#define AND64(d, a, b) (d = _mm256_and_si256(a,b))
|
||||||
#define OR64(d, a, b) (d = _mm256_or_si256(a,b))
|
#define OR64(d, a, b) (d = _mm256_or_si256(a,b))
|
||||||
#define NOT64(d, s) (d = _mm256_xor_si256(s,m256_neg1))
|
#define NOT64(d, s) (d = _mm256_xor_si256(s,m256_neg1))
|
||||||
#define ROL64(d, v, n) (d = mm256_rol_64(v, n))
|
#define ROL64(d, v, n) (d = mm256_rol_64(v, n))
|
||||||
#define XOROR(d, a, b, c) (d = _mm256_xor_si256(a, _mm256_or_si256(b, c)))
|
#define XOROR(d, a, b, c) (d = _mm256_xor_si256(a, _mm256_or_si256(b, c)))
|
||||||
#define XORAND(d, a, b, c) (d = _mm256_xor_si256(a, _mm256_and_si256(b, c)))
|
#define XORAND(d, a, b, c) (d = _mm256_xor_si256(a, _mm256_and_si256(b, c)))
|
||||||
|
#define XOR3( d, a, b, c ) (d = mm256_xor3( a, b, c ))
|
||||||
|
|
||||||
#include "keccak-macros.c"
|
#include "keccak-macros.c"
|
||||||
|
|
||||||
@@ -421,6 +426,7 @@ keccak512_4way_close(void *cc, void *dst)
|
|||||||
#undef INPUT_BUF
|
#undef INPUT_BUF
|
||||||
#undef DECL64
|
#undef DECL64
|
||||||
#undef XOR64
|
#undef XOR64
|
||||||
|
#undef XOR
|
||||||
#undef AND64
|
#undef AND64
|
||||||
#undef OR64
|
#undef OR64
|
||||||
#undef NOT64
|
#undef NOT64
|
||||||
@@ -428,5 +434,6 @@ keccak512_4way_close(void *cc, void *dst)
|
|||||||
#undef KECCAK_F_1600
|
#undef KECCAK_F_1600
|
||||||
#undef XOROR
|
#undef XOROR
|
||||||
#undef XORAND
|
#undef XORAND
|
||||||
|
#undef XOR3
|
||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
@@ -1,6 +1,19 @@
|
|||||||
#ifdef TH_ELT
|
#ifdef TH_ELT
|
||||||
#undef TH_ELT
|
#undef TH_ELT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
||||||
|
DECL64(tt0); \
|
||||||
|
DECL64(tt1); \
|
||||||
|
XOR3( tt0, d0, d1, d4 ); \
|
||||||
|
XOR( tt1, d2, d3 ); \
|
||||||
|
XOR( tt0, tt0, tt1 ); \
|
||||||
|
ROL64( tt0, tt0, 1 ); \
|
||||||
|
XOR3( tt1, c0, c1, c4 ); \
|
||||||
|
XOR3( tt0, tt0, c2, c3 ); \
|
||||||
|
XOR( t, tt0, tt1 ); \
|
||||||
|
} while (0)
|
||||||
|
/*
|
||||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
||||||
DECL64(tt0); \
|
DECL64(tt0); \
|
||||||
DECL64(tt1); \
|
DECL64(tt1); \
|
||||||
@@ -17,7 +30,7 @@
|
|||||||
XOR64(tt2, tt2, tt3); \
|
XOR64(tt2, tt2, tt3); \
|
||||||
XOR64(t, tt0, tt2); \
|
XOR64(t, tt0, tt2); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
*/
|
||||||
#ifdef THETA
|
#ifdef THETA
|
||||||
#undef THETA
|
#undef THETA
|
||||||
#endif
|
#endif
|
||||||
|
@@ -188,7 +188,7 @@ bool register_allium_algo( algo_gate_t* gate )
|
|||||||
gate->hash = (void*)&allium_hash;
|
gate->hash = (void*)&allium_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT
|
||||||
| VAES_OPT | VAES256_OPT;
|
| VAES_OPT;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
double lbry_calc_network_diff( struct work *work )
|
long double lbry_calc_network_diff( struct work *work )
|
||||||
{
|
{
|
||||||
// sample for diff 43.281 : 1c05ea29
|
// sample for diff 43.281 : 1c05ea29
|
||||||
// todo: endian reversed on longpoll could be zr5 specific...
|
// todo: endian reversed on longpoll could be zr5 specific...
|
||||||
@@ -12,7 +12,7 @@ double lbry_calc_network_diff( struct work *work )
|
|||||||
uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
|
uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
|
||||||
uint32_t bits = (nbits & 0xffffff);
|
uint32_t bits = (nbits & 0xffffff);
|
||||||
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
||||||
double d = (double)0x0000ffff / (double)bits;
|
long double d = (long double)0x0000ffff / (long double)bits;
|
||||||
|
|
||||||
for (int m=shift; m < 29; m++) d *= 256.0;
|
for (int m=shift; m < 29; m++) d *= 256.0;
|
||||||
for (int m=29; m < shift; m++) d /= 256.0;
|
for (int m=29; m < shift; m++) d /= 256.0;
|
||||||
|
@@ -35,6 +35,7 @@
|
|||||||
|
|
||||||
#include "sph_ripemd.h"
|
#include "sph_ripemd.h"
|
||||||
|
|
||||||
|
#if 0
|
||||||
/*
|
/*
|
||||||
* Round functions for RIPEMD (original).
|
* Round functions for RIPEMD (original).
|
||||||
*/
|
*/
|
||||||
@@ -46,6 +47,7 @@ static const sph_u32 oIV[5] = {
|
|||||||
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
|
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
|
||||||
SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
|
SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Round functions for RIPEMD-128 and RIPEMD-160.
|
* Round functions for RIPEMD-128 and RIPEMD-160.
|
||||||
@@ -63,6 +65,8 @@ static const sph_u32 IV[5] = {
|
|||||||
|
|
||||||
#define ROTL SPH_ROTL32
|
#define ROTL SPH_ROTL32
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
/* ===================================================================== */
|
/* ===================================================================== */
|
||||||
/*
|
/*
|
||||||
* RIPEMD (original hash, deprecated).
|
* RIPEMD (original hash, deprecated).
|
||||||
@@ -479,7 +483,7 @@ sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4])
|
|||||||
* One round of RIPEMD-128. The data must be aligned for 32-bit access.
|
* One round of RIPEMD-128. The data must be aligned for 32-bit access.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
ripemd128_round(const unsigned char *data, sph_u32 r[5])
|
ripemd128_round(const unsigned char *data, sph_u32 r[4])
|
||||||
{
|
{
|
||||||
#if SPH_LITTLE_FAST
|
#if SPH_LITTLE_FAST
|
||||||
|
|
||||||
@@ -539,6 +543,8 @@ sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
|
|||||||
#undef RIPEMD128_IN
|
#undef RIPEMD128_IN
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/* ===================================================================== */
|
/* ===================================================================== */
|
||||||
/*
|
/*
|
||||||
* RIPEMD-160.
|
* RIPEMD-160.
|
||||||
|
@@ -84,6 +84,7 @@
|
|||||||
* can be cloned by copying the context (e.g. with a simple
|
* can be cloned by copying the context (e.g. with a simple
|
||||||
* <code>memcpy()</code>).
|
* <code>memcpy()</code>).
|
||||||
*/
|
*/
|
||||||
|
#if 0
|
||||||
typedef struct {
|
typedef struct {
|
||||||
#ifndef DOXYGEN_IGNORE
|
#ifndef DOXYGEN_IGNORE
|
||||||
unsigned char buf[64]; /* first field, for alignment */
|
unsigned char buf[64]; /* first field, for alignment */
|
||||||
@@ -204,6 +205,8 @@ void sph_ripemd128_close(void *cc, void *dst);
|
|||||||
*/
|
*/
|
||||||
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
|
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/* ===================================================================== */
|
/* ===================================================================== */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -337,42 +337,42 @@ do{ \
|
|||||||
XC2 = XOR( XC2, TC ); \
|
XC2 = XOR( XC2, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA2, XA1 ); \
|
TA = ADD32( XA2, XA1 ); \
|
||||||
|
XA1 = ROL_1X32( XA1 ); \
|
||||||
TB = ADD32( XB2, XB1 ); \
|
TB = ADD32( XB2, XB1 ); \
|
||||||
TC = ADD32( XC2, XC1 ); \
|
TC = ADD32( XC2, XC1 ); \
|
||||||
TA = ROL32( TA, 13 ); \
|
|
||||||
XA1 = ROL_1X32( XA1 ); \
|
|
||||||
XB1 = ROL_1X32( XB1 ); \
|
XB1 = ROL_1X32( XB1 ); \
|
||||||
XC1 = ROL_1X32( XC1 ); \
|
TA = ROL32( TA, 13 ); \
|
||||||
XA3 = XOR( XA3, TA ); \
|
XA3 = XOR( XA3, TA ); \
|
||||||
|
XC1 = ROL_1X32( XC1 ); \
|
||||||
TB = ROL32( TB, 13 ); \
|
TB = ROL32( TB, 13 ); \
|
||||||
XB3 = XOR( XB3, TB ); \
|
XB3 = XOR( XB3, TB ); \
|
||||||
TC = ROL32( TC, 13 ); \
|
TC = ROL32( TC, 13 ); \
|
||||||
XC3 = XOR( XC3, TC ); \
|
XC3 = XOR( XC3, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA3, XA2 ); \
|
TA = ADD32( XA3, XA2 ); \
|
||||||
|
XA2 = SWAP_64( XA2 ); \
|
||||||
TB = ADD32( XB3, XB2 ); \
|
TB = ADD32( XB3, XB2 ); \
|
||||||
TC = ADD32( XC3, XC2 ); \
|
TC = ADD32( XC3, XC2 ); \
|
||||||
TA = ROL32( TA, 18 ); \
|
TA = ROL32( TA, 18 ); \
|
||||||
XA2 = SWAP_64( XA2 ); \
|
|
||||||
XB2 = SWAP_64( XB2 ); \
|
XB2 = SWAP_64( XB2 ); \
|
||||||
XC2 = SWAP_64( XC2 ); \
|
|
||||||
XA0 = XOR( XA0, TA ); \
|
XA0 = XOR( XA0, TA ); \
|
||||||
TB = ROL32( TB, 18 ); \
|
TB = ROL32( TB, 18 ); \
|
||||||
XB0 = XOR( XB0, TB ); \
|
XB0 = XOR( XB0, TB ); \
|
||||||
|
XC2 = SWAP_64( XC2 ); \
|
||||||
TC = ROL32( TC, 18 ); \
|
TC = ROL32( TC, 18 ); \
|
||||||
XC0 = XOR( XC0, TC ); \
|
XC0 = XOR( XC0, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA0, XA1 ); \
|
TA = ADD32( XA0, XA1 ); \
|
||||||
|
XA3 = ROR_1X32( XA3 ); \
|
||||||
TB = ADD32( XB0, XB1 ); \
|
TB = ADD32( XB0, XB1 ); \
|
||||||
TC = ADD32( XC0, XC1 ); \
|
TC = ADD32( XC0, XC1 ); \
|
||||||
TA = ROL32( TA, 7 ); \
|
TA = ROL32( TA, 7 ); \
|
||||||
XA3 = ROR_1X32( XA3 ); \
|
XB3 = ROR_1X32( XB3 ); \
|
||||||
XA3 = XOR( XA3, TA ); \
|
XA3 = XOR( XA3, TA ); \
|
||||||
TB = ROL32( TB, 7 ); \
|
TB = ROL32( TB, 7 ); \
|
||||||
XB3 = ROR_1X32( XB3 ); \
|
XC3 = ROR_1X32( XC3 ); \
|
||||||
XB3 = XOR( XB3, TB ); \
|
XB3 = XOR( XB3, TB ); \
|
||||||
TC = ROL32( TC, 7 ); \
|
TC = ROL32( TC, 7 ); \
|
||||||
XC3 = ROR_1X32( XC3 ); \
|
|
||||||
XC3 = XOR( XC3, TC ); \
|
XC3 = XOR( XC3, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA3, XA0 ); \
|
TA = ADD32( XA3, XA0 ); \
|
||||||
@@ -399,24 +399,24 @@ do{ \
|
|||||||
XC1 = XOR( XC1, TC ); \
|
XC1 = XOR( XC1, TC ); \
|
||||||
\
|
\
|
||||||
TA = ADD32( XA1, XA2 ); \
|
TA = ADD32( XA1, XA2 ); \
|
||||||
|
XA2 = SWAP_64( XA2 ); \
|
||||||
TB = ADD32( XB1, XB2 ); \
|
TB = ADD32( XB1, XB2 ); \
|
||||||
|
XB2 = SWAP_64( XB2 ); \
|
||||||
TA = ROL32( TA, 18); \
|
TA = ROL32( TA, 18); \
|
||||||
TC = ADD32( XC1, XC2 ); \
|
TC = ADD32( XC1, XC2 ); \
|
||||||
XA2 = SWAP_64( XA2 ); \
|
XC2 = SWAP_64( XC2 ); \
|
||||||
TB = ROL32( TB, 18); \
|
TB = ROL32( TB, 18); \
|
||||||
XA0 = XOR( XA0, TA ); \
|
XA0 = XOR( XA0, TA ); \
|
||||||
XB2 = SWAP_64( XB2 ); \
|
XA1 = ROR_1X32( XA1 ); \
|
||||||
TC = ROL32( TC, 18); \
|
TC = ROL32( TC, 18); \
|
||||||
XB0 = XOR( XB0, TB ); \
|
XB0 = XOR( XB0, TB ); \
|
||||||
XC2 = SWAP_64( XC2 ); \
|
|
||||||
XA1 = ROR_1X32( XA1 ); \
|
|
||||||
XB1 = ROR_1X32( XB1 ); \
|
XB1 = ROR_1X32( XB1 ); \
|
||||||
XC0 = XOR( XC0, TC ); \
|
XC0 = XOR( XC0, TC ); \
|
||||||
XC1 = ROR_1X32( XC1 ); \
|
XC1 = ROR_1X32( XC1 ); \
|
||||||
} while (0);
|
} while (0);
|
||||||
|
|
||||||
|
|
||||||
// slow rol, an attempt to optimize non-avx512 bit rotations
|
// slow rot, an attempt to optimize non-avx512 bit rotations
|
||||||
// Contains target specific instructions, only for use with 128 bit vectors
|
// Contains target specific instructions, only for use with 128 bit vectors
|
||||||
#define SALSA_2ROUNDS_SIMD128_3BUF_SLOROT \
|
#define SALSA_2ROUNDS_SIMD128_3BUF_SLOROT \
|
||||||
do{ \
|
do{ \
|
||||||
|
@@ -28,13 +28,13 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include "algo/sha/sha-hash-4way.h"
|
#include "algo/sha/sha-hash-4way.h"
|
||||||
#include "algo/sha/sha256-hash.h"
|
#include "algo/sha/sha256-hash.h"
|
||||||
#include <mm_malloc.h>
|
#include <mm_malloc.h>
|
||||||
|
#include "malloc-huge.h"
|
||||||
|
|
||||||
static const uint32_t keypad[12] = {
|
static const uint32_t keypad[12] = {
|
||||||
0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280
|
0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280
|
||||||
@@ -55,11 +55,25 @@ static const uint32_t sha256_initial_state[8] =
|
|||||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||||
};
|
};
|
||||||
|
|
||||||
static int scrypt_throughput = 0;
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
|
#define SCRYPT_THROUGHPUT 16
|
||||||
|
|
||||||
|
#elif defined(__AVX2__)
|
||||||
|
|
||||||
|
#define SCRYPT_THROUGHPUT 8
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define SCRYPT_THROUGHPUT 4
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// static int scrypt_throughput = 0;
|
||||||
|
|
||||||
static int scratchbuf_size = 0;
|
static int scratchbuf_size = 0;
|
||||||
|
|
||||||
static __thread char *scratchbuf = NULL;
|
static __thread uint32_t *scratchbuf = NULL;
|
||||||
|
|
||||||
// change this to a constant to be used directly as input state arg
|
// change this to a constant to be used directly as input state arg
|
||||||
// vectors still need an init function.
|
// vectors still need an init function.
|
||||||
@@ -709,15 +723,11 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
|||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
//#if defined(USE_ASM) && defined(__x86_64__)
|
|
||||||
|
|
||||||
#define SCRYPT_MAX_WAYS 12
|
#define SCRYPT_MAX_WAYS 12
|
||||||
#define HAVE_SCRYPT_3WAY 1
|
#define HAVE_SCRYPT_3WAY 1
|
||||||
//int scrypt_best_throughput();
|
|
||||||
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
||||||
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
||||||
|
|
||||||
//#if defined(USE_AVX2)
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#undef SCRYPT_MAX_WAYS
|
#undef SCRYPT_MAX_WAYS
|
||||||
#define SCRYPT_MAX_WAYS 24
|
#define SCRYPT_MAX_WAYS 24
|
||||||
@@ -727,40 +737,39 @@ void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
|
|||||||
|
|
||||||
#ifndef SCRYPT_MAX_WAYS
|
#ifndef SCRYPT_MAX_WAYS
|
||||||
#define SCRYPT_MAX_WAYS 1
|
#define SCRYPT_MAX_WAYS 1
|
||||||
//#define scrypt_best_throughput() 1
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "scrypt-core-4way.h"
|
#include "scrypt-core-4way.h"
|
||||||
|
|
||||||
|
/*
|
||||||
static bool scrypt_N_1_1_256( const uint32_t *input, uint32_t *output,
|
static bool scrypt_N_1_1_256( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thr_id )
|
uint32_t *midstate, int N, int thr_id )
|
||||||
{
|
{
|
||||||
uint32_t tstate[8], ostate[8];
|
uint32_t tstate[8], ostate[8];
|
||||||
uint32_t X[32];
|
uint32_t X[32];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
memcpy(tstate, midstate, 32);
|
memcpy(tstate, midstate, 32);
|
||||||
HMAC_SHA256_80_init(input, tstate, ostate);
|
HMAC_SHA256_80_init(input, tstate, ostate);
|
||||||
PBKDF2_SHA256_80_128(tstate, ostate, input, X);
|
PBKDF2_SHA256_80_128(tstate, ostate, input, X);
|
||||||
|
|
||||||
scrypt_core_simd128( X, V, N ); // woring
|
scrypt_core_simd128( X, scratchbuf, N ); // woring
|
||||||
// scrypt_core_1way( X, V, N ); // working
|
// scrypt_core_1way( X, V, N ); // working
|
||||||
// scrypt_core(X, V, N);
|
// scrypt_core(X, V, N);
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if ( SCRYPT_THROUGHPUT == 8 )
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 8*8 ];
|
uint32_t _ALIGN(128) tstate[ 8*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 8*8 ];
|
uint32_t _ALIGN(128) ostate[ 8*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 8*32 ];
|
uint32_t _ALIGN(128) W[ 8*32 ];
|
||||||
uint32_t _ALIGN(128) X[ 8*32 ];
|
uint32_t _ALIGN(128) X[ 8*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
intrlv_8x32( W, input, input+ 20, input+ 40, input+ 60,
|
intrlv_8x32( W, input, input+ 20, input+ 40, input+ 60,
|
||||||
input+80, input+100, input+120, input+140, 640 );
|
input+80, input+100, input+120, input+140, 640 );
|
||||||
@@ -774,11 +783,11 @@ static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
{
|
{
|
||||||
scrypt_core_simd128_3buf( X, V, N );
|
scrypt_core_simd128_3buf( X, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+ 96, V, N );
|
scrypt_core_simd128_3buf( X+ 96, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+192, V, N );
|
scrypt_core_simd128_2buf( X+192, scratchbuf, N );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -786,13 +795,13 @@ static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
|||||||
intrlv_2x128( W+ 64, X+ 64, X+ 96, 1024 );
|
intrlv_2x128( W+ 64, X+ 64, X+ 96, 1024 );
|
||||||
intrlv_2x128( W+128, X+128, X+160, 1024 );
|
intrlv_2x128( W+128, X+128, X+160, 1024 );
|
||||||
intrlv_2x128( W+192, X+192, X+224, 1024 );
|
intrlv_2x128( W+192, X+192, X+224, 1024 );
|
||||||
scrypt_core_2way_simd128( (__m256i*) W, (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*) W, (__m256i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_2way_simd128( (__m256i*)(W+ 64), (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*)(W+ 64), (__m256i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_2way_simd128( (__m256i*)(W+128), (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*)(W+128), (__m256i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_2way_simd128( (__m256i*)(W+192), (__m256i*)V, N );
|
scrypt_core_2way_simd128( (__m256i*)(W+192), (__m256i*)scratchbuf, N );
|
||||||
dintrlv_2x128( X, X+ 32, W, 1024 );
|
dintrlv_2x128( X, X+ 32, W, 1024 );
|
||||||
dintrlv_2x128( X+ 64, X+ 96, W+ 64, 1024 );
|
dintrlv_2x128( X+ 64, X+ 96, W+ 64, 1024 );
|
||||||
dintrlv_2x128( X+128, X+160, W+128, 1024 );
|
dintrlv_2x128( X+128, X+160, W+128, 1024 );
|
||||||
@@ -928,16 +937,15 @@ static int scrypt_N_1_1_256_8way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if ( SCRYPT_THROUGHPUT == 16 )
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 16*8 ];
|
uint32_t _ALIGN(128) tstate[ 16*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 16*8 ];
|
uint32_t _ALIGN(128) ostate[ 16*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 16*32 ];
|
uint32_t _ALIGN(128) W[ 16*32 ];
|
||||||
uint32_t _ALIGN(128) X[ 16*32 ];
|
uint32_t _ALIGN(128) X[ 16*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
intrlv_16x32( W, input, input+ 20, input+ 40, input+ 60,
|
intrlv_16x32( W, input, input+ 20, input+ 40, input+ 60,
|
||||||
input+ 80, input+100, input+120, input+140,
|
input+ 80, input+100, input+120, input+140,
|
||||||
@@ -956,17 +964,17 @@ static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
{
|
{
|
||||||
scrypt_core_simd128_3buf( X, V, N );
|
scrypt_core_simd128_3buf( X, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+ 96, V, N );
|
scrypt_core_simd128_3buf( X+ 96, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+192, V, N );
|
scrypt_core_simd128_2buf( X+192, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+256, V, N );
|
scrypt_core_simd128_3buf( X+256, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_3buf( X+352, V, N );
|
scrypt_core_simd128_3buf( X+352, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+448, V, N );
|
scrypt_core_simd128_2buf( X+448, scratchbuf, N );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -974,13 +982,13 @@ static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
|||||||
intrlv_4x128( W+128, X+128, X+160, X+192, X+224, 1024 );
|
intrlv_4x128( W+128, X+128, X+160, X+192, X+224, 1024 );
|
||||||
intrlv_4x128( W+256, X+256, X+288, X+320, X+352, 1024 );
|
intrlv_4x128( W+256, X+256, X+288, X+320, X+352, 1024 );
|
||||||
intrlv_4x128( W+384, X+384, X+416, X+448, X+480, 1024 );
|
intrlv_4x128( W+384, X+384, X+416, X+448, X+480, 1024 );
|
||||||
scrypt_core_4way_simd128( (__m512i*) W, (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*) W, (__m512i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_4way_simd128( (__m512i*)(W+128), (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*)(W+128), (__m512i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_4way_simd128( (__m512i*)(W+256), (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*)(W+256), (__m512i*)scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_4way_simd128( (__m512i*)(W+384), (__m512i*)V, N );
|
scrypt_core_4way_simd128( (__m512i*)(W+384), (__m512i*)scratchbuf, N );
|
||||||
dintrlv_4x128( X, X+ 32, X+ 64, X+ 96, W, 1024 );
|
dintrlv_4x128( X, X+ 32, X+ 64, X+ 96, W, 1024 );
|
||||||
dintrlv_4x128( X+128, X+160, X+192, X+224, W+128, 1024 );
|
dintrlv_4x128( X+128, X+160, X+192, X+224, W+128, 1024 );
|
||||||
dintrlv_4x128( X+256, X+288, X+320, X+352, W+256, 1024 );
|
dintrlv_4x128( X+256, X+288, X+320, X+352, W+256, 1024 );
|
||||||
@@ -1236,15 +1244,13 @@ static int scrypt_N_1_1_256_16way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
#if defined(__SHA__)
|
#if 0
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 2*8 ];
|
uint32_t _ALIGN(128) tstate[ 2*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 2*8 ];
|
uint32_t _ALIGN(128) ostate[ 2*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 2*32 ];
|
uint32_t _ALIGN(128) W[ 2*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
memcpy( tstate, midstate, 32 );
|
memcpy( tstate, midstate, 32 );
|
||||||
memcpy( tstate+ 8, midstate, 32 );
|
memcpy( tstate+ 8, midstate, 32 );
|
||||||
@@ -1254,7 +1260,7 @@ static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
|||||||
PBKDF2_SHA256_80_128_SHA_2BUF( tstate, tstate+8, ostate, ostate+8,
|
PBKDF2_SHA256_80_128_SHA_2BUF( tstate, tstate+8, ostate, ostate+8,
|
||||||
input, input+20, W, W+32 );
|
input, input+20, W, W+32 );
|
||||||
|
|
||||||
scrypt_core_simd128_2buf( W, V, N );
|
scrypt_core_simd128_2buf( W, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32_SHA_2BUF( tstate, tstate+8, ostate, ostate+8, W, W+32,
|
PBKDF2_SHA256_128_32_SHA_2BUF( tstate, tstate+8, ostate, ostate+8, W, W+32,
|
||||||
@@ -1264,12 +1270,11 @@ static int scrypt_N_1_1_256_sha_2buf( const uint32_t *input, uint32_t *output,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[4 * 8];
|
uint32_t _ALIGN(128) tstate[4 * 8];
|
||||||
uint32_t _ALIGN(128) ostate[4 * 8];
|
uint32_t _ALIGN(128) ostate[4 * 8];
|
||||||
uint32_t _ALIGN(128) W[4 * 32];
|
uint32_t _ALIGN(128) W[4 * 32];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
memcpy( tstate, midstate, 32 );
|
memcpy( tstate, midstate, 32 );
|
||||||
memcpy( tstate+ 8, midstate, 32 );
|
memcpy( tstate+ 8, midstate, 32 );
|
||||||
@@ -1300,9 +1305,9 @@ static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// working, double buffered linear simd
|
// working, double buffered linear simd
|
||||||
scrypt_core_simd128_2buf( W, V, N );
|
scrypt_core_simd128_2buf( W, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( W+64, V, N );
|
scrypt_core_simd128_2buf( W+64, scratchbuf, N );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
scrypt_core_simd128_3buf( W, V, N );
|
scrypt_core_simd128_3buf( W, V, N );
|
||||||
@@ -1323,17 +1328,15 @@ static int scrypt_N_1_1_256_4way_sha( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#if ( SCRYPT_THROUGHPUT == 4 )
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N, int thrid )
|
uint32_t *midstate, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[ 4*8 ];
|
uint32_t _ALIGN(128) tstate[ 4*8 ];
|
||||||
uint32_t _ALIGN(128) ostate[ 4*8 ];
|
uint32_t _ALIGN(128) ostate[ 4*8 ];
|
||||||
uint32_t _ALIGN(128) W[ 4*32 ];
|
uint32_t _ALIGN(128) W[ 4*32 ];
|
||||||
uint32_t *V = (uint32_t*)scratchpad;
|
|
||||||
|
|
||||||
intrlv_4x32( W, input, input+20, input+40, input+60, 640 );
|
intrlv_4x32( W, input, input+20, input+40, input+60, 640 );
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ )
|
||||||
@@ -1346,13 +1349,13 @@ static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
|||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) X[ 4*32 ];
|
uint32_t _ALIGN(128) X[ 4*32 ];
|
||||||
dintrlv_4x32( X, X+32, X+64, X+96, W, 1024 );
|
dintrlv_4x32( X, X+32, X+64, X+96, W, 1024 );
|
||||||
scrypt_core_simd128_2buf( X, V, N );
|
scrypt_core_simd128_2buf( X, scratchbuf, N );
|
||||||
if ( work_restart[thrid].restart ) return 0;
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
scrypt_core_simd128_2buf( X+64, V, N );
|
scrypt_core_simd128_2buf( X+64, scratchbuf, N );
|
||||||
intrlv_4x32( W, X, X+32, X+64, X+96, 1024 );
|
intrlv_4x32( W, X, X+32, X+64, X+96, 1024 );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
scrypt_core_4way( (__m128i*)W, (__m128i*)V, N );
|
scrypt_core_4way( (__m128i*)W, (__m128i*)scratchbuf, N );
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1398,65 +1401,73 @@ static int scrypt_N_1_1_256_4way( const uint32_t *input, uint32_t *output,
|
|||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
#endif // SCRYPT_THROUGHPUT == 4
|
||||||
|
|
||||||
#endif // SHA
|
//#endif // SHA
|
||||||
|
|
||||||
extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
|
uint32_t _ALIGN(64) hash[ 8*SCRYPT_THROUGHPUT ];
|
||||||
|
uint32_t _ALIGN(64) data[ 20*SCRYPT_THROUGHPUT ];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
|
||||||
uint32_t midstate[8];
|
uint32_t midstate[8];
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
int thr_id = mythr->id;
|
int thr_id = mythr->id;
|
||||||
int throughput = scrypt_throughput;
|
|
||||||
int i;
|
int i;
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
for ( i = 0; i < throughput; i++ )
|
for ( i = 0; i < SCRYPT_THROUGHPUT; i++ )
|
||||||
memcpy( data + i * 20, pdata, 80 );
|
memcpy( data + i * 20, pdata, 80 );
|
||||||
|
|
||||||
sha256_transform_le( midstate, data, sha256_initial_state );
|
sha256_transform_le( midstate, data, sha256_initial_state );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
bool rc = true;
|
bool rc = true;
|
||||||
for ( i = 0; i < throughput; i++ ) data[ i*20 + 19 ] = ++n;
|
for ( i = 0; i < SCRYPT_THROUGHPUT; i++ ) data[ i*20 + 19 ] = ++n;
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
//#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
if ( throughput == 16 )
|
#if ( SCRYPT_THROUGHPUT == 16 )
|
||||||
rc = scrypt_N_1_1_256_16way( data, hash, midstate, scratchbuf,
|
// if ( SCRYPT_THROUGHPUT == 16 )
|
||||||
opt_param_n, thr_id );
|
rc = scrypt_N_1_1_256_16way( data, hash, midstate, opt_param_n,
|
||||||
else
|
thr_id );
|
||||||
#endif
|
// else
|
||||||
#if defined(__AVX2__)
|
//#endif
|
||||||
if ( throughput == 8 )
|
//#if defined(__AVX2__)
|
||||||
rc = scrypt_N_1_1_256_8way( data, hash, midstate, scratchbuf,
|
#elif ( SCRYPT_THROUGHPUT == 8 )
|
||||||
opt_param_n, thr_id );
|
// if ( SCRYPT_THROUGHPUT == 8 )
|
||||||
else
|
rc = scrypt_N_1_1_256_8way( data, hash, midstate, opt_param_n,
|
||||||
#endif
|
thr_id );
|
||||||
if ( throughput == 4 ) // slower on Ryzen than 8way
|
// else
|
||||||
#if defined(__SHA__)
|
//#endif
|
||||||
rc = scrypt_N_1_1_256_4way_sha( data, hash, midstate, scratchbuf,
|
#elif ( SCRYPT_THROUGHPUT == 4 )
|
||||||
opt_param_n, thr_id );
|
// if ( SCRYPT_THROUGHPUT == 4 ) // slower on Ryzen than 8way
|
||||||
|
//#if defined(__SHA__)
|
||||||
|
// rc = scrypt_N_1_1_256_4way_sha( data, hash, midstate, opt_param_n,
|
||||||
|
// thr_id );
|
||||||
|
//#else
|
||||||
|
rc = scrypt_N_1_1_256_4way( data, hash, midstate, opt_param_n,
|
||||||
|
thr_id );
|
||||||
#else
|
#else
|
||||||
rc = scrypt_N_1_1_256_4way( data, hash, midstate, scratchbuf,
|
|
||||||
opt_param_n, thr_id );
|
#error "Invalid SCRYPT_THROUGHPUT"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
/*
|
||||||
#if defined(__SHA__)
|
#if defined(__SHA__)
|
||||||
else
|
else
|
||||||
if (throughput == 2 ) // slower on Ryzen than 4way_sha & 8way
|
if ( SCRYPT_THROUGHPUT == 2 ) // slower on Ryzen than 4way_sha & 8way
|
||||||
rc = scrypt_N_1_1_256_sha_2buf( data, hash, midstate, scratchbuf,
|
rc = scrypt_N_1_1_256_sha_2buf( data, hash, midstate, opt_param_n,
|
||||||
opt_param_n, thr_id );
|
thr_id );
|
||||||
#endif
|
#endif
|
||||||
else // should never get here
|
else // should never get here
|
||||||
rc = scrypt_N_1_1_256( data, hash, midstate, scratchbuf,
|
rc = scrypt_N_1_1_256( data, hash, midstate, opt_param_n, thr_id );
|
||||||
opt_param_n, thr_id );
|
*/
|
||||||
|
|
||||||
// test the hash
|
// test the hash
|
||||||
if ( rc )
|
if ( rc )
|
||||||
for ( i = 0; i < throughput; i++ )
|
for ( i = 0; i < SCRYPT_THROUGHPUT; i++ )
|
||||||
{
|
{
|
||||||
if ( unlikely( valid_hash( hash + i*8, ptarget ) && !opt_benchmark ) )
|
if ( unlikely( valid_hash( hash + i*8, ptarget ) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
@@ -1468,7 +1479,7 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
} while ( likely( ( n < ( max_nonce - throughput ) ) && !(*restart) ) );
|
} while ( likely( ( n < ( max_nonce - SCRYPT_THROUGHPUT ) ) && !(*restart) ) );
|
||||||
|
|
||||||
*hashes_done = n - pdata[19];
|
*hashes_done = n - pdata[19];
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
@@ -1477,9 +1488,17 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
bool scrypt_miner_thread_init( int thr_id )
|
bool scrypt_miner_thread_init( int thr_id )
|
||||||
{
|
{
|
||||||
scratchbuf = _mm_malloc( scratchbuf_size, 128 );
|
scratchbuf = malloc_hugepages( scratchbuf_size );
|
||||||
if ( scratchbuf )
|
if ( scratchbuf )
|
||||||
return true;
|
{
|
||||||
|
if ( opt_debug )
|
||||||
|
applog( LOG_NOTICE, "Thread %u is using huge pages", thr_id );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
scratchbuf = _mm_malloc( scratchbuf_size, 128 );
|
||||||
|
|
||||||
|
if ( scratchbuf ) return true;
|
||||||
|
|
||||||
applog( LOG_ERR, "Thread %u: Scrypt buffer allocation failed", thr_id );
|
applog( LOG_ERR, "Thread %u: Scrypt buffer allocation failed", thr_id );
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1489,7 +1508,7 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
//#if defined(__SHA__)
|
//#if defined(__SHA__)
|
||||||
// gate->optimizations = SSE2_OPT | SHA_OPT;
|
// gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||||
//#else
|
//#else
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
gate->optimizations = SSE2_OPT | SSE42_OPT | AVX_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
//#endif
|
//#endif
|
||||||
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
||||||
gate->scanhash = (void*)&scanhash_scrypt;
|
gate->scanhash = (void*)&scanhash_scrypt;
|
||||||
@@ -1497,8 +1516,11 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
opt_param_n = opt_param_n ? opt_param_n : 1024;
|
opt_param_n = opt_param_n ? opt_param_n : 1024;
|
||||||
applog( LOG_INFO,"Scrypt paramaters: N= %d, R= 1", opt_param_n );
|
applog( LOG_INFO,"Scrypt paramaters: N= %d, R= 1", opt_param_n );
|
||||||
|
|
||||||
|
// scrypt_throughput can be defined at compile time and used to replace
|
||||||
|
// MAX_WAYS to reduce memory usage.
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
scrypt_throughput = 16;
|
// scrypt_throughput = 16;
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
||||||
else
|
else
|
||||||
@@ -1511,13 +1533,13 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#elif defined(__AVX2__)
|
#elif defined(__AVX2__)
|
||||||
scrypt_throughput = 8;
|
// scrypt_throughput = 8;
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
||||||
else
|
else
|
||||||
scratchbuf_size = opt_param_n * 2 * 128; // 2 way
|
scratchbuf_size = opt_param_n * 2 * 128; // 2 way
|
||||||
#else
|
#else
|
||||||
scrypt_throughput = 4;
|
// scrypt_throughput = 4;
|
||||||
if ( opt_param_n > 0x4000 )
|
if ( opt_param_n > 0x4000 )
|
||||||
scratchbuf_size = opt_param_n * 2 * 128; // 2 buf
|
scratchbuf_size = opt_param_n * 2 * 128; // 2 buf
|
||||||
else
|
else
|
||||||
@@ -1531,9 +1553,8 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
|
|
||||||
format_number_si( &t_size, t_units );
|
format_number_si( &t_size, t_units );
|
||||||
format_number_si( &d_size, d_units );
|
format_number_si( &d_size, d_units );
|
||||||
|
|
||||||
applog( LOG_INFO,"Throughput %d/thr, Buffer %.0f %siB/thr, Total %.0f %siB\n",
|
applog( LOG_INFO,"Throughput %d/thr, Buffer %.0f %siB/thr, Total %.0f %siB\n",
|
||||||
scrypt_throughput, t_size, t_units, d_size, d_units );
|
SCRYPT_THROUGHPUT, t_size, t_units, d_size, d_units );
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
@@ -62,6 +62,12 @@ void sha256_4way_transform_le( __m128i *state_out, const __m128i *data,
|
|||||||
const __m128i *state_in );
|
const __m128i *state_in );
|
||||||
void sha256_4way_transform_be( __m128i *state_out, const __m128i *data,
|
void sha256_4way_transform_be( __m128i *state_out, const __m128i *data,
|
||||||
const __m128i *state_in );
|
const __m128i *state_in );
|
||||||
|
void sha256_4way_prehash_3rounds( __m128i *state_mid, __m128i *X,
|
||||||
|
const __m128i *W, const __m128i *state_in );
|
||||||
|
void sha256_4way_final_rounds( __m128i *state_out, const __m128i *data,
|
||||||
|
const __m128i *state_in, const __m128i *state_mid, const __m128i *X );
|
||||||
|
int sha256_4way_transform_le_short( __m128i *state_out, const __m128i *data,
|
||||||
|
const __m128i *state_in );
|
||||||
|
|
||||||
#endif // SSE2
|
#endif // SSE2
|
||||||
|
|
||||||
@@ -84,6 +90,13 @@ void sha256_8way_transform_le( __m256i *state_out, const __m256i *data,
|
|||||||
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
||||||
const __m256i *state_in );
|
const __m256i *state_in );
|
||||||
|
|
||||||
|
void sha256_8way_prehash_3rounds( __m256i *state_mid, __m256i *X,
|
||||||
|
const __m256i *W, const __m256i *state_in );
|
||||||
|
void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
|
||||||
|
const __m256i *state_in, const __m256i *state_mid, const __m256i *X );
|
||||||
|
int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||||
|
const __m256i *state_in );
|
||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
@@ -104,10 +117,13 @@ void sha256_16way_transform_le( __m512i *state_out, const __m512i *data,
|
|||||||
const __m512i *state_in );
|
const __m512i *state_in );
|
||||||
void sha256_16way_transform_be( __m512i *state_out, const __m512i *data,
|
void sha256_16way_transform_be( __m512i *state_out, const __m512i *data,
|
||||||
const __m512i *state_in );
|
const __m512i *state_in );
|
||||||
void sha256_16way_prehash_3rounds( __m512i *state_mid, const __m512i *W,
|
void sha256_16way_prehash_3rounds( __m512i *state_mid, __m512i *X,
|
||||||
const __m512i *state_in );
|
const __m512i *W, const __m512i *state_in );
|
||||||
void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data,
|
void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data,
|
||||||
const __m512i *state_in, const __m512i *state_mid );
|
const __m512i *state_in, const __m512i *state_mid, const __m512i *X );
|
||||||
|
|
||||||
|
int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||||
|
const __m512i *state_in );
|
||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
|
@@ -8,7 +8,7 @@
|
|||||||
* any later version. See COPYING for more details.
|
* any later version. See COPYING for more details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "algo-gate-api.h"
|
#include "sha256d-4way.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
@@ -181,6 +181,8 @@ static const uint32_t sha256d_hash1[16] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// this performs the entire hash all over again, why?
|
// this performs the entire hash all over again, why?
|
||||||
|
// because main function only does 56 rounds.
|
||||||
|
|
||||||
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
||||||
{
|
{
|
||||||
uint32_t S[16];
|
uint32_t S[16];
|
||||||
@@ -492,7 +494,7 @@ static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
|
|||||||
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
const uint32_t *midstate, const uint32_t *prehash);
|
||||||
|
|
||||||
static inline int scanhash_sha256d_4way( struct work *work,
|
static inline int scanhash_sha256d_4way_pooler( struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
@@ -553,7 +555,7 @@ static inline int scanhash_sha256d_4way( struct work *work,
|
|||||||
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
const uint32_t *midstate, const uint32_t *prehash);
|
||||||
|
|
||||||
static inline int scanhash_sha256d_8way( struct work *work,
|
static inline int scanhash_sha256d_8way_pooler( struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
@@ -609,8 +611,8 @@ static inline int scanhash_sha256d_8way( struct work *work,
|
|||||||
|
|
||||||
#endif /* HAVE_SHA256_8WAY */
|
#endif /* HAVE_SHA256_8WAY */
|
||||||
|
|
||||||
int scanhash_sha256d( struct work *work,
|
int scanhash_sha256d_pooler( struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -625,11 +627,11 @@ int scanhash_sha256d( struct work *work,
|
|||||||
|
|
||||||
#ifdef HAVE_SHA256_8WAY
|
#ifdef HAVE_SHA256_8WAY
|
||||||
if ( sha256_use_8way() )
|
if ( sha256_use_8way() )
|
||||||
return scanhash_sha256d_8way( work, max_nonce, hashes_done, mythr );
|
return scanhash_sha256d_8way_pooler( work, max_nonce, hashes_done, mythr );
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_SHA256_4WAY
|
#ifdef HAVE_SHA256_4WAY
|
||||||
if ( sha256_use_4way() )
|
if ( sha256_use_4way() )
|
||||||
return scanhash_sha256d_4way( work, max_nonce, hashes_done, mythr );
|
return scanhash_sha256d_4way_pooler( work, max_nonce, hashes_done, mythr );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
memcpy(data, pdata + 16, 64);
|
||||||
@@ -690,8 +692,15 @@ int scanhash_SHA256d( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
bool register_sha256d_algo( algo_gate_t* gate )
|
bool register_sha256d_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256d;
|
#if defined(SHA256D_16WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_16way;
|
||||||
|
//#elif defined(SHA256D_8WAY)
|
||||||
|
// gate->scanhash = (void*)&scanhash_sha256d_8way;
|
||||||
|
#else
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_pooler;
|
||||||
|
// gate->scanhash = (void*)&scanhash_sha256d_4way;
|
||||||
|
#endif
|
||||||
// gate->hash = (void*)&sha256d;
|
// gate->hash = (void*)&sha256d;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -53,4 +53,8 @@ void sha256_ni2way_transform_be( uint32_t *out_X, uint32_t*out_Y,
|
|||||||
#define sha256_transform_be sph_sha256_transform_be
|
#define sha256_transform_be sph_sha256_transform_be
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// SHA can't do only 3 rounds
|
||||||
|
#define sha256_prehash_3rounds sph_sha256_prehash_3rounds
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
#include "sha256t-gate.h"
|
#include "sha256d-4way.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -10,13 +10,14 @@
|
|||||||
int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
|
__m512i vdata[32] __attribute__ ((aligned (128)));
|
||||||
__m512i block[16] __attribute__ ((aligned (64)));
|
__m512i block[16] __attribute__ ((aligned (64)));
|
||||||
__m512i hash32[8] __attribute__ ((aligned (32)));
|
__m512i hash32[8] __attribute__ ((aligned (64)));
|
||||||
__m512i initstate[8] __attribute__ ((aligned (32)));
|
__m512i initstate[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate[8] __attribute__ ((aligned (32)));
|
__m512i midstate1[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
__m512i midstate2[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
__m512i mexp_pre[16] __attribute__ ((aligned (64)));
|
||||||
__m512i vdata[20] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -36,6 +37,14 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_512( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m512_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_512( block + 9, 6 );
|
||||||
|
block[15] = m512_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -46,28 +55,20 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 byte block of data
|
sha256_16way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_16way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate );
|
sha256_16way_prehash_3rounds( midstate2, mexp_pre, vdata+16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_512( block, vdata + 16, 4 );
|
sha256_16way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_512( block + 5, 10 );
|
|
||||||
block[15] = m512_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_16way_final_rounds( hash32, block, midstate, midstate2 );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_512( block, hash32, 8 );
|
if ( sha256_16way_transform_le_short( hash32, block, initstate ) )
|
||||||
block[ 8] = last_byte;
|
{
|
||||||
memset_zero_512( block + 9, 6 );
|
|
||||||
block[15] = m512_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_16way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
// byte swap final hash for testing
|
||||||
mm512_block_bswap_32( hash32, hash32 );
|
mm512_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
@@ -81,6 +82,7 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
submit_solution( work, lane_hash, mythr );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
||||||
n += 16;
|
n += 16;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
@@ -96,12 +98,14 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m256i block[16] __attribute__ ((aligned (64)));
|
__m256i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m256i block[16] __attribute__ ((aligned (32)));
|
||||||
__m256i hash32[8] __attribute__ ((aligned (32)));
|
__m256i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m256i initstate[8] __attribute__ ((aligned (32)));
|
__m256i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate[8] __attribute__ ((aligned (32)));
|
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m256i vdata[20] __attribute__ ((aligned (32)));
|
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -120,6 +124,14 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_256( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m256_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_256( block + 9, 6 );
|
||||||
|
block[15] = m256_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -130,25 +142,21 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 bytes of data
|
sha256_8way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_8way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_8way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_256( block, vdata + 16, 4 );
|
sha256_8way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_256( block + 5, 10 );
|
|
||||||
block[15] = m256_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_8way_transform_le( hash32, block, midstate );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_256( block, hash32, 8 );
|
if ( unlikely(
|
||||||
block[ 8] = last_byte;
|
sha256_8way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
memset_zero_256( block + 9, 6 );
|
{
|
||||||
block[15] = m256_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_8way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
// byte swap final hash for testing
|
||||||
mm256_block_bswap_32( hash32, hash32 );
|
mm256_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
@@ -162,6 +170,7 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
submit_solution( work, lane_hash, mythr );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev, eight );
|
*noncev = _mm256_add_epi32( *noncev, eight );
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
@@ -177,12 +186,14 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m128i block[16] __attribute__ ((aligned (64)));
|
__m128i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m128i block[16] __attribute__ ((aligned (32)));
|
||||||
__m128i hash32[8] __attribute__ ((aligned (32)));
|
__m128i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m128i initstate[8] __attribute__ ((aligned (32)));
|
__m128i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m128i midstate[8] __attribute__ ((aligned (32)));
|
__m128i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m128i vdata[20] __attribute__ ((aligned (32)));
|
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -201,6 +212,14 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_128( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_128( block + 9, 6 );
|
||||||
|
block[15] = m128_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -212,24 +231,20 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 bytes of data
|
// hash first 64 bytes of data
|
||||||
sha256_4way_transform_le( midstate, vdata, initstate );
|
sha256_4way_transform_le( midstate1, vdata, initstate );
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_4way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_128( block, vdata + 16, 4 );
|
sha256_4way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_128( block + 5, 10 );
|
|
||||||
block[15] = m128_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, midstate );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_128( block, hash32, 8 );
|
if ( unlikely(
|
||||||
block[ 8] = last_byte;
|
sha256_4way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
memset_zero_128( block + 9, 6 );
|
{
|
||||||
block[15] = m128_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
// byte swap final hash for testing
|
||||||
mm128_block_bswap_32( hash32, hash32 );
|
mm128_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
@@ -243,6 +258,7 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
submit_solution( work, lane_hash, mythr );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
*noncev = _mm_add_epi32( *noncev, four );
|
*noncev = _mm_add_epi32( *noncev, four );
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
@@ -253,3 +269,20 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
bool register_sha256d_algo( algo_gate_t* gate )
|
||||||
|
{
|
||||||
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
|
#if defined(SHA256D_16WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_16way;
|
||||||
|
#elif defined(SHA256D_8WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_8way;
|
||||||
|
#elif defined(SHA256D_4WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_sha256d_4way;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// gate->hash = (void*)&sha256d;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
46
algo/sha/sha256d-4way.h
Normal file
46
algo/sha/sha256d-4way.h
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#ifndef __SHA256D_4WAY_H__
|
||||||
|
#define __SHA256D_4WAY_H__ 1
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "algo-gate-api.h"
|
||||||
|
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
#define SHA256D_16WAY 1
|
||||||
|
#elif defined(__AVX2__)
|
||||||
|
#define SHA256D_8WAY 1
|
||||||
|
#else
|
||||||
|
#define SHA256D_4WAY 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
bool register_sha256d_algo( algo_gate_t* gate );
|
||||||
|
|
||||||
|
#if defined(SHA256D_16WAY)
|
||||||
|
|
||||||
|
int scanhash_sha256d_16way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(SHA256D_8WAY)
|
||||||
|
|
||||||
|
int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(SHA256D_4WAY)
|
||||||
|
|
||||||
|
int scanhash_sha256d_4way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
#if defined(__SHA__)
|
||||||
|
|
||||||
|
int scanhash_sha256d( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@@ -10,13 +10,14 @@
|
|||||||
int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
|
__m512i vdata[32] __attribute__ ((aligned (128)));
|
||||||
__m512i block[16] __attribute__ ((aligned (64)));
|
__m512i block[16] __attribute__ ((aligned (64)));
|
||||||
__m512i hash32[8] __attribute__ ((aligned (32)));
|
__m512i hash32[8] __attribute__ ((aligned (64)));
|
||||||
__m512i initstate[8] __attribute__ ((aligned (32)));
|
__m512i initstate[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate[8] __attribute__ ((aligned (32)));
|
__m512i midstate1[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
__m512i midstate2[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
__m512i mexp_pre[16] __attribute__ ((aligned (64)));
|
||||||
__m512i vdata[20] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -36,7 +37,14 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
// initialize state
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_512( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m512_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_512( block + 9, 6 );
|
||||||
|
block[15] = m512_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 );
|
initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 );
|
||||||
@@ -46,37 +54,29 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m512_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m512_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 byte block of data
|
sha256_16way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_16way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate );
|
sha256_16way_prehash_3rounds( midstate2, mexp_pre, vdata+16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, pre-padded
|
||||||
memcpy_512( block, vdata + 16, 4 );
|
sha256_16way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_512( block + 5, 10 );
|
|
||||||
block[15] = m512_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_16way_final_rounds( hash32, block, midstate, midstate2 );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_512( block, hash32, 8 );
|
sha256_16way_transform_le( block, block, initstate );
|
||||||
block[ 8] = last_byte;
|
|
||||||
memset_zero_512( block + 9, 6 );
|
|
||||||
block[15] = m512_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_16way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// 3. 32 byte hash from 2.
|
// 3. 32 byte hash from 2.
|
||||||
memcpy_512( block, hash32, 8 );
|
if ( unlikely(
|
||||||
sha256_16way_transform_le( hash32, block, initstate );
|
sha256_16way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
|
{
|
||||||
// byte swap final hash for testing
|
// byte swap final hash for testing
|
||||||
mm512_block_bswap_32( hash32, hash32 );
|
mm512_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
for ( int lane = 0; lane < 16; lane++ )
|
for ( int lane = 0; lane < 16; lane++ )
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
if ( hash32_d7[ lane ] <= targ32_d7 )
|
||||||
{
|
{
|
||||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
@@ -85,6 +85,7 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
submit_solution( work, lane_hash, mythr );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
||||||
n += 16;
|
n += 16;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
@@ -101,12 +102,14 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m256i block[16] __attribute__ ((aligned (64)));
|
__m256i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m256i block[16] __attribute__ ((aligned (32)));
|
||||||
__m256i hash32[8] __attribute__ ((aligned (32)));
|
__m256i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m256i initstate[8] __attribute__ ((aligned (32)));
|
__m256i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate[8] __attribute__ ((aligned (32)));
|
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m256i vdata[20] __attribute__ ((aligned (32)));
|
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -125,6 +128,14 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_256( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m256_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_256( block + 9, 6 );
|
||||||
|
block[15] = m256_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -135,34 +146,29 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
initstate[6] = m256_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m256_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 bytes of data
|
sha256_8way_transform_le( midstate1, vdata, initstate );
|
||||||
sha256_8way_transform_le( midstate, vdata, initstate );
|
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_8way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_256( block, vdata + 16, 4 );
|
sha256_8way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_256( block + 5, 10 );
|
|
||||||
block[15] = m256_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_8way_transform_le( hash32, block, midstate );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_256( block, hash32, 8 );
|
sha256_8way_transform_le( block, block, initstate );
|
||||||
block[ 8] = last_byte;
|
|
||||||
memset_zero_256( block + 9, 6 );
|
|
||||||
block[15] = m256_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_8way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// 3. 32 byte hash from 2.
|
// 3. 32 byte hash from 2.
|
||||||
memcpy_256( block, hash32, 8 );
|
if ( unlikely(
|
||||||
sha256_8way_transform_le( hash32, block, initstate );
|
sha256_8way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
|
{
|
||||||
// byte swap final hash for testing
|
// byte swap final hash for testing
|
||||||
mm256_block_bswap_32( hash32, hash32 );
|
mm256_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
if ( hash32_d7[ lane ] <= targ32_d7 )
|
||||||
{
|
{
|
||||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
@@ -171,6 +177,7 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
submit_solution( work, lane_hash, mythr );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev, eight );
|
*noncev = _mm256_add_epi32( *noncev, eight );
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
@@ -181,17 +188,23 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(SHA256T_4WAY)
|
#if defined(SHA256T_4WAY)
|
||||||
|
|
||||||
|
// Optimizations are slower with AVX/SSE2
|
||||||
|
// https://github.com/JayDDee/cpuminer-opt/issues/344
|
||||||
|
/*
|
||||||
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m128i block[16] __attribute__ ((aligned (64)));
|
__m128i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m128i block[16] __attribute__ ((aligned (32)));
|
||||||
__m128i hash32[8] __attribute__ ((aligned (32)));
|
__m128i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m128i initstate[8] __attribute__ ((aligned (32)));
|
__m128i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m128i midstate[8] __attribute__ ((aligned (32)));
|
__m128i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m128i vdata[20] __attribute__ ((aligned (32)));
|
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -210,6 +223,101 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_128( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_128( block + 9, 6 );
|
||||||
|
block[15] = m128_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
|
// initialize state
|
||||||
|
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
||||||
|
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
|
initstate[2] = m128_const1_64( 0x3C6EF3723C6EF372 );
|
||||||
|
initstate[3] = m128_const1_64( 0xA54FF53AA54FF53A );
|
||||||
|
initstate[4] = m128_const1_64( 0x510E527F510E527F );
|
||||||
|
initstate[5] = m128_const1_64( 0x9B05688C9B05688C );
|
||||||
|
initstate[6] = m128_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
|
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
|
// hash first 64 bytes of data
|
||||||
|
sha256_4way_transform_le( midstate1, vdata, initstate );
|
||||||
|
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_4way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// 1. final 16 bytes of data, with padding
|
||||||
|
sha256_4way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
|
mexp_pre );
|
||||||
|
|
||||||
|
// 2. 32 byte hash from 1.
|
||||||
|
sha256_4way_transform_le( block, block, initstate );
|
||||||
|
|
||||||
|
// 3. 32 byte hash from 2.
|
||||||
|
if ( unlikely(
|
||||||
|
sha256_4way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
|
{
|
||||||
|
// byte swap final hash for testing
|
||||||
|
mm128_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
|
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||||
|
{
|
||||||
|
extr_lane_4x32( lane_hash, hash32, lane, 256 );
|
||||||
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*noncev = _mm_add_epi32( *noncev, four );
|
||||||
|
n += 4;
|
||||||
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
__m128i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m128i block[16] __attribute__ ((aligned (32)));
|
||||||
|
__m128i hash32[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i initstate[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i midstate[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
const uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t targ32_d7 = ptarget[7];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
__m128i *noncev = vdata + 19;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
const __m128i last_byte = m128_const1_32( 0x80000000 );
|
||||||
|
const __m128i four = m128_const1_32( 4 );
|
||||||
|
|
||||||
|
for ( int i = 0; i < 19; i++ )
|
||||||
|
vdata[i] = m128_const1_32( pdata[i] );
|
||||||
|
|
||||||
|
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_128( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_128( block + 9, 6 );
|
||||||
|
block[15] = m128_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -225,25 +333,9 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
sha256_4way_transform_le( block, vdata+16, midstate );
|
||||||
memcpy_128( block, vdata + 16, 4 );
|
sha256_4way_transform_le( block, block, initstate );
|
||||||
block[ 4] = last_byte;
|
|
||||||
memset_zero_128( block + 5, 10 );
|
|
||||||
block[15] = m128_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, midstate );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
|
||||||
memcpy_128( block, hash32, 8 );
|
|
||||||
block[ 8] = last_byte;
|
|
||||||
memset_zero_128( block + 9, 6 );
|
|
||||||
block[15] = m128_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, initstate );
|
sha256_4way_transform_le( hash32, block, initstate );
|
||||||
|
|
||||||
// 3. 32 byte hash from 2.
|
|
||||||
memcpy_128( block, hash32, 8 );
|
|
||||||
sha256_4way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
|
||||||
mm128_block_bswap_32( hash32, hash32 );
|
mm128_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ )
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
@@ -264,5 +356,6 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -702,6 +702,36 @@ memcpy( state_out, state_in, 32 );
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sph_sha256_prehash_3rounds( uint32_t *state_out, const uint32_t *data,
|
||||||
|
const uint32_t *state_in )
|
||||||
|
{
|
||||||
|
uint32_t t1, t2, X_xor_Y, Y_xor_Z = state_in[1] ^ state_in[2];
|
||||||
|
memcpy( state_out, state_in, 32 );
|
||||||
|
|
||||||
|
t1 = state_out[7] + BSG2_1( state_out[4] )
|
||||||
|
+ CH( state_out[4], state_out[5], state_out[6] ) + 0x428A2F98 + data[0];
|
||||||
|
t2 = BSG2_0( state_out[0] )
|
||||||
|
+ MAJ( state_out[0], state_out[1], state_out[2] );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
state_out[3] += t1;
|
||||||
|
state_out[7] = t1 + t2;
|
||||||
|
|
||||||
|
t1 = state_out[6] + BSG2_1( state_out[3] )
|
||||||
|
+ CH( state_out[3], state_out[4], state_out[5] ) + 0x71374491 + data[1];
|
||||||
|
t2 = BSG2_0( state_out[7] )
|
||||||
|
+ MAJ( state_out[7], state_out[0], state_out[1] );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
state_out[2] += t1;
|
||||||
|
state_out[6] = t1 + t2;
|
||||||
|
|
||||||
|
t1 = state_out[5] + BSG2_1( state_out[2] )
|
||||||
|
+ CH( state_out[2], state_out[3], state_out[4] ) + 0xB5C0FBCF + data[2];
|
||||||
|
t2 = BSG2_0( state_out[6] )
|
||||||
|
+ MAJ( state_out[6], state_out[7], state_out[0] );
|
||||||
|
state_out[1] += t1;
|
||||||
|
state_out[5] = t1 + t2;
|
||||||
|
}
|
||||||
|
|
||||||
/* see sph_sha2.h */
|
/* see sph_sha2.h */
|
||||||
void
|
void
|
||||||
sph_sha224_init(void *cc)
|
sph_sha224_init(void *cc)
|
||||||
|
@@ -215,6 +215,9 @@ void sph_sha256_transform_le( uint32_t *state_out, const uint32_t *data,
|
|||||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||||
const uint32_t *state_in );
|
const uint32_t *state_in );
|
||||||
|
|
||||||
|
void sph_sha256_prehash_3rounds( uint32_t *state_out, const uint32_t *data,
|
||||||
|
const uint32_t *state_in );
|
||||||
|
|
||||||
|
|
||||||
#if SPH_64
|
#if SPH_64
|
||||||
|
|
||||||
|
@@ -62,8 +62,8 @@ extern "C"{
|
|||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
#define DECL_STATE8 \
|
#define DECL_STATE8 \
|
||||||
__m256i A00, A01, A02, A03, A04, A05, A06, A07, \
|
__m256i A0, A1, A2, A3, A4, A5, A6, A7, \
|
||||||
A08, A09, A0A, A0B; \
|
A8, A9, AA, AB; \
|
||||||
__m256i B0, B1, B2, B3, B4, B5, B6, B7, \
|
__m256i B0, B1, B2, B3, B4, B5, B6, B7, \
|
||||||
B8, B9, BA, BB, BC, BD, BE, BF; \
|
B8, B9, BA, BB, BC, BD, BE, BF; \
|
||||||
__m256i C0, C1, C2, C3, C4, C5, C6, C7, \
|
__m256i C0, C1, C2, C3, C4, C5, C6, C7, \
|
||||||
@@ -78,18 +78,18 @@ extern "C"{
|
|||||||
{ \
|
{ \
|
||||||
if ( (state)->state_loaded ) \
|
if ( (state)->state_loaded ) \
|
||||||
{ \
|
{ \
|
||||||
A00 = (state)->A[0]; \
|
A0 = (state)->A[0]; \
|
||||||
A01 = (state)->A[1]; \
|
A1 = (state)->A[1]; \
|
||||||
A02 = (state)->A[2]; \
|
A2 = (state)->A[2]; \
|
||||||
A03 = (state)->A[3]; \
|
A3 = (state)->A[3]; \
|
||||||
A04 = (state)->A[4]; \
|
A4 = (state)->A[4]; \
|
||||||
A05 = (state)->A[5]; \
|
A5 = (state)->A[5]; \
|
||||||
A06 = (state)->A[6]; \
|
A6 = (state)->A[6]; \
|
||||||
A07 = (state)->A[7]; \
|
A7 = (state)->A[7]; \
|
||||||
A08 = (state)->A[8]; \
|
A8 = (state)->A[8]; \
|
||||||
A09 = (state)->A[9]; \
|
A9 = (state)->A[9]; \
|
||||||
A0A = (state)->A[10]; \
|
AA = (state)->A[10]; \
|
||||||
A0B = (state)->A[11]; \
|
AB = (state)->A[11]; \
|
||||||
B0 = (state)->B[0]; \
|
B0 = (state)->B[0]; \
|
||||||
B1 = (state)->B[1]; \
|
B1 = (state)->B[1]; \
|
||||||
B2 = (state)->B[2]; \
|
B2 = (state)->B[2]; \
|
||||||
@@ -126,18 +126,18 @@ extern "C"{
|
|||||||
else \
|
else \
|
||||||
{ \
|
{ \
|
||||||
(state)->state_loaded = true; \
|
(state)->state_loaded = true; \
|
||||||
A00 = m256_const1_64( 0x20728DFD20728DFD ); \
|
A0 = m256_const1_64( 0x20728DFD20728DFD ); \
|
||||||
A01 = m256_const1_64( 0x46C0BD5346C0BD53 ); \
|
A1 = m256_const1_64( 0x46C0BD5346C0BD53 ); \
|
||||||
A02 = m256_const1_64( 0xE782B699E782B699 ); \
|
A2 = m256_const1_64( 0xE782B699E782B699 ); \
|
||||||
A03 = m256_const1_64( 0x5530463255304632 ); \
|
A3 = m256_const1_64( 0x5530463255304632 ); \
|
||||||
A04 = m256_const1_64( 0x71B4EF9071B4EF90 ); \
|
A4 = m256_const1_64( 0x71B4EF9071B4EF90 ); \
|
||||||
A05 = m256_const1_64( 0x0EA9E82C0EA9E82C ); \
|
A5 = m256_const1_64( 0x0EA9E82C0EA9E82C ); \
|
||||||
A06 = m256_const1_64( 0xDBB930F1DBB930F1 ); \
|
A6 = m256_const1_64( 0xDBB930F1DBB930F1 ); \
|
||||||
A07 = m256_const1_64( 0xFAD06B8BFAD06B8B ); \
|
A7 = m256_const1_64( 0xFAD06B8BFAD06B8B ); \
|
||||||
A08 = m256_const1_64( 0xBE0CAE40BE0CAE40 ); \
|
A8 = m256_const1_64( 0xBE0CAE40BE0CAE40 ); \
|
||||||
A09 = m256_const1_64( 0x8BD144108BD14410 ); \
|
A9 = m256_const1_64( 0x8BD144108BD14410 ); \
|
||||||
A0A = m256_const1_64( 0x76D2ADAC76D2ADAC ); \
|
AA = m256_const1_64( 0x76D2ADAC76D2ADAC ); \
|
||||||
A0B = m256_const1_64( 0x28ACAB7F28ACAB7F ); \
|
AB = m256_const1_64( 0x28ACAB7F28ACAB7F ); \
|
||||||
B0 = m256_const1_64( 0xC1099CB7C1099CB7 ); \
|
B0 = m256_const1_64( 0xC1099CB7C1099CB7 ); \
|
||||||
B1 = m256_const1_64( 0x07B385F307B385F3 ); \
|
B1 = m256_const1_64( 0x07B385F307B385F3 ); \
|
||||||
B2 = m256_const1_64( 0xE7442C26E7442C26 ); \
|
B2 = m256_const1_64( 0xE7442C26E7442C26 ); \
|
||||||
@@ -176,18 +176,18 @@ extern "C"{
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define WRITE_STATE8(state) do { \
|
#define WRITE_STATE8(state) do { \
|
||||||
(state)->A[0] = A00; \
|
(state)->A[0] = A0; \
|
||||||
(state)->A[1] = A01; \
|
(state)->A[1] = A1; \
|
||||||
(state)->A[2] = A02; \
|
(state)->A[2] = A2; \
|
||||||
(state)->A[3] = A03; \
|
(state)->A[3] = A3; \
|
||||||
(state)->A[4] = A04; \
|
(state)->A[4] = A4; \
|
||||||
(state)->A[5] = A05; \
|
(state)->A[5] = A5; \
|
||||||
(state)->A[6] = A06; \
|
(state)->A[6] = A6; \
|
||||||
(state)->A[7] = A07; \
|
(state)->A[7] = A7; \
|
||||||
(state)->A[8] = A08; \
|
(state)->A[8] = A8; \
|
||||||
(state)->A[9] = A09; \
|
(state)->A[9] = A9; \
|
||||||
(state)->A[10] = A0A; \
|
(state)->A[10] = AA; \
|
||||||
(state)->A[11] = A0B; \
|
(state)->A[11] = AB; \
|
||||||
(state)->B[0] = B0; \
|
(state)->B[0] = B0; \
|
||||||
(state)->B[1] = B1; \
|
(state)->B[1] = B1; \
|
||||||
(state)->B[2] = B2; \
|
(state)->B[2] = B2; \
|
||||||
@@ -286,8 +286,8 @@ do { \
|
|||||||
|
|
||||||
#define XOR_W8 \
|
#define XOR_W8 \
|
||||||
do { \
|
do { \
|
||||||
A00 = _mm256_xor_si256( A00, _mm256_set1_epi32( Wlow ) ); \
|
A0 = _mm256_xor_si256( A0, _mm256_set1_epi32( Wlow ) ); \
|
||||||
A01 = _mm256_xor_si256( A01, _mm256_set1_epi32( Whigh ) ); \
|
A1 = _mm256_xor_si256( A1, _mm256_set1_epi32( Whigh ) ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define SWAP_BC8 \
|
#define SWAP_BC8 \
|
||||||
@@ -321,60 +321,60 @@ do { \
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PERM_STEP_0_8 do { \
|
#define PERM_STEP_0_8 do { \
|
||||||
PERM_ELT8(A00, A0B, B0, BD, B9, B6, C8, M0); \
|
PERM_ELT8(A0, AB, B0, BD, B9, B6, C8, M0); \
|
||||||
PERM_ELT8(A01, A00, B1, BE, BA, B7, C7, M1); \
|
PERM_ELT8(A1, A0, B1, BE, BA, B7, C7, M1); \
|
||||||
PERM_ELT8(A02, A01, B2, BF, BB, B8, C6, M2); \
|
PERM_ELT8(A2, A1, B2, BF, BB, B8, C6, M2); \
|
||||||
PERM_ELT8(A03, A02, B3, B0, BC, B9, C5, M3); \
|
PERM_ELT8(A3, A2, B3, B0, BC, B9, C5, M3); \
|
||||||
PERM_ELT8(A04, A03, B4, B1, BD, BA, C4, M4); \
|
PERM_ELT8(A4, A3, B4, B1, BD, BA, C4, M4); \
|
||||||
PERM_ELT8(A05, A04, B5, B2, BE, BB, C3, M5); \
|
PERM_ELT8(A5, A4, B5, B2, BE, BB, C3, M5); \
|
||||||
PERM_ELT8(A06, A05, B6, B3, BF, BC, C2, M6); \
|
PERM_ELT8(A6, A5, B6, B3, BF, BC, C2, M6); \
|
||||||
PERM_ELT8(A07, A06, B7, B4, B0, BD, C1, M7); \
|
PERM_ELT8(A7, A6, B7, B4, B0, BD, C1, M7); \
|
||||||
PERM_ELT8(A08, A07, B8, B5, B1, BE, C0, M8); \
|
PERM_ELT8(A8, A7, B8, B5, B1, BE, C0, M8); \
|
||||||
PERM_ELT8(A09, A08, B9, B6, B2, BF, CF, M9); \
|
PERM_ELT8(A9, A8, B9, B6, B2, BF, CF, M9); \
|
||||||
PERM_ELT8(A0A, A09, BA, B7, B3, B0, CE, MA); \
|
PERM_ELT8(AA, A9, BA, B7, B3, B0, CE, MA); \
|
||||||
PERM_ELT8(A0B, A0A, BB, B8, B4, B1, CD, MB); \
|
PERM_ELT8(AB, AA, BB, B8, B4, B1, CD, MB); \
|
||||||
PERM_ELT8(A00, A0B, BC, B9, B5, B2, CC, MC); \
|
PERM_ELT8(A0, AB, BC, B9, B5, B2, CC, MC); \
|
||||||
PERM_ELT8(A01, A00, BD, BA, B6, B3, CB, MD); \
|
PERM_ELT8(A1, A0, BD, BA, B6, B3, CB, MD); \
|
||||||
PERM_ELT8(A02, A01, BE, BB, B7, B4, CA, ME); \
|
PERM_ELT8(A2, A1, BE, BB, B7, B4, CA, ME); \
|
||||||
PERM_ELT8(A03, A02, BF, BC, B8, B5, C9, MF); \
|
PERM_ELT8(A3, A2, BF, BC, B8, B5, C9, MF); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PERM_STEP_1_8 do { \
|
#define PERM_STEP_1_8 do { \
|
||||||
PERM_ELT8(A04, A03, B0, BD, B9, B6, C8, M0); \
|
PERM_ELT8(A4, A3, B0, BD, B9, B6, C8, M0); \
|
||||||
PERM_ELT8(A05, A04, B1, BE, BA, B7, C7, M1); \
|
PERM_ELT8(A5, A4, B1, BE, BA, B7, C7, M1); \
|
||||||
PERM_ELT8(A06, A05, B2, BF, BB, B8, C6, M2); \
|
PERM_ELT8(A6, A5, B2, BF, BB, B8, C6, M2); \
|
||||||
PERM_ELT8(A07, A06, B3, B0, BC, B9, C5, M3); \
|
PERM_ELT8(A7, A6, B3, B0, BC, B9, C5, M3); \
|
||||||
PERM_ELT8(A08, A07, B4, B1, BD, BA, C4, M4); \
|
PERM_ELT8(A8, A7, B4, B1, BD, BA, C4, M4); \
|
||||||
PERM_ELT8(A09, A08, B5, B2, BE, BB, C3, M5); \
|
PERM_ELT8(A9, A8, B5, B2, BE, BB, C3, M5); \
|
||||||
PERM_ELT8(A0A, A09, B6, B3, BF, BC, C2, M6); \
|
PERM_ELT8(AA, A9, B6, B3, BF, BC, C2, M6); \
|
||||||
PERM_ELT8(A0B, A0A, B7, B4, B0, BD, C1, M7); \
|
PERM_ELT8(AB, AA, B7, B4, B0, BD, C1, M7); \
|
||||||
PERM_ELT8(A00, A0B, B8, B5, B1, BE, C0, M8); \
|
PERM_ELT8(A0, AB, B8, B5, B1, BE, C0, M8); \
|
||||||
PERM_ELT8(A01, A00, B9, B6, B2, BF, CF, M9); \
|
PERM_ELT8(A1, A0, B9, B6, B2, BF, CF, M9); \
|
||||||
PERM_ELT8(A02, A01, BA, B7, B3, B0, CE, MA); \
|
PERM_ELT8(A2, A1, BA, B7, B3, B0, CE, MA); \
|
||||||
PERM_ELT8(A03, A02, BB, B8, B4, B1, CD, MB); \
|
PERM_ELT8(A3, A2, BB, B8, B4, B1, CD, MB); \
|
||||||
PERM_ELT8(A04, A03, BC, B9, B5, B2, CC, MC); \
|
PERM_ELT8(A4, A3, BC, B9, B5, B2, CC, MC); \
|
||||||
PERM_ELT8(A05, A04, BD, BA, B6, B3, CB, MD); \
|
PERM_ELT8(A5, A4, BD, BA, B6, B3, CB, MD); \
|
||||||
PERM_ELT8(A06, A05, BE, BB, B7, B4, CA, ME); \
|
PERM_ELT8(A6, A5, BE, BB, B7, B4, CA, ME); \
|
||||||
PERM_ELT8(A07, A06, BF, BC, B8, B5, C9, MF); \
|
PERM_ELT8(A7, A6, BF, BC, B8, B5, C9, MF); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PERM_STEP_2_8 do { \
|
#define PERM_STEP_2_8 do { \
|
||||||
PERM_ELT8(A08, A07, B0, BD, B9, B6, C8, M0); \
|
PERM_ELT8(A8, A7, B0, BD, B9, B6, C8, M0); \
|
||||||
PERM_ELT8(A09, A08, B1, BE, BA, B7, C7, M1); \
|
PERM_ELT8(A9, A8, B1, BE, BA, B7, C7, M1); \
|
||||||
PERM_ELT8(A0A, A09, B2, BF, BB, B8, C6, M2); \
|
PERM_ELT8(AA, A9, B2, BF, BB, B8, C6, M2); \
|
||||||
PERM_ELT8(A0B, A0A, B3, B0, BC, B9, C5, M3); \
|
PERM_ELT8(AB, AA, B3, B0, BC, B9, C5, M3); \
|
||||||
PERM_ELT8(A00, A0B, B4, B1, BD, BA, C4, M4); \
|
PERM_ELT8(A0, AB, B4, B1, BD, BA, C4, M4); \
|
||||||
PERM_ELT8(A01, A00, B5, B2, BE, BB, C3, M5); \
|
PERM_ELT8(A1, A0, B5, B2, BE, BB, C3, M5); \
|
||||||
PERM_ELT8(A02, A01, B6, B3, BF, BC, C2, M6); \
|
PERM_ELT8(A2, A1, B6, B3, BF, BC, C2, M6); \
|
||||||
PERM_ELT8(A03, A02, B7, B4, B0, BD, C1, M7); \
|
PERM_ELT8(A3, A2, B7, B4, B0, BD, C1, M7); \
|
||||||
PERM_ELT8(A04, A03, B8, B5, B1, BE, C0, M8); \
|
PERM_ELT8(A4, A3, B8, B5, B1, BE, C0, M8); \
|
||||||
PERM_ELT8(A05, A04, B9, B6, B2, BF, CF, M9); \
|
PERM_ELT8(A5, A4, B9, B6, B2, BF, CF, M9); \
|
||||||
PERM_ELT8(A06, A05, BA, B7, B3, B0, CE, MA); \
|
PERM_ELT8(A6, A5, BA, B7, B3, B0, CE, MA); \
|
||||||
PERM_ELT8(A07, A06, BB, B8, B4, B1, CD, MB); \
|
PERM_ELT8(A7, A6, BB, B8, B4, B1, CD, MB); \
|
||||||
PERM_ELT8(A08, A07, BC, B9, B5, B2, CC, MC); \
|
PERM_ELT8(A8, A7, BC, B9, B5, B2, CC, MC); \
|
||||||
PERM_ELT8(A09, A08, BD, BA, B6, B3, CB, MD); \
|
PERM_ELT8(A9, A8, BD, BA, B6, B3, CB, MD); \
|
||||||
PERM_ELT8(A0A, A09, BE, BB, B7, B4, CA, ME); \
|
PERM_ELT8(AA, A9, BE, BB, B7, B4, CA, ME); \
|
||||||
PERM_ELT8(A0B, A0A, BF, BC, B8, B5, C9, MF); \
|
PERM_ELT8(AB, AA, BF, BC, B8, B5, C9, MF); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define APPLY_P8 \
|
#define APPLY_P8 \
|
||||||
@@ -398,42 +398,42 @@ do { \
|
|||||||
PERM_STEP_0_8; \
|
PERM_STEP_0_8; \
|
||||||
PERM_STEP_1_8; \
|
PERM_STEP_1_8; \
|
||||||
PERM_STEP_2_8; \
|
PERM_STEP_2_8; \
|
||||||
A0B = _mm256_add_epi32( A0B, C6 ); \
|
AB = _mm256_add_epi32( AB, C6 ); \
|
||||||
A0A = _mm256_add_epi32( A0A, C5 ); \
|
AA = _mm256_add_epi32( AA, C5 ); \
|
||||||
A09 = _mm256_add_epi32( A09, C4 ); \
|
A9 = _mm256_add_epi32( A9, C4 ); \
|
||||||
A08 = _mm256_add_epi32( A08, C3 ); \
|
A8 = _mm256_add_epi32( A8, C3 ); \
|
||||||
A07 = _mm256_add_epi32( A07, C2 ); \
|
A7 = _mm256_add_epi32( A7, C2 ); \
|
||||||
A06 = _mm256_add_epi32( A06, C1 ); \
|
A6 = _mm256_add_epi32( A6, C1 ); \
|
||||||
A05 = _mm256_add_epi32( A05, C0 ); \
|
A5 = _mm256_add_epi32( A5, C0 ); \
|
||||||
A04 = _mm256_add_epi32( A04, CF ); \
|
A4 = _mm256_add_epi32( A4, CF ); \
|
||||||
A03 = _mm256_add_epi32( A03, CE ); \
|
A3 = _mm256_add_epi32( A3, CE ); \
|
||||||
A02 = _mm256_add_epi32( A02, CD ); \
|
A2 = _mm256_add_epi32( A2, CD ); \
|
||||||
A01 = _mm256_add_epi32( A01, CC ); \
|
A1 = _mm256_add_epi32( A1, CC ); \
|
||||||
A00 = _mm256_add_epi32( A00, CB ); \
|
A0 = _mm256_add_epi32( A0, CB ); \
|
||||||
A0B = _mm256_add_epi32( A0B, CA ); \
|
AB = _mm256_add_epi32( AB, CA ); \
|
||||||
A0A = _mm256_add_epi32( A0A, C9 ); \
|
AA = _mm256_add_epi32( AA, C9 ); \
|
||||||
A09 = _mm256_add_epi32( A09, C8 ); \
|
A9 = _mm256_add_epi32( A9, C8 ); \
|
||||||
A08 = _mm256_add_epi32( A08, C7 ); \
|
A8 = _mm256_add_epi32( A8, C7 ); \
|
||||||
A07 = _mm256_add_epi32( A07, C6 ); \
|
A7 = _mm256_add_epi32( A7, C6 ); \
|
||||||
A06 = _mm256_add_epi32( A06, C5 ); \
|
A6 = _mm256_add_epi32( A6, C5 ); \
|
||||||
A05 = _mm256_add_epi32( A05, C4 ); \
|
A5 = _mm256_add_epi32( A5, C4 ); \
|
||||||
A04 = _mm256_add_epi32( A04, C3 ); \
|
A4 = _mm256_add_epi32( A4, C3 ); \
|
||||||
A03 = _mm256_add_epi32( A03, C2 ); \
|
A3 = _mm256_add_epi32( A3, C2 ); \
|
||||||
A02 = _mm256_add_epi32( A02, C1 ); \
|
A2 = _mm256_add_epi32( A2, C1 ); \
|
||||||
A01 = _mm256_add_epi32( A01, C0 ); \
|
A1 = _mm256_add_epi32( A1, C0 ); \
|
||||||
A00 = _mm256_add_epi32( A00, CF ); \
|
A0 = _mm256_add_epi32( A0, CF ); \
|
||||||
A0B = _mm256_add_epi32( A0B, CE ); \
|
AB = _mm256_add_epi32( AB, CE ); \
|
||||||
A0A = _mm256_add_epi32( A0A, CD ); \
|
AA = _mm256_add_epi32( AA, CD ); \
|
||||||
A09 = _mm256_add_epi32( A09, CC ); \
|
A9 = _mm256_add_epi32( A9, CC ); \
|
||||||
A08 = _mm256_add_epi32( A08, CB ); \
|
A8 = _mm256_add_epi32( A8, CB ); \
|
||||||
A07 = _mm256_add_epi32( A07, CA ); \
|
A7 = _mm256_add_epi32( A7, CA ); \
|
||||||
A06 = _mm256_add_epi32( A06, C9 ); \
|
A6 = _mm256_add_epi32( A6, C9 ); \
|
||||||
A05 = _mm256_add_epi32( A05, C8 ); \
|
A5 = _mm256_add_epi32( A5, C8 ); \
|
||||||
A04 = _mm256_add_epi32( A04, C7 ); \
|
A4 = _mm256_add_epi32( A4, C7 ); \
|
||||||
A03 = _mm256_add_epi32( A03, C6 ); \
|
A3 = _mm256_add_epi32( A3, C6 ); \
|
||||||
A02 = _mm256_add_epi32( A02, C5 ); \
|
A2 = _mm256_add_epi32( A2, C5 ); \
|
||||||
A01 = _mm256_add_epi32( A01, C4 ); \
|
A1 = _mm256_add_epi32( A1, C4 ); \
|
||||||
A00 = _mm256_add_epi32( A00, C3 ); \
|
A0 = _mm256_add_epi32( A0, C3 ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define INCR_W8 do { \
|
#define INCR_W8 do { \
|
||||||
@@ -660,8 +660,8 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
|
|
||||||
|
|
||||||
#define DECL_STATE \
|
#define DECL_STATE \
|
||||||
__m128i A00, A01, A02, A03, A04, A05, A06, A07, \
|
__m128i A0, A1, A2, A3, A4, A5, A6, A7, \
|
||||||
A08, A09, A0A, A0B; \
|
A8, A9, AA, AB; \
|
||||||
__m128i B0, B1, B2, B3, B4, B5, B6, B7, \
|
__m128i B0, B1, B2, B3, B4, B5, B6, B7, \
|
||||||
B8, B9, BA, BB, BC, BD, BE, BF; \
|
B8, B9, BA, BB, BC, BD, BE, BF; \
|
||||||
__m128i C0, C1, C2, C3, C4, C5, C6, C7, \
|
__m128i C0, C1, C2, C3, C4, C5, C6, C7, \
|
||||||
@@ -676,18 +676,18 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
{ \
|
{ \
|
||||||
if ( (state)->state_loaded ) \
|
if ( (state)->state_loaded ) \
|
||||||
{ \
|
{ \
|
||||||
A00 = (state)->A[0]; \
|
A0 = (state)->A[0]; \
|
||||||
A01 = (state)->A[1]; \
|
A1 = (state)->A[1]; \
|
||||||
A02 = (state)->A[2]; \
|
A2 = (state)->A[2]; \
|
||||||
A03 = (state)->A[3]; \
|
A3 = (state)->A[3]; \
|
||||||
A04 = (state)->A[4]; \
|
A4 = (state)->A[4]; \
|
||||||
A05 = (state)->A[5]; \
|
A5 = (state)->A[5]; \
|
||||||
A06 = (state)->A[6]; \
|
A6 = (state)->A[6]; \
|
||||||
A07 = (state)->A[7]; \
|
A7 = (state)->A[7]; \
|
||||||
A08 = (state)->A[8]; \
|
A8 = (state)->A[8]; \
|
||||||
A09 = (state)->A[9]; \
|
A9 = (state)->A[9]; \
|
||||||
A0A = (state)->A[10]; \
|
AA = (state)->A[10]; \
|
||||||
A0B = (state)->A[11]; \
|
AB = (state)->A[11]; \
|
||||||
B0 = (state)->B[0]; \
|
B0 = (state)->B[0]; \
|
||||||
B1 = (state)->B[1]; \
|
B1 = (state)->B[1]; \
|
||||||
B2 = (state)->B[2]; \
|
B2 = (state)->B[2]; \
|
||||||
@@ -724,18 +724,18 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
else \
|
else \
|
||||||
{ \
|
{ \
|
||||||
(state)->state_loaded = true; \
|
(state)->state_loaded = true; \
|
||||||
A00 = m128_const1_64( 0x20728DFD20728DFD ); \
|
A0 = m128_const1_64( 0x20728DFD20728DFD ); \
|
||||||
A01 = m128_const1_64( 0x46C0BD5346C0BD53 ); \
|
A1 = m128_const1_64( 0x46C0BD5346C0BD53 ); \
|
||||||
A02 = m128_const1_64( 0xE782B699E782B699 ); \
|
A2 = m128_const1_64( 0xE782B699E782B699 ); \
|
||||||
A03 = m128_const1_64( 0x5530463255304632 ); \
|
A3 = m128_const1_64( 0x5530463255304632 ); \
|
||||||
A04 = m128_const1_64( 0x71B4EF9071B4EF90 ); \
|
A4 = m128_const1_64( 0x71B4EF9071B4EF90 ); \
|
||||||
A05 = m128_const1_64( 0x0EA9E82C0EA9E82C ); \
|
A5 = m128_const1_64( 0x0EA9E82C0EA9E82C ); \
|
||||||
A06 = m128_const1_64( 0xDBB930F1DBB930F1 ); \
|
A6 = m128_const1_64( 0xDBB930F1DBB930F1 ); \
|
||||||
A07 = m128_const1_64( 0xFAD06B8BFAD06B8B ); \
|
A7 = m128_const1_64( 0xFAD06B8BFAD06B8B ); \
|
||||||
A08 = m128_const1_64( 0xBE0CAE40BE0CAE40 ); \
|
A8 = m128_const1_64( 0xBE0CAE40BE0CAE40 ); \
|
||||||
A09 = m128_const1_64( 0x8BD144108BD14410 ); \
|
A9 = m128_const1_64( 0x8BD144108BD14410 ); \
|
||||||
A0A = m128_const1_64( 0x76D2ADAC76D2ADAC ); \
|
AA = m128_const1_64( 0x76D2ADAC76D2ADAC ); \
|
||||||
A0B = m128_const1_64( 0x28ACAB7F28ACAB7F ); \
|
AB = m128_const1_64( 0x28ACAB7F28ACAB7F ); \
|
||||||
B0 = m128_const1_64( 0xC1099CB7C1099CB7 ); \
|
B0 = m128_const1_64( 0xC1099CB7C1099CB7 ); \
|
||||||
B1 = m128_const1_64( 0x07B385F307B385F3 ); \
|
B1 = m128_const1_64( 0x07B385F307B385F3 ); \
|
||||||
B2 = m128_const1_64( 0xE7442C26E7442C26 ); \
|
B2 = m128_const1_64( 0xE7442C26E7442C26 ); \
|
||||||
@@ -774,18 +774,18 @@ shabal512_8way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define WRITE_STATE(state) do { \
|
#define WRITE_STATE(state) do { \
|
||||||
(state)->A[0] = A00; \
|
(state)->A[0] = A0; \
|
||||||
(state)->A[1] = A01; \
|
(state)->A[1] = A1; \
|
||||||
(state)->A[2] = A02; \
|
(state)->A[2] = A2; \
|
||||||
(state)->A[3] = A03; \
|
(state)->A[3] = A3; \
|
||||||
(state)->A[4] = A04; \
|
(state)->A[4] = A4; \
|
||||||
(state)->A[5] = A05; \
|
(state)->A[5] = A5; \
|
||||||
(state)->A[6] = A06; \
|
(state)->A[6] = A6; \
|
||||||
(state)->A[7] = A07; \
|
(state)->A[7] = A7; \
|
||||||
(state)->A[8] = A08; \
|
(state)->A[8] = A8; \
|
||||||
(state)->A[9] = A09; \
|
(state)->A[9] = A9; \
|
||||||
(state)->A[10] = A0A; \
|
(state)->A[10] = AA; \
|
||||||
(state)->A[11] = A0B; \
|
(state)->A[11] = AB; \
|
||||||
(state)->B[0] = B0; \
|
(state)->B[0] = B0; \
|
||||||
(state)->B[1] = B1; \
|
(state)->B[1] = B1; \
|
||||||
(state)->B[2] = B2; \
|
(state)->B[2] = B2; \
|
||||||
@@ -884,8 +884,8 @@ do { \
|
|||||||
|
|
||||||
#define XOR_W \
|
#define XOR_W \
|
||||||
do { \
|
do { \
|
||||||
A00 = _mm_xor_si128( A00, _mm_set1_epi32( Wlow ) ); \
|
A0 = _mm_xor_si128( A0, _mm_set1_epi32( Wlow ) ); \
|
||||||
A01 = _mm_xor_si128( A01, _mm_set1_epi32( Whigh ) ); \
|
A1 = _mm_xor_si128( A1, _mm_set1_epi32( Whigh ) ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
@@ -940,60 +940,60 @@ do { \
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PERM_STEP_0 do { \
|
#define PERM_STEP_0 do { \
|
||||||
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
|
PERM_ELT(A0, AB, B0, BD, B9, B6, C8, M0); \
|
||||||
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
|
PERM_ELT(A1, A0, B1, BE, BA, B7, C7, M1); \
|
||||||
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
|
PERM_ELT(A2, A1, B2, BF, BB, B8, C6, M2); \
|
||||||
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
|
PERM_ELT(A3, A2, B3, B0, BC, B9, C5, M3); \
|
||||||
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
|
PERM_ELT(A4, A3, B4, B1, BD, BA, C4, M4); \
|
||||||
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
|
PERM_ELT(A5, A4, B5, B2, BE, BB, C3, M5); \
|
||||||
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
|
PERM_ELT(A6, A5, B6, B3, BF, BC, C2, M6); \
|
||||||
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
|
PERM_ELT(A7, A6, B7, B4, B0, BD, C1, M7); \
|
||||||
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
|
PERM_ELT(A8, A7, B8, B5, B1, BE, C0, M8); \
|
||||||
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
|
PERM_ELT(A9, A8, B9, B6, B2, BF, CF, M9); \
|
||||||
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
|
PERM_ELT(AA, A9, BA, B7, B3, B0, CE, MA); \
|
||||||
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
|
PERM_ELT(AB, AA, BB, B8, B4, B1, CD, MB); \
|
||||||
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
|
PERM_ELT(A0, AB, BC, B9, B5, B2, CC, MC); \
|
||||||
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
|
PERM_ELT(A1, A0, BD, BA, B6, B3, CB, MD); \
|
||||||
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
|
PERM_ELT(A2, A1, BE, BB, B7, B4, CA, ME); \
|
||||||
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
|
PERM_ELT(A3, A2, BF, BC, B8, B5, C9, MF); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PERM_STEP_1 do { \
|
#define PERM_STEP_1 do { \
|
||||||
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
|
PERM_ELT(A4, A3, B0, BD, B9, B6, C8, M0); \
|
||||||
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
|
PERM_ELT(A5, A4, B1, BE, BA, B7, C7, M1); \
|
||||||
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
|
PERM_ELT(A6, A5, B2, BF, BB, B8, C6, M2); \
|
||||||
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
|
PERM_ELT(A7, A6, B3, B0, BC, B9, C5, M3); \
|
||||||
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
|
PERM_ELT(A8, A7, B4, B1, BD, BA, C4, M4); \
|
||||||
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
|
PERM_ELT(A9, A8, B5, B2, BE, BB, C3, M5); \
|
||||||
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
|
PERM_ELT(AA, A9, B6, B3, BF, BC, C2, M6); \
|
||||||
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
|
PERM_ELT(AB, AA, B7, B4, B0, BD, C1, M7); \
|
||||||
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
|
PERM_ELT(A0, AB, B8, B5, B1, BE, C0, M8); \
|
||||||
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
|
PERM_ELT(A1, A0, B9, B6, B2, BF, CF, M9); \
|
||||||
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
|
PERM_ELT(A2, A1, BA, B7, B3, B0, CE, MA); \
|
||||||
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
|
PERM_ELT(A3, A2, BB, B8, B4, B1, CD, MB); \
|
||||||
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
|
PERM_ELT(A4, A3, BC, B9, B5, B2, CC, MC); \
|
||||||
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
|
PERM_ELT(A5, A4, BD, BA, B6, B3, CB, MD); \
|
||||||
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
|
PERM_ELT(A6, A5, BE, BB, B7, B4, CA, ME); \
|
||||||
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
|
PERM_ELT(A7, A6, BF, BC, B8, B5, C9, MF); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PERM_STEP_2 do { \
|
#define PERM_STEP_2 do { \
|
||||||
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
|
PERM_ELT(A8, A7, B0, BD, B9, B6, C8, M0); \
|
||||||
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
|
PERM_ELT(A9, A8, B1, BE, BA, B7, C7, M1); \
|
||||||
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
|
PERM_ELT(AA, A9, B2, BF, BB, B8, C6, M2); \
|
||||||
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
|
PERM_ELT(AB, AA, B3, B0, BC, B9, C5, M3); \
|
||||||
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
|
PERM_ELT(A0, AB, B4, B1, BD, BA, C4, M4); \
|
||||||
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
|
PERM_ELT(A1, A0, B5, B2, BE, BB, C3, M5); \
|
||||||
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
|
PERM_ELT(A2, A1, B6, B3, BF, BC, C2, M6); \
|
||||||
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
|
PERM_ELT(A3, A2, B7, B4, B0, BD, C1, M7); \
|
||||||
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
|
PERM_ELT(A4, A3, B8, B5, B1, BE, C0, M8); \
|
||||||
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
|
PERM_ELT(A5, A4, B9, B6, B2, BF, CF, M9); \
|
||||||
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
|
PERM_ELT(A6, A5, BA, B7, B3, B0, CE, MA); \
|
||||||
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
|
PERM_ELT(A7, A6, BB, B8, B4, B1, CD, MB); \
|
||||||
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
|
PERM_ELT(A8, A7, BC, B9, B5, B2, CC, MC); \
|
||||||
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
|
PERM_ELT(A9, A8, BD, BA, B6, B3, CB, MD); \
|
||||||
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
|
PERM_ELT(AA, A9, BE, BB, B7, B4, CA, ME); \
|
||||||
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
|
PERM_ELT(AB, AA, BF, BC, B8, B5, C9, MF); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define APPLY_P \
|
#define APPLY_P \
|
||||||
@@ -1017,42 +1017,42 @@ do { \
|
|||||||
PERM_STEP_0; \
|
PERM_STEP_0; \
|
||||||
PERM_STEP_1; \
|
PERM_STEP_1; \
|
||||||
PERM_STEP_2; \
|
PERM_STEP_2; \
|
||||||
A0B = _mm_add_epi32( A0B, C6 ); \
|
AB = _mm_add_epi32( AB, C6 ); \
|
||||||
A0A = _mm_add_epi32( A0A, C5 ); \
|
AA = _mm_add_epi32( AA, C5 ); \
|
||||||
A09 = _mm_add_epi32( A09, C4 ); \
|
A9 = _mm_add_epi32( A9, C4 ); \
|
||||||
A08 = _mm_add_epi32( A08, C3 ); \
|
A8 = _mm_add_epi32( A8, C3 ); \
|
||||||
A07 = _mm_add_epi32( A07, C2 ); \
|
A7 = _mm_add_epi32( A7, C2 ); \
|
||||||
A06 = _mm_add_epi32( A06, C1 ); \
|
A6 = _mm_add_epi32( A6, C1 ); \
|
||||||
A05 = _mm_add_epi32( A05, C0 ); \
|
A5 = _mm_add_epi32( A5, C0 ); \
|
||||||
A04 = _mm_add_epi32( A04, CF ); \
|
A4 = _mm_add_epi32( A4, CF ); \
|
||||||
A03 = _mm_add_epi32( A03, CE ); \
|
A3 = _mm_add_epi32( A3, CE ); \
|
||||||
A02 = _mm_add_epi32( A02, CD ); \
|
A2 = _mm_add_epi32( A2, CD ); \
|
||||||
A01 = _mm_add_epi32( A01, CC ); \
|
A1 = _mm_add_epi32( A1, CC ); \
|
||||||
A00 = _mm_add_epi32( A00, CB ); \
|
A0 = _mm_add_epi32( A0, CB ); \
|
||||||
A0B = _mm_add_epi32( A0B, CA ); \
|
AB = _mm_add_epi32( AB, CA ); \
|
||||||
A0A = _mm_add_epi32( A0A, C9 ); \
|
AA = _mm_add_epi32( AA, C9 ); \
|
||||||
A09 = _mm_add_epi32( A09, C8 ); \
|
A9 = _mm_add_epi32( A9, C8 ); \
|
||||||
A08 = _mm_add_epi32( A08, C7 ); \
|
A8 = _mm_add_epi32( A8, C7 ); \
|
||||||
A07 = _mm_add_epi32( A07, C6 ); \
|
A7 = _mm_add_epi32( A7, C6 ); \
|
||||||
A06 = _mm_add_epi32( A06, C5 ); \
|
A6 = _mm_add_epi32( A6, C5 ); \
|
||||||
A05 = _mm_add_epi32( A05, C4 ); \
|
A5 = _mm_add_epi32( A5, C4 ); \
|
||||||
A04 = _mm_add_epi32( A04, C3 ); \
|
A4 = _mm_add_epi32( A4, C3 ); \
|
||||||
A03 = _mm_add_epi32( A03, C2 ); \
|
A3 = _mm_add_epi32( A3, C2 ); \
|
||||||
A02 = _mm_add_epi32( A02, C1 ); \
|
A2 = _mm_add_epi32( A2, C1 ); \
|
||||||
A01 = _mm_add_epi32( A01, C0 ); \
|
A1 = _mm_add_epi32( A1, C0 ); \
|
||||||
A00 = _mm_add_epi32( A00, CF ); \
|
A0 = _mm_add_epi32( A0, CF ); \
|
||||||
A0B = _mm_add_epi32( A0B, CE ); \
|
AB = _mm_add_epi32( AB, CE ); \
|
||||||
A0A = _mm_add_epi32( A0A, CD ); \
|
AA = _mm_add_epi32( AA, CD ); \
|
||||||
A09 = _mm_add_epi32( A09, CC ); \
|
A9 = _mm_add_epi32( A9, CC ); \
|
||||||
A08 = _mm_add_epi32( A08, CB ); \
|
A8 = _mm_add_epi32( A8, CB ); \
|
||||||
A07 = _mm_add_epi32( A07, CA ); \
|
A7 = _mm_add_epi32( A7, CA ); \
|
||||||
A06 = _mm_add_epi32( A06, C9 ); \
|
A6 = _mm_add_epi32( A6, C9 ); \
|
||||||
A05 = _mm_add_epi32( A05, C8 ); \
|
A5 = _mm_add_epi32( A5, C8 ); \
|
||||||
A04 = _mm_add_epi32( A04, C7 ); \
|
A4 = _mm_add_epi32( A4, C7 ); \
|
||||||
A03 = _mm_add_epi32( A03, C6 ); \
|
A3 = _mm_add_epi32( A3, C6 ); \
|
||||||
A02 = _mm_add_epi32( A02, C5 ); \
|
A2 = _mm_add_epi32( A2, C5 ); \
|
||||||
A01 = _mm_add_epi32( A01, C4 ); \
|
A1 = _mm_add_epi32( A1, C4 ); \
|
||||||
A00 = _mm_add_epi32( A00, C3 ); \
|
A0 = _mm_add_epi32( A0, C3 ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define INCR_W do { \
|
#define INCR_W do { \
|
||||||
|
@@ -74,7 +74,7 @@ typedef struct {
|
|||||||
|
|
||||||
void sm3_init(sm3_ctx_t *ctx);
|
void sm3_init(sm3_ctx_t *ctx);
|
||||||
void sm3_update(sm3_ctx_t *ctx, const unsigned char* data, size_t data_len);
|
void sm3_update(sm3_ctx_t *ctx, const unsigned char* data, size_t data_len);
|
||||||
void sm3_final(sm3_ctx_t *ctx, unsigned char digest[SM3_DIGEST_LENGTH]);
|
void sm3_final(sm3_ctx_t *ctx, unsigned char *digest);
|
||||||
void sm3_compress(uint32_t digest[8], const unsigned char block[SM3_BLOCK_SIZE]);
|
void sm3_compress(uint32_t digest[8], const unsigned char block[SM3_BLOCK_SIZE]);
|
||||||
void sm3(const unsigned char *data, size_t datalen,
|
void sm3(const unsigned char *data, size_t datalen,
|
||||||
unsigned char digest[SM3_DIGEST_LENGTH]);
|
unsigned char digest[SM3_DIGEST_LENGTH]);
|
||||||
|
@@ -1,912 +0,0 @@
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
|
|
||||||
//
|
|
||||||
// SWIFFTX.c
|
|
||||||
//
|
|
||||||
// October 2008
|
|
||||||
//
|
|
||||||
// This is the source file of the OPTIMIZED 32BIT implementation of SWIFFTX hash function.
|
|
||||||
// SWIFFTX is a candidate function for SHA-3 NIST competition.
|
|
||||||
// More details about SWIFFTX can be found in the accompanying submission documents.
|
|
||||||
//
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
#include "swifftx.h"
|
|
||||||
// See the remarks concerning compatibility issues inside stdint.h.
|
|
||||||
#include "stdint.h"
|
|
||||||
// Remove this while using gcc:
|
|
||||||
//#include "stdbool.h"
|
|
||||||
#include <memory.h>
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Constants and static tables portion.
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// In SWIFFTX we work over Z_257, so this is the modulus and the arithmetic is performed modulo
|
|
||||||
// this number.
|
|
||||||
#define FIELD_SIZE 257
|
|
||||||
|
|
||||||
// The size of FFT we use:
|
|
||||||
#define N 64
|
|
||||||
|
|
||||||
#define LOGN 6
|
|
||||||
|
|
||||||
#define EIGHTH_N (N / 8)
|
|
||||||
|
|
||||||
// The number of FFTS done on the input.
|
|
||||||
#define M (SWIFFTX_INPUT_BLOCK_SIZE / 8) // 32
|
|
||||||
|
|
||||||
// Omega is the 128th root of unity in Z_257.
|
|
||||||
// We choose w = 42.
|
|
||||||
#define OMEGA 42
|
|
||||||
|
|
||||||
// The size of the inner FFT lookup table:
|
|
||||||
#define W 8
|
|
||||||
|
|
||||||
// Calculates the sum and the difference of two numbers.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - A: the first operand. After the operation stores the sum of the two operands.
|
|
||||||
// - B: the second operand. After the operation stores the difference between the first and the
|
|
||||||
// second operands.
|
|
||||||
#define ADD_SUB_4WAY( A, B ) \
|
|
||||||
{ \
|
|
||||||
__m128i temp = B; \
|
|
||||||
B = _mm_sub_epi32( A, B ); \
|
|
||||||
A = _mm_add_epi32( A, temp ); \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//#define ADD_SUB(A, B) {register int temp = (B); B = ((A) - (B)); A = ((A) + (temp));}
|
|
||||||
|
|
||||||
// Quickly reduces an integer modulo 257.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - A: the input.
|
|
||||||
|
|
||||||
#define Q_REDUCE( A ) ( _mm_sub_epi32( \
|
|
||||||
_mm_and_epi32( A, m128_const1_32( 0xff ) ), \
|
|
||||||
_mm_srli_epi32( A, 8 ) ) )
|
|
||||||
|
|
||||||
//#define Q_REDUCE(A) (((A) & 0xff) - ((A) >> 8))
|
|
||||||
|
|
||||||
// Since we need to do the setup only once, this is the indicator variable:
|
|
||||||
static bool wasSetupDone = false;
|
|
||||||
|
|
||||||
// This array stores the powers of omegas that correspond to the indices, which are the input
|
|
||||||
// values. Known also as the "outer FFT twiddle factors".
|
|
||||||
swift_int16_t multipliers[N];
|
|
||||||
|
|
||||||
// This array stores the powers of omegas, multiplied by the corresponding values.
|
|
||||||
// We store this table to save computation time.
|
|
||||||
//
|
|
||||||
// To calculate the intermediate value of the compression function (the first out of two
|
|
||||||
// stages), we multiply the k-th bit of x_i by w^[(2i + 1) * k]. {x_i} is the input to the
|
|
||||||
// compression function, i is between 0 and 31, x_i is a 64-bit value.
|
|
||||||
// One can see the formula for this (intermediate) stage in the SWIFFT FSE 2008 paper --
|
|
||||||
// formula (2), section 3, page 6.
|
|
||||||
swift_int16_t fftTable[256 * EIGHTH_N];
|
|
||||||
|
|
||||||
// The A's we use in SWIFFTX shall be random elements of Z_257.
|
|
||||||
// We generated these A's from the decimal expansion of PI as follows: we converted each
|
|
||||||
// triple of digits into a decimal number d. If d < (257 * 3) we used (d % 257) for the next A
|
|
||||||
// element, otherwise move to the next triple of digits in the expansion. This guarntees that
|
|
||||||
// the A's are random, provided that PI digits are.
|
|
||||||
const swift_int16_t As[3 * M * N] =
|
|
||||||
{141, 78, 139, 75, 238, 205, 129, 126, 22, 245, 197, 169, 142, 118, 105, 78,
|
|
||||||
50, 149, 29, 208, 114, 34, 85, 117, 67, 148, 86, 256, 25, 49, 133, 93,
|
|
||||||
95, 36, 68, 231, 211, 102, 151, 128, 224, 117, 193, 27, 102, 187, 7, 105,
|
|
||||||
45, 130, 108, 124, 171, 151, 189, 128, 218, 134, 233, 165, 14, 201, 145, 134,
|
|
||||||
52, 203, 91, 96, 197, 69, 134, 213, 136, 93, 3, 249, 141, 16, 210, 73,
|
|
||||||
6, 92, 58, 74, 174, 6, 254, 91, 201, 107, 110, 76, 103, 11, 73, 16,
|
|
||||||
34, 209, 7, 127, 146, 254, 95, 176, 57, 13, 108, 245, 77, 92, 186, 117,
|
|
||||||
124, 97, 105, 118, 34, 74, 205, 122, 235, 53, 94, 238, 210, 227, 183, 11,
|
|
||||||
129, 159, 105, 183, 142, 129, 86, 21, 137, 138, 224, 223, 190, 188, 179, 188,
|
|
||||||
256, 25, 217, 176, 36, 176, 238, 127, 160, 210, 155, 148, 132, 0, 54, 127,
|
|
||||||
145, 6, 46, 85, 243, 95, 173, 123, 178, 207, 211, 183, 224, 173, 146, 35,
|
|
||||||
71, 114, 50, 22, 175, 1, 28, 19, 112, 129, 21, 34, 161, 159, 115, 52,
|
|
||||||
4, 193, 211, 92, 115, 49, 59, 217, 218, 96, 61, 81, 24, 202, 198, 89,
|
|
||||||
45, 128, 8, 51, 253, 87, 171, 35, 4, 188, 171, 10, 3, 137, 238, 73,
|
|
||||||
19, 208, 124, 163, 103, 177, 155, 147, 46, 84, 253, 233, 171, 241, 211, 217,
|
|
||||||
159, 48, 96, 79, 237, 18, 171, 226, 99, 1, 97, 195, 216, 163, 198, 95,
|
|
||||||
0, 201, 65, 228, 21, 153, 124, 230, 44, 35, 44, 108, 85, 156, 249, 207,
|
|
||||||
26, 222, 131, 1, 60, 242, 197, 150, 181, 19, 116, 213, 75, 98, 124, 240,
|
|
||||||
123, 207, 62, 255, 60, 143, 187, 157, 139, 9, 12, 104, 89, 49, 193, 146,
|
|
||||||
104, 196, 181, 82, 198, 253, 192, 191, 255, 122, 212, 104, 47, 20, 132, 208,
|
|
||||||
46, 170, 2, 69, 234, 36, 56, 163, 28, 152, 104, 238, 162, 56, 24, 58,
|
|
||||||
38, 150, 193, 254, 253, 125, 173, 35, 73, 126, 247, 239, 216, 6, 199, 15,
|
|
||||||
90, 12, 97, 122, 9, 84, 207, 127, 219, 72, 58, 30, 29, 182, 41, 192,
|
|
||||||
235, 248, 237, 74, 72, 176, 210, 252, 45, 64, 165, 87, 202, 241, 236, 223,
|
|
||||||
151, 242, 119, 239, 52, 112, 169, 28, 13, 37, 160, 60, 158, 81, 133, 60,
|
|
||||||
16, 145, 249, 192, 173, 217, 214, 93, 141, 184, 54, 34, 161, 104, 157, 95,
|
|
||||||
38, 133, 218, 227, 211, 181, 9, 66, 137, 143, 77, 33, 248, 159, 4, 55,
|
|
||||||
228, 48, 99, 219, 222, 184, 15, 36, 254, 256, 157, 237, 87, 139, 209, 113,
|
|
||||||
232, 85, 126, 167, 197, 100, 103, 166, 64, 225, 125, 205, 117, 135, 84, 128,
|
|
||||||
231, 112, 90, 241, 28, 22, 210, 147, 186, 49, 230, 21, 108, 39, 194, 47,
|
|
||||||
123, 199, 107, 114, 30, 210, 250, 143, 59, 156, 131, 133, 221, 27, 76, 99,
|
|
||||||
208, 250, 78, 12, 211, 141, 95, 81, 195, 106, 8, 232, 150, 212, 205, 221,
|
|
||||||
11, 225, 87, 219, 126, 136, 137, 180, 198, 48, 68, 203, 239, 252, 194, 235,
|
|
||||||
142, 137, 174, 172, 190, 145, 250, 221, 182, 204, 1, 195, 130, 153, 83, 241,
|
|
||||||
161, 239, 211, 138, 11, 169, 155, 245, 174, 49, 10, 166, 16, 130, 181, 139,
|
|
||||||
222, 222, 112, 99, 124, 94, 51, 243, 133, 194, 244, 136, 35, 248, 201, 177,
|
|
||||||
178, 186, 129, 102, 89, 184, 180, 41, 149, 96, 165, 72, 225, 231, 134, 158,
|
|
||||||
199, 28, 249, 16, 225, 195, 10, 210, 164, 252, 138, 8, 35, 152, 213, 199,
|
|
||||||
82, 116, 97, 230, 63, 199, 241, 35, 79, 120, 54, 174, 67, 112, 1, 76,
|
|
||||||
69, 222, 194, 96, 82, 94, 25, 228, 196, 145, 155, 136, 228, 234, 46, 101,
|
|
||||||
246, 51, 103, 166, 246, 75, 9, 200, 161, 4, 108, 35, 129, 168, 208, 144,
|
|
||||||
50, 14, 13, 220, 41, 132, 122, 127, 194, 9, 232, 234, 107, 28, 187, 8,
|
|
||||||
51, 141, 97, 221, 225, 9, 113, 170, 166, 102, 135, 22, 231, 185, 227, 187,
|
|
||||||
110, 145, 251, 146, 76, 22, 146, 228, 7, 53, 64, 25, 62, 198, 130, 190,
|
|
||||||
221, 232, 169, 64, 188, 199, 237, 249, 173, 218, 196, 191, 48, 224, 5, 113,
|
|
||||||
100, 166, 160, 21, 191, 197, 61, 162, 149, 171, 240, 183, 129, 231, 123, 204,
|
|
||||||
192, 179, 134, 15, 47, 161, 142, 177, 239, 234, 186, 237, 231, 53, 208, 95,
|
|
||||||
146, 36, 225, 231, 89, 142, 93, 248, 137, 124, 83, 39, 69, 77, 89, 208,
|
|
||||||
182, 48, 85, 147, 244, 164, 246, 68, 38, 190, 220, 35, 202, 91, 157, 151,
|
|
||||||
201, 240, 185, 218, 4, 152, 2, 132, 177, 88, 190, 196, 229, 74, 220, 135,
|
|
||||||
137, 196, 11, 47, 5, 251, 106, 144, 163, 60, 222, 127, 52, 57, 202, 102,
|
|
||||||
64, 140, 110, 206, 23, 182, 39, 245, 1, 163, 157, 186, 163, 80, 7, 230,
|
|
||||||
44, 249, 176, 102, 164, 125, 147, 120, 18, 191, 186, 125, 64, 65, 198, 157,
|
|
||||||
164, 213, 95, 61, 13, 181, 208, 91, 242, 197, 158, 34, 98, 169, 91, 14,
|
|
||||||
17, 93, 157, 17, 65, 30, 183, 6, 139, 58, 255, 108, 100, 136, 209, 144,
|
|
||||||
164, 6, 237, 33, 210, 110, 57, 126, 197, 136, 125, 244, 165, 151, 168, 3,
|
|
||||||
143, 251, 247, 155, 136, 130, 88, 14, 74, 121, 250, 133, 21, 226, 185, 232,
|
|
||||||
118, 132, 89, 64, 204, 161, 2, 70, 224, 159, 35, 204, 123, 180, 13, 52,
|
|
||||||
231, 57, 25, 78, 66, 69, 97, 42, 198, 84, 176, 59, 8, 232, 125, 134,
|
|
||||||
193, 2, 232, 109, 216, 69, 90, 142, 32, 38, 249, 37, 75, 180, 184, 188,
|
|
||||||
19, 47, 120, 87, 146, 70, 232, 120, 191, 45, 33, 38, 19, 248, 110, 110,
|
|
||||||
44, 64, 2, 84, 244, 228, 252, 228, 170, 123, 38, 144, 213, 144, 171, 212,
|
|
||||||
243, 87, 189, 46, 128, 110, 84, 77, 65, 183, 61, 184, 101, 44, 168, 68,
|
|
||||||
14, 106, 105, 8, 227, 211, 166, 39, 152, 43, 52, 254, 197, 55, 119, 89,
|
|
||||||
168, 65, 53, 138, 177, 56, 219, 0, 58, 121, 148, 18, 44, 100, 215, 103,
|
|
||||||
145, 229, 117, 196, 91, 89, 113, 143, 172, 239, 249, 184, 154, 39, 112, 65,
|
|
||||||
204, 42, 84, 38, 155, 151, 151, 16, 100, 87, 174, 162, 145, 147, 149, 186,
|
|
||||||
237, 145, 134, 144, 198, 235, 213, 163, 48, 230, 24, 47, 57, 71, 127, 0,
|
|
||||||
150, 219, 12, 81, 197, 150, 131, 13, 169, 63, 175, 184, 48, 235, 65, 243,
|
|
||||||
149, 200, 163, 254, 202, 114, 247, 67, 143, 250, 126, 228, 80, 130, 216, 214,
|
|
||||||
36, 2, 230, 33, 119, 125, 3, 142, 237, 100, 3, 152, 197, 174, 244, 129,
|
|
||||||
232, 30, 206, 199, 39, 210, 220, 43, 237, 221, 201, 54, 179, 42, 28, 133,
|
|
||||||
246, 203, 198, 177, 0, 28, 194, 85, 223, 109, 155, 147, 221, 60, 133, 108,
|
|
||||||
157, 254, 26, 75, 157, 185, 49, 142, 31, 137, 71, 43, 63, 64, 237, 148,
|
|
||||||
237, 172, 159, 160, 155, 254, 234, 224, 140, 193, 114, 140, 62, 109, 136, 39,
|
|
||||||
255, 8, 158, 146, 128, 49, 222, 96, 57, 209, 180, 249, 202, 127, 113, 231,
|
|
||||||
78, 178, 46, 33, 228, 215, 104, 31, 207, 186, 82, 41, 42, 39, 103, 119,
|
|
||||||
123, 133, 243, 254, 238, 156, 90, 186, 37, 212, 33, 107, 252, 51, 177, 36,
|
|
||||||
237, 76, 159, 245, 93, 214, 97, 56, 190, 38, 160, 94, 105, 222, 220, 158,
|
|
||||||
49, 16, 191, 52, 120, 87, 179, 2, 27, 144, 223, 230, 184, 6, 129, 227,
|
|
||||||
69, 47, 215, 181, 162, 139, 72, 200, 45, 163, 159, 62, 2, 221, 124, 40,
|
|
||||||
159, 242, 35, 208, 179, 166, 98, 67, 178, 68, 143, 225, 178, 146, 187, 159,
|
|
||||||
57, 66, 176, 192, 236, 250, 168, 224, 122, 43, 159, 120, 133, 165, 122, 64,
|
|
||||||
87, 74, 161, 241, 9, 87, 90, 24, 255, 113, 203, 220, 57, 139, 197, 159,
|
|
||||||
31, 151, 27, 140, 77, 162, 7, 27, 84, 228, 187, 220, 53, 126, 162, 242,
|
|
||||||
84, 181, 223, 103, 86, 177, 207, 31, 140, 18, 207, 256, 201, 166, 96, 23,
|
|
||||||
233, 103, 197, 84, 161, 75, 59, 149, 138, 154, 119, 92, 16, 53, 116, 97,
|
|
||||||
220, 114, 35, 45, 77, 209, 40, 196, 71, 22, 81, 178, 110, 14, 3, 180,
|
|
||||||
110, 129, 112, 47, 18, 61, 134, 78, 73, 79, 254, 232, 125, 180, 205, 54,
|
|
||||||
220, 119, 63, 89, 181, 52, 77, 109, 151, 77, 80, 207, 144, 25, 20, 6,
|
|
||||||
208, 47, 201, 206, 192, 14, 73, 176, 256, 201, 207, 87, 216, 60, 56, 73,
|
|
||||||
92, 243, 179, 113, 49, 59, 55, 168, 121, 137, 69, 154, 95, 57, 187, 47,
|
|
||||||
129, 4, 15, 92, 6, 116, 69, 196, 48, 134, 84, 81, 111, 56, 38, 176,
|
|
||||||
239, 6, 128, 72, 242, 134, 36, 221, 59, 48, 242, 68, 130, 110, 171, 89,
|
|
||||||
13, 220, 48, 29, 5, 75, 104, 233, 91, 129, 105, 162, 44, 113, 163, 163,
|
|
||||||
85, 147, 190, 111, 197, 80, 213, 153, 81, 68, 203, 33, 161, 165, 10, 61,
|
|
||||||
120, 252, 0, 205, 28, 42, 193, 64, 39, 37, 83, 175, 5, 218, 215, 174,
|
|
||||||
128, 121, 231, 11, 150, 145, 135, 197, 136, 91, 193, 5, 107, 88, 82, 6,
|
|
||||||
4, 188, 256, 70, 40, 2, 167, 57, 169, 203, 115, 254, 215, 172, 84, 80,
|
|
||||||
188, 167, 34, 137, 43, 243, 2, 79, 178, 38, 188, 135, 233, 194, 208, 13,
|
|
||||||
11, 151, 231, 196, 12, 122, 162, 56, 17, 114, 191, 207, 90, 132, 64, 238,
|
|
||||||
187, 6, 198, 176, 240, 88, 118, 236, 15, 226, 166, 22, 193, 229, 82, 246,
|
|
||||||
213, 64, 37, 63, 31, 243, 252, 37, 156, 38, 175, 204, 138, 141, 211, 82,
|
|
||||||
106, 217, 97, 139, 153, 56, 129, 218, 158, 9, 83, 26, 87, 112, 71, 21,
|
|
||||||
250, 5, 65, 141, 68, 116, 231, 113, 10, 218, 99, 205, 201, 92, 157, 4,
|
|
||||||
97, 46, 49, 220, 72, 139, 103, 171, 149, 129, 193, 19, 69, 245, 43, 31,
|
|
||||||
58, 68, 36, 195, 159, 22, 54, 34, 233, 141, 205, 100, 226, 96, 22, 192,
|
|
||||||
41, 231, 24, 79, 234, 138, 30, 120, 117, 216, 172, 197, 172, 107, 86, 29,
|
|
||||||
181, 151, 0, 6, 146, 186, 68, 55, 54, 58, 213, 182, 60, 231, 33, 232,
|
|
||||||
77, 210, 216, 154, 80, 51, 141, 122, 68, 148, 219, 122, 254, 48, 64, 175,
|
|
||||||
41, 115, 62, 243, 141, 81, 119, 121, 5, 68, 121, 88, 239, 29, 230, 90,
|
|
||||||
135, 159, 35, 223, 168, 112, 49, 37, 146, 60, 126, 134, 42, 145, 115, 90,
|
|
||||||
73, 133, 211, 86, 120, 141, 122, 241, 127, 56, 130, 36, 174, 75, 83, 246,
|
|
||||||
112, 45, 136, 194, 201, 115, 1, 156, 114, 167, 208, 12, 176, 147, 32, 170,
|
|
||||||
251, 100, 102, 220, 122, 210, 6, 49, 75, 201, 38, 105, 132, 135, 126, 102,
|
|
||||||
13, 121, 76, 228, 202, 20, 61, 213, 246, 13, 207, 42, 148, 168, 37, 253,
|
|
||||||
34, 94, 141, 185, 18, 234, 157, 109, 104, 64, 250, 125, 49, 236, 86, 48,
|
|
||||||
196, 77, 75, 237, 156, 103, 225, 19, 110, 229, 22, 68, 177, 93, 221, 181,
|
|
||||||
152, 153, 61, 108, 101, 74, 247, 195, 127, 216, 30, 166, 168, 61, 83, 229,
|
|
||||||
120, 156, 96, 120, 201, 124, 43, 27, 253, 250, 120, 143, 89, 235, 189, 243,
|
|
||||||
150, 7, 127, 119, 149, 244, 84, 185, 134, 34, 128, 193, 236, 234, 132, 117,
|
|
||||||
137, 32, 145, 184, 44, 121, 51, 76, 11, 228, 142, 251, 39, 77, 228, 251,
|
|
||||||
41, 58, 246, 107, 125, 187, 9, 240, 35, 8, 11, 162, 242, 220, 158, 163,
|
|
||||||
2, 184, 163, 227, 242, 2, 100, 101, 2, 78, 129, 34, 89, 28, 26, 157,
|
|
||||||
79, 31, 107, 250, 194, 156, 186, 69, 212, 66, 41, 180, 139, 42, 211, 253,
|
|
||||||
256, 239, 29, 129, 104, 248, 182, 68, 1, 189, 48, 226, 36, 229, 3, 158,
|
|
||||||
41, 53, 241, 22, 115, 174, 16, 163, 224, 19, 112, 219, 177, 233, 42, 27,
|
|
||||||
250, 134, 18, 28, 145, 122, 68, 34, 134, 31, 147, 17, 39, 188, 150, 76,
|
|
||||||
45, 42, 167, 249, 12, 16, 23, 182, 13, 79, 121, 3, 70, 197, 239, 44,
|
|
||||||
86, 177, 255, 81, 64, 171, 138, 131, 73, 110, 44, 201, 254, 198, 146, 91,
|
|
||||||
48, 9, 104, 31, 29, 161, 101, 31, 138, 180, 231, 233, 79, 137, 61, 236,
|
|
||||||
140, 15, 249, 218, 234, 119, 99, 195, 110, 137, 237, 207, 8, 31, 45, 24,
|
|
||||||
90, 155, 203, 253, 192, 203, 65, 176, 210, 171, 142, 214, 220, 122, 136, 237,
|
|
||||||
189, 186, 147, 40, 80, 254, 173, 33, 191, 46, 192, 26, 108, 255, 228, 205,
|
|
||||||
61, 76, 39, 107, 225, 126, 228, 182, 140, 251, 143, 134, 252, 168, 221, 8,
|
|
||||||
185, 85, 60, 233, 147, 244, 87, 137, 8, 140, 96, 80, 53, 45, 175, 160,
|
|
||||||
124, 189, 112, 37, 144, 19, 70, 17, 170, 242, 2, 3, 28, 95, 120, 199,
|
|
||||||
212, 43, 9, 117, 86, 151, 101, 241, 200, 145, 241, 19, 178, 69, 204, 197,
|
|
||||||
227, 166, 94, 7, 193, 45, 247, 234, 19, 187, 212, 212, 236, 125, 33, 95,
|
|
||||||
198, 121, 122, 103, 77, 155, 235, 49, 25, 237, 249, 11, 162, 7, 238, 24,
|
|
||||||
16, 150, 129, 25, 152, 17, 42, 67, 247, 162, 77, 154, 31, 133, 55, 137,
|
|
||||||
79, 119, 153, 10, 86, 28, 244, 186, 41, 169, 106, 44, 10, 49, 110, 179,
|
|
||||||
32, 133, 155, 244, 61, 70, 131, 168, 170, 39, 231, 252, 32, 69, 92, 238,
|
|
||||||
239, 35, 132, 136, 236, 167, 90, 32, 123, 88, 69, 22, 20, 89, 145, 166,
|
|
||||||
30, 118, 75, 4, 49, 31, 225, 54, 11, 50, 56, 191, 246, 1, 187, 33,
|
|
||||||
119, 107, 139, 68, 19, 240, 131, 55, 94, 113, 31, 252, 12, 179, 121, 2,
|
|
||||||
120, 252, 0, 76, 41, 80, 185, 42, 62, 121, 105, 159, 121, 109, 111, 98,
|
|
||||||
7, 118, 86, 29, 210, 70, 231, 179, 223, 229, 164, 70, 62, 47, 0, 206,
|
|
||||||
204, 178, 168, 120, 224, 166, 99, 25, 103, 63, 246, 224, 117, 204, 75, 124,
|
|
||||||
140, 133, 110, 110, 222, 88, 151, 118, 46, 37, 22, 143, 158, 40, 2, 50,
|
|
||||||
153, 94, 190, 199, 13, 198, 127, 211, 180, 90, 183, 98, 0, 142, 210, 154,
|
|
||||||
100, 187, 67, 231, 202, 100, 198, 235, 252, 160, 247, 124, 247, 14, 121, 221,
|
|
||||||
57, 88, 253, 243, 185, 89, 45, 249, 221, 194, 108, 175, 193, 119, 50, 141,
|
|
||||||
223, 133, 136, 64, 176, 250, 129, 100, 124, 94, 181, 159, 99, 185, 177, 240,
|
|
||||||
135, 42, 103, 52, 202, 208, 143, 186, 193, 103, 154, 237, 102, 88, 225, 161,
|
|
||||||
50, 188, 191, 109, 12, 87, 19, 227, 247, 183, 13, 52, 205, 170, 205, 146,
|
|
||||||
89, 160, 18, 105, 192, 73, 231, 225, 184, 157, 252, 220, 61, 59, 169, 183,
|
|
||||||
221, 20, 141, 20, 158, 101, 245, 7, 245, 225, 118, 137, 84, 55, 19, 27,
|
|
||||||
164, 110, 35, 25, 202, 94, 150, 46, 91, 152, 130, 1, 7, 46, 16, 237,
|
|
||||||
171, 109, 19, 200, 65, 38, 10, 213, 70, 96, 126, 226, 185, 225, 181, 46,
|
|
||||||
10, 165, 11, 123, 53, 158, 22, 147, 64, 22, 227, 69, 182, 237, 197, 37,
|
|
||||||
39, 49, 186, 223, 139, 128, 55, 36, 166, 178, 220, 20, 98, 172, 166, 253,
|
|
||||||
45, 0, 120, 180, 189, 185, 158, 159, 196, 6, 214, 79, 141, 52, 156, 107,
|
|
||||||
5, 109, 142, 159, 33, 64, 190, 133, 95, 132, 95, 202, 160, 63, 186, 23,
|
|
||||||
231, 107, 163, 33, 234, 15, 244, 77, 108, 49, 51, 7, 164, 87, 142, 99,
|
|
||||||
240, 202, 47, 256, 118, 190, 196, 178, 217, 42, 39, 153, 21, 192, 232, 202,
|
|
||||||
14, 82, 179, 64, 233, 4, 219, 10, 133, 78, 43, 144, 146, 216, 202, 81,
|
|
||||||
71, 252, 8, 201, 68, 256, 85, 233, 164, 88, 176, 30, 5, 152, 126, 179,
|
|
||||||
249, 84, 140, 190, 159, 54, 118, 98, 2, 159, 27, 133, 74, 121, 239, 196,
|
|
||||||
71, 149, 119, 135, 102, 20, 87, 112, 44, 75, 221, 3, 151, 158, 5, 98,
|
|
||||||
152, 25, 97, 106, 63, 171, 240, 79, 234, 240, 230, 92, 76, 70, 173, 196,
|
|
||||||
36, 225, 218, 133, 64, 240, 150, 41, 146, 66, 133, 51, 134, 73, 170, 238,
|
|
||||||
140, 90, 45, 89, 46, 147, 96, 169, 174, 174, 244, 151, 90, 40, 32, 74,
|
|
||||||
38, 154, 246, 57, 31, 14, 189, 151, 83, 243, 197, 183, 220, 185, 53, 225,
|
|
||||||
51, 106, 188, 208, 222, 248, 93, 13, 93, 215, 131, 25, 142, 185, 113, 222,
|
|
||||||
131, 215, 149, 50, 159, 85, 32, 5, 205, 192, 2, 227, 42, 214, 197, 42,
|
|
||||||
126, 182, 68, 123, 109, 36, 237, 179, 170, 199, 77, 256, 5, 128, 214, 243,
|
|
||||||
137, 177, 170, 253, 179, 180, 153, 236, 100, 196, 216, 231, 198, 37, 192, 80,
|
|
||||||
121, 221, 246, 1, 16, 246, 29, 78, 64, 148, 124, 38, 96, 125, 28, 20,
|
|
||||||
48, 51, 73, 187, 139, 208, 98, 253, 221, 188, 84, 129, 1, 205, 95, 205,
|
|
||||||
117, 79, 71, 126, 134, 237, 19, 184, 137, 125, 129, 178, 223, 54, 188, 112,
|
|
||||||
30, 7, 225, 228, 205, 184, 233, 87, 117, 22, 58, 10, 8, 42, 2, 114,
|
|
||||||
254, 19, 17, 13, 150, 92, 233, 179, 63, 12, 60, 171, 127, 35, 50, 5,
|
|
||||||
195, 113, 241, 25, 249, 184, 166, 44, 221, 35, 151, 116, 8, 54, 195, 89,
|
|
||||||
218, 186, 132, 5, 41, 89, 226, 177, 11, 41, 87, 172, 5, 23, 20, 59,
|
|
||||||
228, 94, 76, 33, 137, 43, 151, 221, 61, 232, 4, 120, 93, 217, 80, 228,
|
|
||||||
228, 6, 58, 25, 62, 84, 91, 48, 209, 20, 247, 243, 55, 106, 80, 79,
|
|
||||||
235, 34, 20, 180, 146, 2, 236, 13, 236, 206, 243, 222, 204, 83, 148, 213,
|
|
||||||
214, 117, 237, 98, 0, 90, 204, 168, 32, 41, 126, 67, 191, 74, 27, 255,
|
|
||||||
26, 75, 240, 113, 185, 105, 167, 154, 112, 67, 151, 63, 161, 134, 239, 176,
|
|
||||||
42, 87, 249, 130, 45, 242, 17, 100, 107, 120, 212, 218, 237, 76, 231, 162,
|
|
||||||
175, 172, 118, 155, 92, 36, 124, 17, 121, 71, 13, 9, 82, 126, 147, 142,
|
|
||||||
218, 148, 138, 80, 163, 106, 164, 123, 140, 129, 35, 42, 186, 154, 228, 214,
|
|
||||||
75, 73, 8, 253, 42, 153, 232, 164, 95, 24, 110, 90, 231, 197, 90, 196,
|
|
||||||
57, 164, 252, 181, 31, 7, 97, 256, 35, 77, 200, 212, 99, 179, 92, 227,
|
|
||||||
17, 180, 49, 176, 9, 188, 13, 182, 93, 44, 128, 219, 134, 92, 151, 6,
|
|
||||||
23, 126, 200, 109, 66, 30, 140, 180, 146, 134, 67, 200, 7, 9, 223, 168,
|
|
||||||
186, 221, 3, 154, 150, 165, 43, 53, 138, 27, 86, 213, 235, 160, 70, 2,
|
|
||||||
240, 20, 89, 212, 84, 141, 168, 246, 183, 227, 30, 167, 138, 185, 253, 83,
|
|
||||||
52, 143, 236, 94, 59, 65, 89, 218, 194, 157, 164, 156, 111, 95, 202, 168,
|
|
||||||
245, 256, 151, 28, 222, 194, 72, 130, 217, 134, 253, 77, 246, 100, 76, 32,
|
|
||||||
254, 174, 182, 193, 14, 237, 74, 1, 74, 26, 135, 216, 152, 208, 112, 38,
|
|
||||||
181, 62, 25, 71, 61, 234, 254, 97, 191, 23, 92, 256, 190, 205, 6, 16,
|
|
||||||
134, 147, 210, 219, 148, 59, 73, 185, 24, 247, 174, 143, 116, 220, 128, 144,
|
|
||||||
111, 126, 101, 98, 130, 136, 101, 102, 69, 127, 24, 168, 146, 226, 226, 207,
|
|
||||||
176, 122, 149, 254, 134, 196, 22, 151, 197, 21, 50, 205, 116, 154, 65, 116,
|
|
||||||
177, 224, 127, 77, 177, 159, 225, 69, 176, 54, 100, 104, 140, 8, 11, 126,
|
|
||||||
11, 188, 185, 159, 107, 16, 254, 142, 80, 28, 5, 157, 104, 57, 109, 82,
|
|
||||||
102, 80, 173, 242, 238, 207, 57, 105, 237, 160, 59, 189, 189, 199, 26, 11,
|
|
||||||
190, 156, 97, 118, 20, 12, 254, 189, 165, 147, 142, 199, 5, 213, 64, 133,
|
|
||||||
108, 217, 133, 60, 94, 28, 116, 136, 47, 165, 125, 42, 183, 143, 14, 129,
|
|
||||||
223, 70, 212, 205, 181, 180, 3, 201, 182, 46, 57, 104, 239, 60, 99, 181,
|
|
||||||
220, 231, 45, 79, 156, 89, 149, 143, 190, 103, 153, 61, 235, 73, 136, 20,
|
|
||||||
89, 243, 16, 130, 247, 141, 134, 93, 80, 68, 85, 84, 8, 72, 194, 4,
|
|
||||||
242, 110, 19, 133, 199, 70, 172, 92, 132, 254, 67, 74, 36, 94, 13, 90,
|
|
||||||
154, 184, 9, 109, 118, 243, 214, 71, 36, 95, 0, 90, 201, 105, 112, 215,
|
|
||||||
69, 196, 224, 210, 236, 242, 155, 211, 37, 134, 69, 113, 157, 97, 68, 26,
|
|
||||||
230, 149, 219, 180, 20, 76, 172, 145, 154, 40, 129, 8, 93, 56, 162, 124,
|
|
||||||
207, 233, 105, 19, 3, 183, 155, 134, 8, 244, 213, 78, 139, 88, 156, 37,
|
|
||||||
51, 152, 111, 102, 112, 250, 114, 252, 201, 241, 133, 24, 136, 153, 5, 90,
|
|
||||||
210, 197, 216, 24, 131, 17, 147, 246, 13, 86, 3, 253, 179, 237, 101, 114,
|
|
||||||
243, 191, 207, 2, 220, 133, 244, 53, 87, 125, 154, 158, 197, 20, 8, 83,
|
|
||||||
32, 191, 38, 241, 204, 22, 168, 59, 217, 123, 162, 82, 21, 50, 130, 89,
|
|
||||||
239, 253, 195, 56, 253, 74, 147, 125, 234, 199, 250, 28, 65, 193, 22, 237,
|
|
||||||
193, 94, 58, 229, 139, 176, 69, 42, 179, 164, 150, 168, 246, 214, 86, 174,
|
|
||||||
59, 117, 15, 19, 76, 37, 214, 238, 153, 226, 154, 45, 109, 114, 198, 107,
|
|
||||||
45, 70, 238, 196, 142, 252, 244, 71, 123, 136, 134, 188, 99, 132, 25, 42,
|
|
||||||
240, 0, 196, 33, 26, 124, 256, 145, 27, 102, 153, 35, 28, 132, 221, 167,
|
|
||||||
138, 133, 41, 170, 95, 224, 40, 139, 239, 153, 1, 106, 255, 106, 170, 163,
|
|
||||||
127, 44, 155, 232, 194, 119, 232, 117, 239, 143, 108, 41, 3, 9, 180, 256,
|
|
||||||
144, 113, 133, 200, 79, 69, 128, 216, 31, 50, 102, 209, 249, 136, 150, 154,
|
|
||||||
182, 51, 228, 39, 127, 142, 87, 15, 94, 92, 187, 245, 31, 236, 64, 58,
|
|
||||||
114, 11, 17, 166, 189, 152, 218, 34, 123, 39, 58, 37, 153, 91, 63, 121,
|
|
||||||
31, 34, 12, 254, 106, 96, 171, 14, 155, 247, 214, 69, 24, 98, 3, 204,
|
|
||||||
202, 194, 207, 30, 253, 44, 119, 70, 14, 96, 82, 250, 63, 6, 232, 38,
|
|
||||||
89, 144, 102, 191, 82, 254, 20, 222, 96, 162, 110, 6, 159, 58, 200, 226,
|
|
||||||
98, 128, 42, 70, 84, 247, 128, 211, 136, 54, 143, 166, 60, 118, 99, 218,
|
|
||||||
27, 193, 85, 81, 219, 223, 46, 41, 23, 233, 152, 222, 36, 236, 54, 181,
|
|
||||||
56, 50, 4, 207, 129, 92, 78, 88, 197, 251, 131, 105, 31, 172, 38, 131,
|
|
||||||
19, 204, 129, 47, 227, 106, 202, 183, 23, 6, 77, 224, 102, 147, 11, 218,
|
|
||||||
131, 132, 60, 192, 208, 223, 236, 23, 103, 115, 89, 18, 185, 171, 70, 174,
|
|
||||||
139, 0, 100, 160, 221, 11, 228, 60, 12, 122, 114, 12, 157, 235, 148, 57,
|
|
||||||
83, 62, 173, 131, 169, 126, 85, 99, 93, 243, 81, 80, 29, 245, 206, 82,
|
|
||||||
236, 227, 166, 14, 230, 213, 144, 97, 27, 111, 99, 164, 105, 150, 89, 111,
|
|
||||||
252, 118, 140, 232, 120, 183, 137, 213, 232, 157, 224, 33, 134, 118, 186, 80,
|
|
||||||
159, 2, 186, 193, 54, 242, 25, 237, 232, 249, 226, 213, 90, 149, 90, 160,
|
|
||||||
118, 69, 64, 37, 10, 183, 109, 246, 30, 52, 219, 69, 189, 26, 116, 220,
|
|
||||||
50, 244, 243, 243, 139, 137, 232, 98, 38, 45, 256, 143, 171, 101, 73, 238,
|
|
||||||
123, 45, 194, 167, 250, 123, 12, 29, 136, 237, 141, 21, 89, 96, 199, 44,
|
|
||||||
8, 214, 208, 17, 113, 41, 137, 26, 166, 155, 89, 85, 54, 58, 97, 160,
|
|
||||||
50, 239, 58, 71, 21, 157, 139, 12, 37, 198, 182, 131, 149, 134, 16, 204,
|
|
||||||
164, 181, 248, 166, 52, 216, 136, 201, 37, 255, 187, 240, 5, 101, 147, 231,
|
|
||||||
14, 163, 253, 134, 146, 216, 8, 54, 224, 90, 220, 195, 75, 215, 186, 58,
|
|
||||||
71, 204, 124, 105, 239, 53, 16, 85, 69, 163, 195, 223, 33, 38, 69, 88,
|
|
||||||
88, 203, 99, 55, 176, 13, 156, 204, 236, 99, 194, 134, 75, 247, 126, 129,
|
|
||||||
160, 124, 233, 206, 139, 144, 154, 45, 233, 51, 206, 61, 60, 55, 205, 107,
|
|
||||||
84, 108, 96, 188, 203, 31, 89, 20, 115, 144, 137, 90, 237, 78, 231, 185,
|
|
||||||
120, 217, 1, 176, 169, 30, 155, 176, 100, 113, 53, 42, 193, 108, 14, 121,
|
|
||||||
176, 158, 137, 92, 178, 44, 110, 249, 108, 234, 94, 101, 128, 12, 250, 173,
|
|
||||||
72, 202, 232, 66, 139, 152, 189, 18, 32, 197, 9, 238, 246, 55, 119, 183,
|
|
||||||
196, 119, 113, 247, 191, 100, 200, 245, 46, 16, 234, 112, 136, 116, 232, 48,
|
|
||||||
176, 108, 11, 237, 14, 153, 93, 177, 124, 72, 67, 121, 135, 143, 45, 18,
|
|
||||||
97, 251, 184, 172, 136, 55, 213, 8, 103, 12, 221, 212, 13, 160, 116, 91,
|
|
||||||
237, 127, 218, 190, 103, 131, 77, 82, 36, 100, 22, 252, 79, 69, 54, 26,
|
|
||||||
65, 182, 115, 142, 247, 20, 89, 81, 188, 244, 27, 120, 240, 248, 13, 230,
|
|
||||||
67, 133, 32, 201, 129, 87, 9, 245, 66, 88, 166, 34, 46, 184, 119, 218,
|
|
||||||
144, 235, 163, 40, 138, 134, 127, 217, 64, 227, 116, 67, 55, 202, 130, 48,
|
|
||||||
199, 42, 251, 112, 124, 153, 123, 194, 243, 49, 250, 12, 78, 157, 167, 134,
|
|
||||||
210, 73, 156, 102, 21, 88, 216, 123, 45, 11, 208, 18, 47, 187, 20, 43,
|
|
||||||
3, 180, 124, 2, 136, 176, 77, 111, 138, 139, 91, 225, 126, 8, 74, 255,
|
|
||||||
88, 192, 193, 239, 138, 204, 139, 194, 166, 130, 252, 184, 140, 168, 30, 177,
|
|
||||||
121, 98, 131, 124, 69, 171, 75, 49, 184, 34, 76, 122, 202, 115, 184, 253,
|
|
||||||
120, 182, 33, 251, 1, 74, 216, 217, 243, 168, 70, 162, 119, 158, 197, 198,
|
|
||||||
61, 89, 7, 5, 54, 199, 211, 170, 23, 226, 44, 247, 165, 195, 7, 225,
|
|
||||||
91, 23, 50, 15, 51, 208, 106, 94, 12, 31, 43, 112, 146, 139, 246, 182,
|
|
||||||
113, 1, 97, 15, 66, 2, 51, 76, 164, 184, 237, 200, 218, 176, 72, 98,
|
|
||||||
33, 135, 38, 147, 140, 229, 50, 94, 81, 187, 129, 17, 238, 168, 146, 203,
|
|
||||||
181, 99, 164, 3, 104, 98, 255, 189, 114, 142, 86, 102, 229, 102, 80, 129,
|
|
||||||
64, 84, 79, 161, 81, 156, 128, 111, 164, 197, 18, 15, 55, 196, 198, 191,
|
|
||||||
28, 113, 117, 96, 207, 253, 19, 158, 231, 13, 53, 130, 252, 211, 58, 180,
|
|
||||||
212, 142, 7, 219, 38, 81, 62, 109, 167, 113, 33, 56, 97, 185, 157, 130,
|
|
||||||
186, 129, 119, 182, 196, 26, 54, 110, 65, 170, 166, 236, 30, 22, 162, 0,
|
|
||||||
106, 12, 248, 33, 48, 72, 159, 17, 76, 244, 172, 132, 89, 171, 196, 76,
|
|
||||||
254, 166, 76, 218, 226, 3, 52, 220, 238, 181, 179, 144, 225, 23, 3, 166,
|
|
||||||
158, 35, 228, 154, 204, 23, 203, 71, 134, 189, 18, 168, 236, 141, 117, 138,
|
|
||||||
2, 132, 78, 57, 154, 21, 250, 196, 184, 40, 161, 40, 10, 178, 134, 120,
|
|
||||||
132, 123, 101, 82, 205, 121, 55, 140, 231, 56, 231, 71, 206, 246, 198, 150,
|
|
||||||
146, 192, 45, 105, 242, 1, 125, 18, 176, 46, 222, 122, 19, 80, 113, 133,
|
|
||||||
131, 162, 81, 51, 98, 168, 247, 161, 139, 39, 63, 162, 22, 153, 170, 92,
|
|
||||||
91, 130, 174, 200, 45, 112, 99, 164, 132, 184, 191, 186, 200, 167, 86, 145,
|
|
||||||
167, 227, 130, 44, 12, 158, 172, 249, 204, 17, 54, 249, 16, 200, 21, 174,
|
|
||||||
67, 223, 105, 201, 50, 36, 133, 203, 244, 131, 228, 67, 29, 195, 91, 91,
|
|
||||||
55, 107, 167, 154, 170, 137, 218, 183, 169, 61, 99, 175, 128, 23, 142, 183,
|
|
||||||
66, 255, 59, 187, 66, 85, 212, 109, 168, 82, 16, 43, 67, 139, 114, 176,
|
|
||||||
216, 255, 130, 94, 152, 79, 183, 64, 100, 23, 214, 82, 34, 230, 48, 15,
|
|
||||||
242, 130, 50, 241, 81, 32, 5, 125, 183, 182, 184, 99, 248, 109, 159, 210,
|
|
||||||
226, 61, 119, 129, 39, 149, 78, 214, 107, 78, 147, 124, 228, 18, 143, 188,
|
|
||||||
84, 180, 233, 119, 64, 39, 158, 133, 177, 168, 6, 150, 80, 117, 150, 56,
|
|
||||||
49, 72, 49, 37, 30, 242, 49, 142, 33, 156, 34, 44, 44, 72, 58, 22,
|
|
||||||
249, 46, 168, 80, 25, 196, 64, 174, 97, 179, 244, 134, 213, 105, 63, 151,
|
|
||||||
21, 90, 168, 90, 245, 28, 157, 65, 250, 232, 188, 27, 99, 160, 156, 127,
|
|
||||||
68, 193, 10, 80, 205, 36, 138, 229, 12, 223, 70, 169, 251, 41, 48, 94,
|
|
||||||
41, 177, 99, 256, 158, 0, 6, 83, 231, 191, 120, 135, 157, 146, 218, 213,
|
|
||||||
160, 7, 47, 234, 98, 211, 79, 225, 179, 95, 175, 105, 185, 79, 115, 0,
|
|
||||||
104, 14, 65, 124, 15, 188, 52, 9, 253, 27, 132, 137, 13, 127, 75, 238,
|
|
||||||
185, 253, 33, 8, 52, 157, 164, 68, 232, 188, 69, 28, 209, 233, 5, 129,
|
|
||||||
216, 90, 252, 212, 33, 200, 222, 9, 112, 15, 43, 36, 226, 114, 15, 249,
|
|
||||||
217, 8, 148, 22, 147, 23, 143, 67, 222, 116, 235, 250, 212, 210, 39, 142,
|
|
||||||
108, 64, 209, 83, 73, 66, 99, 34, 17, 29, 45, 151, 244, 114, 28, 241,
|
|
||||||
144, 208, 146, 179, 132, 89, 217, 198, 252, 219, 205, 165, 75, 107, 11, 173,
|
|
||||||
76, 6, 196, 247, 152, 216, 248, 91, 209, 178, 57, 250, 174, 60, 79, 123,
|
|
||||||
18, 135, 9, 241, 230, 159, 184, 68, 156, 251, 215, 9, 113, 234, 75, 235,
|
|
||||||
103, 194, 205, 129, 230, 45, 96, 73, 157, 20, 200, 212, 212, 228, 161, 7,
|
|
||||||
231, 228, 108, 43, 198, 87, 140, 140, 4, 182, 164, 3, 53, 104, 250, 213,
|
|
||||||
85, 38, 89, 61, 52, 187, 35, 204, 86, 249, 100, 71, 248, 213, 163, 215,
|
|
||||||
66, 106, 252, 129, 40, 111, 47, 24, 186, 221, 85, 205, 199, 237, 122, 181,
|
|
||||||
32, 46, 182, 135, 33, 251, 142, 34, 208, 242, 128, 255, 4, 234, 15, 33,
|
|
||||||
167, 222, 32, 186, 191, 34, 255, 244, 98, 240, 228, 204, 30, 142, 32, 70,
|
|
||||||
69, 83, 110, 151, 10, 243, 141, 21, 223, 69, 61, 37, 59, 209, 102, 114,
|
|
||||||
223, 33, 129, 254, 255, 103, 86, 247, 235, 72, 126, 177, 102, 226, 102, 30,
|
|
||||||
149, 221, 62, 247, 251, 120, 163, 173, 57, 202, 204, 24, 39, 106, 120, 143,
|
|
||||||
202, 176, 191, 147, 37, 38, 51, 133, 47, 245, 157, 132, 154, 71, 183, 111,
|
|
||||||
30, 180, 18, 202, 82, 96, 170, 91, 157, 181, 212, 140, 256, 8, 196, 121,
|
|
||||||
149, 79, 66, 127, 113, 78, 4, 197, 84, 256, 111, 222, 102, 63, 228, 104,
|
|
||||||
136, 223, 67, 193, 93, 154, 249, 83, 204, 101, 200, 234, 84, 252, 230, 195,
|
|
||||||
43, 140, 120, 242, 89, 63, 166, 233, 209, 94, 43, 170, 126, 5, 205, 78,
|
|
||||||
112, 80, 143, 151, 146, 248, 137, 203, 45, 183, 61, 1, 155, 8, 102, 59,
|
|
||||||
68, 212, 230, 61, 254, 191, 128, 223, 176, 123, 229, 27, 146, 120, 96, 165,
|
|
||||||
213, 12, 232, 40, 186, 225, 66, 105, 200, 195, 212, 110, 237, 238, 151, 19,
|
|
||||||
12, 171, 150, 82, 7, 228, 79, 52, 15, 78, 62, 43, 21, 154, 114, 21,
|
|
||||||
12, 212, 256, 232, 125, 127, 5, 51, 37, 252, 136, 13, 47, 195, 168, 191,
|
|
||||||
231, 55, 57, 251, 214, 116, 15, 86, 210, 41, 249, 242, 119, 27, 250, 203,
|
|
||||||
107, 69, 90, 43, 206, 154, 127, 54, 100, 78, 187, 54, 244, 177, 234, 167,
|
|
||||||
202, 136, 209, 171, 69, 114, 133, 173, 26, 139, 78, 141, 128, 32, 124, 39,
|
|
||||||
45, 218, 96, 68, 90, 44, 67, 62, 83, 190, 188, 256, 103, 42, 102, 64,
|
|
||||||
249, 0, 141, 11, 61, 69, 70, 66, 233, 237, 29, 200, 251, 157, 71, 51,
|
|
||||||
64, 133, 113, 76, 35, 125, 76, 137, 217, 145, 35, 69, 226, 180, 56, 249,
|
|
||||||
156, 163, 176, 237, 81, 54, 85, 169, 115, 211, 129, 70, 248, 40, 252, 192,
|
|
||||||
194, 101, 247, 8, 181, 124, 217, 191, 194, 93, 99, 127, 117, 177, 144, 151,
|
|
||||||
228, 121, 32, 11, 89, 81, 26, 29, 183, 76, 249, 132, 179, 70, 34, 102,
|
|
||||||
20, 66, 87, 63, 124, 205, 174, 177, 87, 219, 73, 218, 91, 87, 176, 72,
|
|
||||||
15, 211, 47, 61, 251, 165, 39, 247, 146, 70, 150, 57, 1, 212, 36, 162,
|
|
||||||
39, 38, 16, 216, 3, 50, 116, 200, 32, 234, 77, 181, 155, 19, 90, 188,
|
|
||||||
36, 6, 254, 46, 46, 203, 25, 230, 181, 196, 4, 151, 225, 65, 122, 216,
|
|
||||||
168, 86, 158, 131, 136, 16, 49, 102, 233, 64, 154, 88, 228, 52, 146, 69,
|
|
||||||
93, 157, 243, 121, 70, 209, 126, 213, 88, 145, 236, 65, 70, 96, 204, 47,
|
|
||||||
10, 200, 77, 8, 103, 150, 48, 153, 5, 37, 52, 235, 209, 31, 181, 126,
|
|
||||||
83, 142, 224, 140, 6, 32, 200, 171, 160, 179, 115, 229, 75, 194, 208, 39,
|
|
||||||
59, 223, 52, 247, 38, 197, 135, 1, 6, 189, 106, 114, 168, 5, 211, 222,
|
|
||||||
44, 63, 90, 160, 116, 172, 170, 133, 125, 138, 39, 131, 23, 178, 10, 214,
|
|
||||||
36, 93, 28, 59, 68, 17, 123, 25, 255, 184, 204, 102, 194, 214, 129, 94,
|
|
||||||
159, 245, 112, 141, 62, 11, 61, 197, 124, 221, 205, 11, 79, 71, 201, 54,
|
|
||||||
58, 150, 29, 121, 87, 46, 240, 201, 68, 20, 194, 209, 47, 152, 158, 174,
|
|
||||||
193, 164, 120, 255, 216, 165, 247, 58, 85, 130, 220, 23, 122, 223, 188, 98,
|
|
||||||
21, 70, 72, 170, 150, 237, 76, 143, 112, 238, 206, 146, 215, 110, 4, 250,
|
|
||||||
68, 44, 174, 177, 30, 98, 143, 241, 180, 127, 113, 48, 0, 1, 179, 199,
|
|
||||||
59, 106, 201, 114, 29, 86, 173, 133, 217, 44, 200, 141, 107, 172, 16, 60,
|
|
||||||
82, 58, 239, 94, 141, 234, 186, 235, 109, 173, 249, 139, 141, 59, 100, 248,
|
|
||||||
84, 144, 49, 160, 51, 207, 164, 103, 74, 97, 146, 202, 193, 125, 168, 134,
|
|
||||||
236, 111, 135, 121, 59, 145, 168, 200, 181, 173, 109, 2, 255, 6, 9, 245,
|
|
||||||
90, 202, 214, 143, 121, 65, 85, 232, 132, 77, 228, 84, 26, 54, 184, 15,
|
|
||||||
161, 29, 177, 79, 43, 0, 156, 184, 163, 165, 62, 90, 179, 93, 45, 239,
|
|
||||||
1, 16, 120, 189, 127, 47, 74, 166, 20, 214, 233, 226, 89, 217, 229, 26,
|
|
||||||
156, 53, 162, 60, 21, 3, 192, 72, 111, 51, 53, 101, 181, 208, 88, 82,
|
|
||||||
179, 160, 219, 113, 240, 108, 43, 224, 162, 147, 62, 14, 95, 81, 205, 4,
|
|
||||||
160, 177, 225, 115, 29, 69, 235, 168, 148, 29, 128, 114, 124, 129, 172, 165,
|
|
||||||
215, 231, 214, 86, 160, 44, 157, 91, 248, 183, 73, 164, 56, 181, 162, 92,
|
|
||||||
141, 118, 127, 240, 196, 77, 0, 9, 244, 79, 250, 100, 195, 25, 255, 85,
|
|
||||||
94, 35, 212, 137, 107, 34, 110, 20, 200, 104, 17, 32, 231, 43, 150, 159,
|
|
||||||
231, 216, 223, 190, 226, 109, 162, 197, 87, 92, 224, 11, 111, 73, 60, 225,
|
|
||||||
238, 73, 246, 169, 19, 217, 119, 38, 121, 118, 70, 82, 99, 241, 110, 67,
|
|
||||||
31, 76, 146, 215, 124, 240, 31, 103, 139, 224, 75, 160, 31, 78, 93, 4,
|
|
||||||
64, 9, 103, 223, 6, 227, 119, 85, 116, 81, 21, 43, 46, 206, 234, 132,
|
|
||||||
85, 99, 22, 131, 135, 97, 86, 13, 234, 188, 21, 14, 89, 169, 207, 238,
|
|
||||||
219, 177, 190, 72, 157, 41, 114, 140, 92, 141, 186, 1, 63, 107, 225, 184,
|
|
||||||
118, 150, 153, 254, 241, 106, 120, 210, 104, 144, 151, 161, 88, 206, 125, 164,
|
|
||||||
15, 211, 173, 49, 146, 241, 71, 36, 58, 201, 46, 27, 33, 187, 91, 162,
|
|
||||||
117, 19, 210, 213, 187, 97, 193, 50, 190, 114, 217, 60, 61, 167, 207, 213,
|
|
||||||
213, 53, 135, 34, 156, 91, 115, 119, 46, 99, 242, 1, 90, 52, 198, 227,
|
|
||||||
201, 91, 216, 146, 210, 82, 121, 38, 73, 133, 182, 193, 132, 148, 246, 75,
|
|
||||||
109, 157, 179, 113, 176, 134, 205, 159, 148, 58, 103, 171, 132, 156, 133, 147,
|
|
||||||
161, 231, 39, 100, 175, 97, 125, 28, 183, 129, 135, 191, 202, 181, 29, 218,
|
|
||||||
43, 104, 148, 203, 189, 204, 4, 182, 169, 1, 134, 122, 141, 202, 13, 187,
|
|
||||||
177, 112, 162, 35, 231, 6, 8, 241, 99, 6, 191, 45, 113, 113, 101, 104};
|
|
||||||
|
|
||||||
// The S-Box used for further linearity breaking: a 256-entry table that maps
// one byte value to another.
|
|
||||||
// It was created by taking the digits of the decimal expansion of e.
|
|
||||||
// The code that created it can be found in 'ProduceRandomSBox.c'.
|
|
||||||
// Layout: 16 rows of 16 entries. The label at the end of each row is the high
// hex digit of the index and the column header gives the low hex digit, so the
// entry in row R, column C is SBox[(R << 4) | C].
unsigned char SBox[256] = {
|
|
||||||
//0 1 2 3 4 5 6 7 8 9 A B C D E F
|
|
||||||
0x7d, 0xd1, 0x70, 0x0b, 0xfa, 0x39, 0x18, 0xc3, 0xf3, 0xbb, 0xa7, 0xd4, 0x84, 0x25, 0x3b, 0x3c, // 0
|
|
||||||
0x2c, 0x15, 0x69, 0x9a, 0xf9, 0x27, 0xfb, 0x02, 0x52, 0xba, 0xa8, 0x4b, 0x20, 0xb5, 0x8b, 0x3a, // 1
|
|
||||||
0x88, 0x8e, 0x26, 0xcb, 0x71, 0x5e, 0xaf, 0xad, 0x0c, 0xac, 0xa1, 0x93, 0xc6, 0x78, 0xce, 0xfc, // 2
|
|
||||||
0x2a, 0x76, 0x17, 0x1f, 0x62, 0xc2, 0x2e, 0x99, 0x11, 0x37, 0x65, 0x40, 0xfd, 0xa0, 0x03, 0xc1, // 3
|
|
||||||
0xca, 0x48, 0xe2, 0x9b, 0x81, 0xe4, 0x1c, 0x01, 0xec, 0x68, 0x7a, 0x5a, 0x50, 0xf8, 0x0e, 0xa3, // 4
|
|
||||||
0xe8, 0x61, 0x2b, 0xa2, 0xeb, 0xcf, 0x8c, 0x3d, 0xb4, 0x95, 0x13, 0x08, 0x46, 0xab, 0x91, 0x7b, // 5
|
|
||||||
0xea, 0x55, 0x67, 0x9d, 0xdd, 0x29, 0x6a, 0x8f, 0x9f, 0x22, 0x4e, 0xf2, 0x57, 0xd2, 0xa9, 0xbd, // 6
|
|
||||||
0x38, 0x16, 0x5f, 0x4c, 0xf7, 0x9e, 0x1b, 0x2f, 0x30, 0xc7, 0x41, 0x24, 0x5c, 0xbf, 0x05, 0xf6, // 7
|
|
||||||
0x0a, 0x31, 0xa5, 0x45, 0x21, 0x33, 0x6b, 0x6d, 0x6c, 0x86, 0xe1, 0xa4, 0xe6, 0x92, 0x9c, 0xdf, // 8
|
|
||||||
0xe7, 0xbe, 0x28, 0xe3, 0xfe, 0x06, 0x4d, 0x98, 0x80, 0x04, 0x96, 0x36, 0x3e, 0x14, 0x4a, 0x34, // 9
|
|
||||||
0xd3, 0xd5, 0xdb, 0x44, 0xcd, 0xf5, 0x54, 0xdc, 0x89, 0x09, 0x90, 0x42, 0x87, 0xff, 0x7e, 0x56, // A
|
|
||||||
0x5d, 0x59, 0xd7, 0x23, 0x75, 0x19, 0x97, 0x73, 0x83, 0x64, 0x53, 0xa6, 0x1e, 0xd8, 0xb0, 0x49, // B
|
|
||||||
0x3f, 0xef, 0xbc, 0x7f, 0x43, 0xf0, 0xc9, 0x72, 0x0f, 0x63, 0x79, 0x2d, 0xc0, 0xda, 0x66, 0xc8, // C
|
|
||||||
0x32, 0xde, 0x47, 0x07, 0xb8, 0xe9, 0x1d, 0xc4, 0x85, 0x74, 0x82, 0xcc, 0x60, 0x51, 0x77, 0x0d, // D
|
|
||||||
0xaa, 0x35, 0xed, 0x58, 0x7c, 0x5b, 0xb9, 0x94, 0x6e, 0x8d, 0xb1, 0xc5, 0xb7, 0xee, 0xb6, 0xae, // E
|
|
||||||
0x10, 0xe0, 0xd6, 0xd9, 0xe5, 0x4f, 0xf1, 0x12, 0x00, 0xd0, 0xf4, 0x1a, 0x6f, 0x8a, 0xb3, 0xb2 }; // F
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Helper functions definition portion.
|
|
||||||
//
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// Don't vectorize, move decl to header file
|
|
||||||
|
|
||||||
// Translates an input array with values in base 257 to output array with values in base 256.
|
|
||||||
// Returns the carry bit.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - input: the input array of size EIGHTH_N. Each value in the array is a number in Z_257.
|
|
||||||
// The MSB is assumed to be the last one in the array.
|
|
||||||
// - output: the input array encoded in base 256.
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
// - The carry bit (MSB).
|
|
||||||
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N]);
|
|
||||||
|
|
||||||
// Translates an input integer into the range (-FIELD_SIZE / 2) <= result <= (FIELD_SIZE / 2).
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - x: the input integer.
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
// - The result, which equals (x MOD FIELD_SIZE), such that |result| <= (FIELD_SIZE / 2).
|
|
||||||
int Center(int x);
|
|
||||||
|
|
||||||
// Calculates bit reversal permutation.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - input: the input to reverse.
|
|
||||||
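// - numOfBits: a power of two, 2^b, where b is the number of low bits of the input to reverse. It is OR-ed into the input as a stop marker, so the input must be smaller than it (callers pass W or N / W).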
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
// - The resulting number, which is obtained from the input by reversing its bits.
|
|
||||||
int ReverseBits(int input, int numOfBits);
|
|
||||||
|
|
||||||
// Initializes the FFT fast lookup table.
|
|
||||||
// Shall be called only once.
|
|
||||||
void InitializeSWIFFTX();
|
|
||||||
|
|
||||||
// Calculates the FFT.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - input: the input to the FFT.
|
|
||||||
// - output: the resulting output.
|
|
||||||
void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output);
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Helper functions implementation portion.
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// Don't vectorize, delete this copy.
|
|
||||||
|
|
||||||
// Re-encodes EIGHTH_N base-257 digits as EIGHTH_N base-256 bytes.
//
// Parameters:
// - input: EIGHTH_N digits; input[0] is the least significant one.
// - output: receives the EIGHTH_N bytes, least significant first.
//
// Returns:
// - Whatever is left above bit 15 of the top 16-bit limb, i.e. the carry.
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N])
{
    swift_int32_t limbs[EIGHTH_N / 2];
    const int top = (EIGHTH_N / 2) - 1;
    int idx;
    int start;

    // Fold every two digits into one limb: d0 + 257 * d1, where
    // 257 * d1 is computed as d1 + (d1 << 8).
    for (idx = 0; 2 * idx < EIGHTH_N; ++idx)
    {
        const swift_int32_t low = input[2 * idx];
        const swift_int32_t high = input[2 * idx + 1];

        limbs[idx] = low + high + (high << 8);
    }

    // Horner-style sweep from the top limb downwards. A limb one position up
    // weighs 257^2 = 65536 + 513, so its contribution to the limb below is a
    // multiplication by 513 = 1 + (1 << 9); the part above 16 bits is carried up.
    for (start = top - 1; start >= 0; --start)
    {
        for (idx = start; idx < top; ++idx)
        {
            const swift_int32_t upper = limbs[idx + 1];
            const swift_int32_t mixed = limbs[idx] + upper + (upper << 9);

            limbs[idx] = mixed & 0xffff;
            limbs[idx + 1] = upper + (mixed >> 16);
        }
    }

    // Split each 16-bit limb into its two output bytes, low byte first.
    for (idx = 0; 2 * idx < EIGHTH_N; ++idx)
    {
        const swift_int32_t limb = limbs[idx];

        output[2 * idx] = (unsigned char) (limb & 0xff);
        output[2 * idx + 1] = (unsigned char) ((limb >> 8) & 0xff);
    }

    return (limbs[EIGHTH_N / 2 - 1] >> 16);
}
|
|
||||||
|
|
||||||
int Center(int x)
|
|
||||||
{
|
|
||||||
int result = x % FIELD_SIZE;
|
|
||||||
|
|
||||||
if (result > (FIELD_SIZE / 2))
|
|
||||||
result -= FIELD_SIZE;
|
|
||||||
|
|
||||||
if (result < (FIELD_SIZE / -2))
|
|
||||||
result += FIELD_SIZE;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reverses the low bits of input.
//
// numOfBits is OR-ed into input as a stop marker: bits are consumed from the
// least significant end until only the marker bit is left. With numOfBits a
// power of two, 2^b, and input < numOfBits, the result is the b-bit reversal
// of input.
int ReverseBits(int input, int numOfBits)
{
    int remaining = input | numOfBits;
    int result = 0;

    while (remaining > 1)
    {
        // Append the current lowest bit to the growing result.
        result = (result << 1) | (remaining & 1);
        remaining >>= 1;
    }

    return result;
}
|
|
||||||
|
|
||||||
void InitializeSWIFFTX()
|
|
||||||
{
|
|
||||||
int i, j, k, x;
|
|
||||||
// The powers of OMEGA
|
|
||||||
int omegaPowers[2 * N];
|
|
||||||
omegaPowers[0] = 1;
|
|
||||||
|
|
||||||
if (wasSetupDone)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (i = 1; i < (2 * N); ++i)
|
|
||||||
{
|
|
||||||
omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < (N / W); ++i)
|
|
||||||
{
|
|
||||||
for (j = 0; j < W; ++j)
|
|
||||||
{
|
|
||||||
multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (x = 0; x < 256; ++x)
|
|
||||||
{
|
|
||||||
for (j = 0; j < 8; ++j)
|
|
||||||
{
|
|
||||||
register int temp = 0;
|
|
||||||
for (k = 0; k < 8; ++k)
|
|
||||||
{
|
|
||||||
temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)]
|
|
||||||
* ((x >> k) & 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
fftTable[(x << 3) + j] = Center(temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
wasSetupDone = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Draft of a 4-lane (128-bit vector) version of the FFT step.
// NOTE(review): this function does not look compilable as written; the review
// notes inside mark the spots that need attention before it can be used.
// input should be deinterleaved in contiguous memory
|
|
||||||
// output and F are 4x32
|
|
||||||
// multipliers & fftTable are scalar 16
|
|
||||||
|
|
||||||
|
|
||||||
void FFT_4way(const unsigned char input[EIGHTH_N], swift_int32_t *output)
|
|
||||||
{
|
|
||||||
swift_int16_t *mult = multipliers;
|
|
||||||
m128_swift_int32_t F[64];
|
|
||||||
|
|
||||||
// First pass: fill F from the fftTable entries selected by the input bytes,
// scaled by the matching row of multipliers.
for (int i = 0; i < 8; i++)
|
|
||||||
{
|
|
||||||
// j is the offset of row i in the 8-entry-per-row multipliers table.
int j = i<<3;
|
|
||||||
|
|
||||||
// Need to isolate bytes in input, 8 bytes per lane.
|
|
||||||
// Each iteration of the loop processes one input vector
|
|
||||||
// Each lane reads a different index to fftTable.
|
|
||||||
|
|
||||||
// deinterleave the input!
|
|
||||||
|
|
||||||
// load table with 4 lanes from different indexes into fftTable
|
|
||||||
// extract bytes into m128 4x16
|
|
||||||
// multiply by vectorized mult
|
|
||||||
|
|
||||||
// input[lane][byte]
|
|
||||||
|
|
||||||
// NOTE(review): input is declared above as a one-dimensional byte array, yet
// it is indexed as input[lane][byte] in the two gathers below - confirm the
// intended layout and parameter type.
__m128i table;
|
|
||||||
table = _mm_set_epi32( fftTable[ input[3][i] ],
|
|
||||||
fftTable[ input[2][i] ],
|
|
||||||
fftTable[ input[1][i] ],
|
|
||||||
fftTable[ input[0][i] ] );
|
|
||||||
|
|
||||||
F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table );
|
|
||||||
|
|
||||||
// NOTE(review): the four arguments of this call have no commas between them.
table = _mm_set_epi32( fftTable[ input[3][i+1] ]
|
|
||||||
fftTable[ input[2][i+1] ]
|
|
||||||
fftTable[ input[1][i+1] ]
|
|
||||||
fftTable[ input[0][i+1] ] );
|
|
||||||
|
|
||||||
// NOTE(review): uses mult[j+0] again, while the later assignment to F[i+8]
// uses mult[j+1] - confirm which is intended.
F[i+8] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ), table );
|
|
||||||
|
|
||||||
|
|
||||||
// NOTE(review): second declaration of 'table' in the same scope. The eight
// assignments that follow also overwrite the F[i] and F[i+8] values computed
// above; each one multiplies a broadcast multiplier by a broadcast table entry.
m128_swift_int16_t *table = &( fftTable[input[i] << 3] );
|
|
||||||
|
|
||||||
F[i ] = _mm_mullo_epi32( mm128_const1_32( mult[j+0] ),
|
|
||||||
mm128_const1_32( table[0] ) );
|
|
||||||
F[i+ 8] = _mm_mullo_epi32( mm128_const1_32( mult[j+1] ),
|
|
||||||
mm128_const1_32( table[1] ) );
|
|
||||||
F[i+16] = _mm_mullo_epi32( mm128_const1_32( mult[j+2] ),
|
|
||||||
mm128_const1_32( table[2] ) );
|
|
||||||
F[i+24] = _mm_mullo_epi32( mm128_const1_32( mult[j+3] ),
|
|
||||||
mm128_const1_32( table[3] ) );
|
|
||||||
F[i+32] = _mm_mullo_epi32( mm128_const1_32( mult[j+4] ),
|
|
||||||
mm128_const1_32( table[4] ) );
|
|
||||||
F[i+40] = _mm_mullo_epi32( mm128_const1_32( mult[j+5] ),
|
|
||||||
mm128_const1_32( table[5] ) );
|
|
||||||
F[i+48] = _mm_mullo_epi32( mm128_const1_32( mult[j+6] ),
|
|
||||||
mm128_const1_32( table[6] ) );
|
|
||||||
F[i+56] = _mm_mullo_epi32( mm128_const1_32( mult[j+7] ),
|
|
||||||
mm128_const1_32( table[7] ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Second pass: for each group of 8 consecutive F entries, apply ADD_SUB_4WAY
// (defined elsewhere) to pairs at distance 1, then 2, then 4, with 32-bit left
// shifts of selected entries between the rounds, then store the reduced values.
for ( int i = 0; i < 8; i++ )
|
|
||||||
{
|
|
||||||
// j is the index of the first F entry of group i.
int j = i<<3;
|
|
||||||
// Round 1: pairs at distance 1.
ADD_SUB_4WAY( F[j ], F[j+1] );
|
|
||||||
ADD_SUB_4WAY( F[j+2], F[j+3] );
|
|
||||||
ADD_SUB_4WAY( F[j+4], F[j+5] );
|
|
||||||
ADD_SUB_4WAY( F[j+6], F[j+7] );
|
|
||||||
|
|
||||||
F[j+3] = _mm_slli_epi32( F[j+3], 4 );
|
|
||||||
F[j+7] = _mm_slli_epi32( F[j+7], 4 );
|
|
||||||
|
|
||||||
// Round 2: pairs at distance 2.
ADD_SUB_4WAY( F[j ], F[j+2] );
|
|
||||||
ADD_SUB_4WAY( F[j+1], F[j+3] );
|
|
||||||
ADD_SUB_4WAY( F[j+4], F[j+6] );
|
|
||||||
ADD_SUB_4WAY( F[j+5], F[j+7] );
|
|
||||||
|
|
||||||
F[j+5] = _mm_slli_epi32( F[j+5], 2 );
|
|
||||||
F[j+6] = _mm_slli_epi32( F[j+6], 4 );
|
|
||||||
F[j+7] = _mm_slli_epi32( F[j+7], 6 );
|
|
||||||
|
|
||||||
// Round 3: pairs at distance 4.
ADD_SUB_4WAY( F[j ], F[j+4] );
|
|
||||||
ADD_SUB_4WAY( F[j+1], F[j+5] );
|
|
||||||
ADD_SUB_4WAY( F[j+2], F[j+6] );
|
|
||||||
ADD_SUB_4WAY( F[j+3], F[j+7] );
|
|
||||||
|
|
||||||
// Entry k of group i is stored at output[i + 8 * k].
// NOTE(review): output is declared as swift_int32_t *, while F holds vector
// elements - confirm what Q_REDUCE_4WAY yields and the intended output type.
output[i ] = Q_REDUCE_4WAY( F[j ] );
|
|
||||||
output[i+ 8] = Q_REDUCE_4WAY( F[j+1] );
|
|
||||||
output[i+16] = Q_REDUCE_4WAY( F[j+2] );
|
|
||||||
output[i+24] = Q_REDUCE_4WAY( F[j+3] );
|
|
||||||
output[i+32] = Q_REDUCE_4WAY( F[j+4] );
|
|
||||||
output[i+40] = Q_REDUCE_4WAY( F[j+5] );
|
|
||||||
output[i+48] = Q_REDUCE_4WAY( F[j+6] );
|
|
||||||
output[i+56] = Q_REDUCE_4WAY( F[j+7] );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculates the FFT part of SWIFFT.
|
|
||||||
// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
|
|
||||||
// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
|
|
||||||
// is only the A's part.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - input: the input to FFT.
|
|
||||||
// - m: the input size divided by 8. The function performs m FFTs.
|
|
||||||
// - output: will store the result.
|
|
||||||
void SWIFFTFFT(const unsigned char *input, int m, swift_int32_t *output)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0;
|
|
||||||
i < m;
|
|
||||||
i++, input += EIGHTH_N, output += N)
|
|
||||||
{
|
|
||||||
FFT(input, output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculates the 'sum' part of SWIFFT, including the base change at the end.
|
|
||||||
// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
|
|
||||||
// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
|
|
||||||
// is only the A's part.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - input: the input. Of size 64 * m.
|
|
||||||
// - m: the input size divided by 64.
|
|
||||||
// - output: will store the result.
|
|
||||||
// - a: the coefficients in the sum. Of size 64 * m.
|
|
||||||
void SWIFFTSum(const swift_int32_t *input, int m, unsigned char *output, const swift_int16_t *a)
|
|
||||||
{
|
|
||||||
int i, j;
|
|
||||||
swift_int32_t result[N];
|
|
||||||
register swift_int16_t carry = 0;
|
|
||||||
|
|
||||||
for (j = 0; j < N; ++j)
|
|
||||||
{
|
|
||||||
register swift_int32_t sum = 0;
|
|
||||||
const register swift_int32_t *f = input + j;
|
|
||||||
const register swift_int16_t *k = a + j;
|
|
||||||
|
|
||||||
for (i = 0; i < m; i++, f += N,k += N)
|
|
||||||
{
|
|
||||||
sum += (*f) * (*k);
|
|
||||||
}
|
|
||||||
|
|
||||||
result[j] = sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (j = 0; j < N; ++j)
|
|
||||||
{
|
|
||||||
result[j] = ((FIELD_SIZE << 22) + result[j]) % FIELD_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (j = 0; j < 8; ++j)
|
|
||||||
{
|
|
||||||
int register carryBit = TranslateToBase256(result + (j << 3), output + (j << 3));
|
|
||||||
carry |= carryBit << j;
|
|
||||||
}
|
|
||||||
|
|
||||||
output[N] = carry;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// On entry input is interleaved 4x64. SIZE is *4 lanes / 8 bytes,
|
|
||||||
// multiply by 2.
|
|
||||||
|
|
||||||
|
|
||||||
void ComputeSingleSWIFFTX_4way( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
|
||||||
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
|
|
||||||
bool doSmooth)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
// Will store the result of the FFT parts:
|
|
||||||
m128_swift_int32_t fftOut[N * M];
|
|
||||||
// swift_int32_t fftOut[N * M];
|
|
||||||
unsigned char intermediate[N * 3 + 8];
|
|
||||||
unsigned char carry0,carry1,carry2;
|
|
||||||
|
|
||||||
// Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets
|
|
||||||
// overriden by the following SWIFFT):
|
|
||||||
|
|
||||||
// 1. Compute the FFT of the input - the common part for the first 3 SWIFFTs:
|
|
||||||
SWIFFTFFT(input, M, fftOut);
|
|
||||||
|
|
||||||
// 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:
|
|
||||||
|
|
||||||
// 2a. The first SWIFFT:
|
|
||||||
SWIFFTSum(fftOut, M, intermediate, As);
|
|
||||||
// Remember the carry byte:
|
|
||||||
carry0 = intermediate[N];
|
|
||||||
|
|
||||||
// 2b. The second one:
|
|
||||||
SWIFFTSum(fftOut, M, intermediate + N, As + (M * N));
|
|
||||||
carry1 = intermediate[2 * N];
|
|
||||||
|
|
||||||
// 2c. The third one:
|
|
||||||
SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N));
|
|
||||||
carry2 = intermediate[3 * N];
|
|
||||||
|
|
||||||
//2d. Put three carry bytes in their place
|
|
||||||
intermediate[3 * N] = carry0;
|
|
||||||
intermediate[(3 * N) + 1] = carry1;
|
|
||||||
intermediate[(3 * N) + 2] = carry2;
|
|
||||||
|
|
||||||
// Padding intermediate output with 5 zeroes.
|
|
||||||
memset(intermediate + (3 * N) + 3, 0, 5);
|
|
||||||
|
|
||||||
// Apply the S-Box:
|
|
||||||
for (i = 0; i < (3 * N) + 8; ++i)
|
|
||||||
{
|
|
||||||
intermediate[i] = SBox[intermediate[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. The final and last SWIFFT:
|
|
||||||
SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut);
|
|
||||||
SWIFFTSum(fftOut, 3 * (N/8) + 1, output, As);
|
|
||||||
|
|
||||||
if (doSmooth)
|
|
||||||
{
|
|
||||||
unsigned char sum[N];
|
|
||||||
register int i, j;
|
|
||||||
memset(sum, 0, N);
|
|
||||||
|
|
||||||
for (i = 0; i < (N + 1) * 8; ++i)
|
|
||||||
{
|
|
||||||
register const swift_int16_t *AsRow;
|
|
||||||
register int AShift;
|
|
||||||
|
|
||||||
if (!(output[i >> 3] & (1 << (i & 7))))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
AsRow = As + N * M + (i & ~(N - 1)) ;
|
|
||||||
AShift = i & 63;
|
|
||||||
|
|
||||||
for (j = AShift; j < N; ++j)
|
|
||||||
{
|
|
||||||
sum[j] += AsRow[j - AShift];
|
|
||||||
}
|
|
||||||
|
|
||||||
for(j = 0; j < AShift; ++j)
|
|
||||||
{
|
|
||||||
sum[j] -= AsRow[N - AShift + j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < N; ++i)
|
|
||||||
{
|
|
||||||
output[i] = sum[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
output[N] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
@@ -604,21 +604,14 @@ void InitializeSWIFFTX()
|
|||||||
int omegaPowers[2 * N];
|
int omegaPowers[2 * N];
|
||||||
omegaPowers[0] = 1;
|
omegaPowers[0] = 1;
|
||||||
|
|
||||||
if (wasSetupDone)
|
if (wasSetupDone) return;
|
||||||
return;
|
|
||||||
|
|
||||||
for (i = 1; i < (2 * N); ++i)
|
for (i = 1; i < (2 * N); ++i)
|
||||||
{
|
|
||||||
omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA);
|
omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA);
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < (N / W); ++i)
|
for (i = 0; i < (N / W); ++i)
|
||||||
{
|
|
||||||
for (j = 0; j < W; ++j)
|
for (j = 0; j < W; ++j)
|
||||||
{
|
|
||||||
multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)];
|
multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)];
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (x = 0; x < 256; ++x)
|
for (x = 0; x < 256; ++x)
|
||||||
{
|
{
|
||||||
@@ -626,10 +619,8 @@ void InitializeSWIFFTX()
|
|||||||
{
|
{
|
||||||
register int temp = 0;
|
register int temp = 0;
|
||||||
for (k = 0; k < 8; ++k)
|
for (k = 0; k < 8; ++k)
|
||||||
{
|
|
||||||
temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)]
|
temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)]
|
||||||
* ((x >> k) & 1);
|
* ((x >> k) & 1);
|
||||||
}
|
|
||||||
|
|
||||||
fftTable[(x << 3) + j] = Center(temp);
|
fftTable[(x << 3) + j] = Center(temp);
|
||||||
}
|
}
|
||||||
@@ -703,18 +694,18 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output)
|
|||||||
|
|
||||||
#if defined (__AVX512VL__) && defined(__AVX512BW__)
|
#if defined (__AVX512VL__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
#define Q_REDUCE( a ) \
|
const __m256i mask = _mm256_movm_epi8( 0x11111111 );
|
||||||
_mm256_sub_epi32( _mm256_and_si256( a, \
|
|
||||||
_mm256_movm_epi8( 0x11111111 ) ), _mm256_srai_epi32( a, 8 ) )
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define Q_REDUCE( a ) \
|
const __m256i mask = m256_const1_32( 0x000000ff );
|
||||||
_mm256_sub_epi32( _mm256_and_si256( a, \
|
|
||||||
m256_const1_32( 0x000000ff ) ), _mm256_srai_epi32( a, 8 ) )
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define Q_REDUCE( a ) \
|
||||||
|
_mm256_sub_epi32( _mm256_and_si256( a, mask ), \
|
||||||
|
_mm256_srai_epi32( a, 8 ) )
|
||||||
|
|
||||||
out[0] = Q_REDUCE( F[0] );
|
out[0] = Q_REDUCE( F[0] );
|
||||||
out[1] = Q_REDUCE( F[1] );
|
out[1] = Q_REDUCE( F[1] );
|
||||||
out[2] = Q_REDUCE( F[2] );
|
out[2] = Q_REDUCE( F[2] );
|
||||||
@@ -805,9 +796,10 @@ void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output)
|
|||||||
|
|
||||||
#undef ADD_SUB
|
#undef ADD_SUB
|
||||||
|
|
||||||
|
const __m128i mask = m128_const1_32( 0x000000ff );
|
||||||
|
|
||||||
#define Q_REDUCE( a ) \
|
#define Q_REDUCE( a ) \
|
||||||
_mm_sub_epi32( _mm_and_si128( a, \
|
_mm_sub_epi32( _mm_and_si128( a, mask ), _mm_srai_epi32( a, 8 ) )
|
||||||
m128_const1_32( 0x000000ff ) ), _mm_srai_epi32( a, 8 ) )
|
|
||||||
|
|
||||||
out[ 0] = Q_REDUCE( F[ 0] );
|
out[ 0] = Q_REDUCE( F[ 0] );
|
||||||
out[ 1] = Q_REDUCE( F[ 1] );
|
out[ 1] = Q_REDUCE( F[ 1] );
|
||||||
@@ -1357,6 +1349,7 @@ void SWIFFTSum( const swift_int32_t *input, int m, unsigned char *output,
|
|||||||
output[N] = carry;
|
output[N] = carry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
void ComputeSingleSWIFFTX_smooth(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
void ComputeSingleSWIFFTX_smooth(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
||||||
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
|
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
|
||||||
bool doSmooth)
|
bool doSmooth)
|
||||||
@@ -1434,14 +1427,14 @@ void ComputeSingleSWIFFTX_smooth(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
|||||||
output[N] = 0;
|
output[N] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
void ComputeSingleSWIFFTX( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
void ComputeSingleSWIFFTX( unsigned char *input, unsigned char *output )
|
||||||
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE] )
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
// Will store the result of the FFT parts:
|
// Will store the result of the FFT parts:
|
||||||
swift_int32_t fftOut[N * M] __attribute__ ((aligned (64)));
|
swift_int32_t fftOut[N * M] __attribute__ ((aligned (64)));
|
||||||
unsigned char intermediate[N * 3 + 8] __attribute__ ((aligned (64)));
|
unsigned char sum[ N*3 + 8 ] __attribute__ ((aligned (64)));
|
||||||
unsigned char carry0,carry1,carry2;
|
unsigned char carry0,carry1,carry2;
|
||||||
|
|
||||||
// Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets
|
// Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets
|
||||||
@@ -1453,32 +1446,31 @@ void ComputeSingleSWIFFTX( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
|||||||
// 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:
|
// 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:
|
||||||
|
|
||||||
// 2a. The first SWIFFT:
|
// 2a. The first SWIFFT:
|
||||||
SWIFFTSum(fftOut, M, intermediate, As);
|
SWIFFTSum( fftOut, M, sum, As );
|
||||||
// Remember the carry byte:
|
carry0 = sum[N];
|
||||||
carry0 = intermediate[N];
|
|
||||||
|
|
||||||
// 2b. The second one:
|
// 2b. The second one:
|
||||||
SWIFFTSum(fftOut, M, intermediate + N, As + (M * N));
|
SWIFFTSum( fftOut, M, sum + N, As + M*N );
|
||||||
carry1 = intermediate[2 * N];
|
carry1 = sum[ 2*N ];
|
||||||
|
|
||||||
// 2c. The third one:
|
// 2c. The third one:
|
||||||
SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N));
|
SWIFFTSum( fftOut, M, sum + 2*N, As + 2*M*N );
|
||||||
carry2 = intermediate[3 * N];
|
carry2 = sum[ 3*N ];
|
||||||
|
|
||||||
//2d. Put three carry bytes in their place
|
//2d. Put three carry bytes in their place
|
||||||
intermediate[3 * N] = carry0;
|
sum[ 3*N ] = carry0;
|
||||||
intermediate[(3 * N) + 1] = carry1;
|
sum[ 3*N + 1 ] = carry1;
|
||||||
intermediate[(3 * N) + 2] = carry2;
|
sum[ 3*N + 2 ] = carry2;
|
||||||
|
|
||||||
// Padding intermediate output with 5 zeroes.
|
// Padding intermediate output with 5 zeroes.
|
||||||
memset(intermediate + (3 * N) + 3, 0, 5);
|
memset( sum + 3*N + 3, 0, 5 );
|
||||||
|
|
||||||
// Apply the S-Box:
|
// Apply the S-Box:
|
||||||
for ( i = 0; i < (3 * N) + 8; ++i )
|
for ( i = 0; i < (3 * N) + 8; ++i )
|
||||||
intermediate[i] = SBox[intermediate[i]];
|
sum[i] = SBox[ sum[i] ];
|
||||||
|
|
||||||
// 3. The final and last SWIFFT:
|
// 3. The final and last SWIFFT:
|
||||||
SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut);
|
SWIFFTFFT( sum, 3 * (N/8) + 1, fftOut );
|
||||||
SWIFFTSum(fftOut, 3 * (N/8) + 1, output, As);
|
SWIFFTSum( fftOut, 3 * (N/8) + 1, sum, As );
|
||||||
|
memcpy( output, sum, SWIFFTX_OUTPUT_BLOCK_SIZE - 1 );
|
||||||
}
|
}
|
||||||
|
@@ -61,11 +61,10 @@ void ComputeSingleSWIFFT(unsigned char *input, unsigned short m,
|
|||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - Success value.
|
// - Success value.
|
||||||
void ComputeSingleSWIFFTX( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
void ComputeSingleSWIFFTX( unsigned char *input, unsigned char *output );
|
||||||
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE] );
|
|
||||||
|
|
||||||
void ComputeSingleSWIFFTX_smooth( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
//void ComputeSingleSWIFFTX_smooth( unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
|
||||||
unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], bool doSmooth);
|
// unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE], bool doSmooth);
|
||||||
|
|
||||||
// Calculates the powers of OMEGA and generates the bit reversal permutation.
|
// Calculates the powers of OMEGA and generates the bit reversal permutation.
|
||||||
// You must call this function before doing SWIFFT/X, otherwise you will get zeroes everywhere.
|
// You must call this function before doing SWIFFT/X, otherwise you will get zeroes everywhere.
|
||||||
|
@@ -10,6 +10,7 @@
|
|||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
#include "Verthash.h"
|
#include "Verthash.h"
|
||||||
#include "mm_malloc.h"
|
#include "mm_malloc.h"
|
||||||
|
#include "malloc-huge.h"
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// Verthash info management
|
// Verthash info management
|
||||||
@@ -84,7 +85,14 @@ int verthash_info_init(verthash_info_t* info, const char* file_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Allocate data
|
// Allocate data
|
||||||
|
info->data = (uint8_t *)malloc_hugepages( fileSize );
|
||||||
|
if ( info->data )
|
||||||
|
{
|
||||||
|
if ( !opt_quiet ) applog( LOG_INFO, "Verthash data is using huge pages");
|
||||||
|
}
|
||||||
|
else
|
||||||
info->data = (uint8_t *)_mm_malloc( fileSize, 64 );
|
info->data = (uint8_t *)_mm_malloc( fileSize, 64 );
|
||||||
|
|
||||||
if ( !info->data )
|
if ( !info->data )
|
||||||
{
|
{
|
||||||
fclose( fileMiningData );
|
fclose( fileMiningData );
|
||||||
|
@@ -29,16 +29,11 @@ void sha3_4way_keccakf( __m256i st[25] )
|
|||||||
for ( r = 0; r < KECCAKF_ROUNDS; r++ )
|
for ( r = 0; r < KECCAKF_ROUNDS; r++ )
|
||||||
{
|
{
|
||||||
// Theta
|
// Theta
|
||||||
bc[0] = _mm256_xor_si256( st[0],
|
bc[0] = mm256_xor3( st[0], st[5], mm256_xor3( st[10], st[15], st[20] ) );
|
||||||
mm256_xor4( st[5], st[10], st[15], st[20] ) );
|
bc[1] = mm256_xor3( st[1], st[6], mm256_xor3( st[11], st[16], st[21] ) );
|
||||||
bc[1] = _mm256_xor_si256( st[1],
|
bc[2] = mm256_xor3( st[2], st[7], mm256_xor3( st[12], st[17], st[22] ) );
|
||||||
mm256_xor4( st[6], st[11], st[16], st[21] ) );
|
bc[3] = mm256_xor3( st[3], st[8], mm256_xor3( st[13], st[18], st[23] ) );
|
||||||
bc[2] = _mm256_xor_si256( st[2],
|
bc[4] = mm256_xor3( st[4], st[9], mm256_xor3( st[14], st[19], st[24] ) );
|
||||||
mm256_xor4( st[7], st[12], st[17], st[22] ) );
|
|
||||||
bc[3] = _mm256_xor_si256( st[3],
|
|
||||||
mm256_xor4( st[8], st[13], st[18], st[23] ) );
|
|
||||||
bc[4] = _mm256_xor_si256( st[4],
|
|
||||||
mm256_xor4( st[9], st[14], st[19], st[24] ) );
|
|
||||||
|
|
||||||
for ( i = 0; i < 5; i++ )
|
for ( i = 0; i < 5; i++ )
|
||||||
{
|
{
|
||||||
@@ -89,17 +84,13 @@ void sha3_4way_keccakf( __m256i st[25] )
|
|||||||
// Chi
|
// Chi
|
||||||
for ( j = 0; j < 25; j += 5 )
|
for ( j = 0; j < 25; j += 5 )
|
||||||
{
|
{
|
||||||
memcpy( bc, &st[ j ], 5*32 );
|
bc[0] = st[j];
|
||||||
st[ j ] = _mm256_xor_si256( st[ j ],
|
bc[1] = st[j+1];
|
||||||
_mm256_andnot_si256( bc[1], bc[2] ) );
|
st[ j ] = mm256_xorandnot( st[ j ], st[j+1], st[j+2] );
|
||||||
st[ j+1 ] = _mm256_xor_si256( st[ j+1 ],
|
st[ j+1 ] = mm256_xorandnot( st[ j+1 ], st[j+2], st[j+3] );
|
||||||
_mm256_andnot_si256( bc[2], bc[3] ) );
|
st[ j+2 ] = mm256_xorandnot( st[ j+2 ], st[j+3], st[j+4] );
|
||||||
st[ j+2 ] = _mm256_xor_si256( st[ j+2 ],
|
st[ j+3 ] = mm256_xorandnot( st[ j+3 ], st[j+4], bc[0] );
|
||||||
_mm256_andnot_si256( bc[3], bc[4] ) );
|
st[ j+4 ] = mm256_xorandnot( st[ j+4 ], bc[0], bc[1] );
|
||||||
st[ j+3 ] = _mm256_xor_si256( st[ j+3 ],
|
|
||||||
_mm256_andnot_si256( bc[4], bc[0] ) );
|
|
||||||
st[ j+4 ] = _mm256_xor_si256( st[ j+4 ],
|
|
||||||
_mm256_andnot_si256( bc[0], bc[1] ) );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iota
|
// Iota
|
||||||
|
@@ -127,7 +127,7 @@ bool register_verthash_algo( algo_gate_t* gate )
|
|||||||
{
|
{
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
gate->scanhash = (void*)&scanhash_verthash;
|
gate->scanhash = (void*)&scanhash_verthash;
|
||||||
gate->optimizations = AVX2_OPT;
|
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||||
|
|
||||||
const char *verthash_data_file = opt_data_file ? opt_data_file
|
const char *verthash_data_file = opt_data_file ? opt_data_file
|
||||||
: default_verthash_data_file;
|
: default_verthash_data_file;
|
||||||
|
@@ -60,7 +60,14 @@ void x16r_8way_prehash( void *vdata, void *pdata )
|
|||||||
case HAMSI:
|
case HAMSI:
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
hamsi512_8way_init( &x16r_ctx.hamsi );
|
hamsi512_8way_init( &x16r_ctx.hamsi );
|
||||||
hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 64 );
|
hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 72 );
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
mm128_bswap32_80( edata, pdata );
|
||||||
|
fugue512_init( &x16r_ctx.fugue );
|
||||||
|
fugue512_update( &x16r_ctx.fugue, edata, 76 );
|
||||||
|
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||||
|
edata, edata, edata, edata, 640 );
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
|
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
|
||||||
@@ -306,7 +313,7 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 );
|
hamsi512_8way_update( &ctx.hamsi, input + (72<<3), 8 );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||||
@@ -319,6 +326,34 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
hash7, vhash );
|
hash7, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
fugue512_update( &ctx.fugue, in0 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash0 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in1 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash1 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in2 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash2 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in3 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash3 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in4 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash4 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in5 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash5 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in6 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash6 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in7 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash7 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
fugue512_full( &ctx.fugue, hash0, in0, size );
|
fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||||
fugue512_full( &ctx.fugue, hash1, in1, size );
|
fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||||
fugue512_full( &ctx.fugue, hash2, in2, size );
|
fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||||
@@ -327,6 +362,7 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
fugue512_full( &ctx.fugue, hash5, in5, size );
|
fugue512_full( &ctx.fugue, hash5, in5, size );
|
||||||
fugue512_full( &ctx.fugue, hash6, in6, size );
|
fugue512_full( &ctx.fugue, hash6, in6, size );
|
||||||
fugue512_full( &ctx.fugue, hash7, in7, size );
|
fugue512_full( &ctx.fugue, hash7, in7, size );
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||||
@@ -347,25 +383,25 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
{
|
{
|
||||||
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
||||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
|
||||||
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
||||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
|
||||||
sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
||||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
|
||||||
sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
|
||||||
sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
||||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
|
||||||
sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
||||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
|
||||||
sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
||||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
|
||||||
sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 );
|
sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
||||||
}
|
}
|
||||||
@@ -532,7 +568,13 @@ void x16r_4way_prehash( void *vdata, void *pdata )
|
|||||||
case HAMSI:
|
case HAMSI:
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
hamsi512_4way_init( &x16r_ctx.hamsi );
|
hamsi512_4way_init( &x16r_ctx.hamsi );
|
||||||
hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 64 );
|
hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 72 );
|
||||||
|
break;
|
||||||
|
case FUGUE:
|
||||||
|
mm128_bswap32_80( edata, pdata );
|
||||||
|
fugue512_init( &x16r_ctx.fugue );
|
||||||
|
fugue512_update( &x16r_ctx.fugue, edata, 76 );
|
||||||
|
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
mm128_bswap32_intrlv80_4x32( vdata2, pdata );
|
mm128_bswap32_intrlv80_4x32( vdata2, pdata );
|
||||||
@@ -734,7 +776,7 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
break;
|
break;
|
||||||
case HAMSI:
|
case HAMSI:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
|
hamsi512_4way_update( &ctx.hamsi, input + (72<<2), 8 );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
@@ -745,10 +787,27 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
|||||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
|
if ( i == 0 )
|
||||||
|
{
|
||||||
|
fugue512_update( &ctx.fugue, in0 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash0 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in1 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash1 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in2 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash2 );
|
||||||
|
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
|
||||||
|
fugue512_update( &ctx.fugue, in3 + 76, 4 );
|
||||||
|
fugue512_final( &ctx.fugue, hash3 );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
fugue512_full( &ctx.fugue, hash0, in0, size );
|
fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||||
fugue512_full( &ctx.fugue, hash1, in1, size );
|
fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||||
fugue512_full( &ctx.fugue, hash2, in2, size );
|
fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||||
fugue512_full( &ctx.fugue, hash3, in3, size );
|
fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
|
@@ -62,8 +62,7 @@ bool register_x16r_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_x16r;
|
gate->scanhash = (void*)&scanhash_x16r;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
VAES_OPT | VAES256_OPT;
|
|
||||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -81,8 +80,7 @@ bool register_x16rv2_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_x16rv2;
|
gate->scanhash = (void*)&scanhash_x16rv2;
|
||||||
gate->hash = (void*)&x16rv2_hash;
|
gate->hash = (void*)&x16rv2_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
VAES_OPT | VAES256_OPT;
|
|
||||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -100,8 +98,7 @@ bool register_x16s_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_x16r;
|
gate->scanhash = (void*)&scanhash_x16r;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
VAES_OPT | VAES256_OPT;
|
|
||||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -234,8 +231,7 @@ bool register_x16rt_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_x16rt;
|
gate->scanhash = (void*)&scanhash_x16rt;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
VAES_OPT | VAES256_OPT;
|
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
@@ -252,8 +248,7 @@ bool register_x16rt_veil_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_x16rt;
|
gate->scanhash = (void*)&scanhash_x16rt;
|
||||||
gate->hash = (void*)&x16r_hash;
|
gate->hash = (void*)&x16r_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
VAES_OPT | VAES256_OPT;
|
|
||||||
gate->build_extraheader = (void*)&veil_build_extraheader;
|
gate->build_extraheader = (void*)&veil_build_extraheader;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
@@ -292,8 +287,7 @@ bool register_x21s_algo( algo_gate_t* gate )
|
|||||||
gate->hash = (void*)&x21s_hash;
|
gate->hash = (void*)&x21s_hash;
|
||||||
gate->miner_thread_init = (void*)&x21s_thread_init;
|
gate->miner_thread_init = (void*)&x21s_thread_init;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT |
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
VAES_OPT | VAES256_OPT;
|
|
||||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
|
@@ -24,15 +24,15 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
uint32_t ntime = bswap_32( pdata[17] );
|
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != masked_ntime )
|
||||||
{
|
{
|
||||||
x16rt_getTimeHash( ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||||
x16r_hash_order, ntime, timeHash );
|
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_8way_prehash( vdata, pdata );
|
x16r_8way_prehash( vdata, pdata );
|
||||||
@@ -78,15 +78,15 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( bench ) ptarget[7] = 0x0cff;
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
uint32_t ntime = bswap_32( pdata[17] );
|
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != masked_ntime )
|
||||||
{
|
{
|
||||||
x16rt_getTimeHash( ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||||
x16r_hash_order, ntime, timeHash );
|
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_4way_prehash( vdata, pdata );
|
x16r_4way_prehash( vdata, pdata );
|
||||||
|
@@ -20,15 +20,15 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
|||||||
mm128_bswap32_80( edata, pdata );
|
mm128_bswap32_80( edata, pdata );
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
uint32_t ntime = swab32( pdata[17] );
|
uint32_t masked_ntime = swab32( pdata[17] ) & 0xffffff80;
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != masked_ntime )
|
||||||
{
|
{
|
||||||
x16rt_getTimeHash( ntime, &timeHash );
|
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = masked_ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||||
x16r_hash_order, ntime, timeHash );
|
x16r_hash_order, swab32( pdata[17] ), timeHash );
|
||||||
}
|
}
|
||||||
|
|
||||||
x16r_prehash( edata, pdata );
|
x16r_prehash( edata, pdata );
|
||||||
|
@@ -12,7 +12,7 @@ bool register_sonoa_algo( algo_gate_t* gate )
|
|||||||
init_sonoa_ctx();
|
init_sonoa_ctx();
|
||||||
gate->hash = (void*)&sonoa_hash;
|
gate->hash = (void*)&sonoa_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -11,7 +11,7 @@ bool register_x17_algo( algo_gate_t* gate )
|
|||||||
#else
|
#else
|
||||||
gate->hash = (void*)&x17_hash;
|
gate->hash = (void*)&x17_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -12,7 +12,7 @@ bool register_xevan_algo( algo_gate_t* gate )
|
|||||||
init_xevan_ctx();
|
init_xevan_ctx();
|
||||||
gate->hash = (void*)&xevan_hash;
|
gate->hash = (void*)&xevan_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT | VAES256_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
@@ -31,8 +31,8 @@ bool register_x22i_algo( algo_gate_t* gate )
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
gate->optimizations = SSE2_OPT | SSE42_OPT | AES_OPT | AVX2_OPT | SHA_OPT
|
||||||
| AVX512_OPT | VAES_OPT | VAES256_OPT;
|
| AVX512_OPT | VAES_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -48,8 +48,8 @@ bool register_x25x_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_x25x;
|
gate->scanhash = (void*)&scanhash_x25x;
|
||||||
gate->hash = (void*)&x25x_hash;
|
gate->hash = (void*)&x25x_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | SHA_OPT |
|
gate->optimizations = SSE2_OPT | SSE42_OPT | AES_OPT | AVX2_OPT | SHA_OPT |
|
||||||
AVX512_OPT | VAES_OPT | VAES256_OPT;
|
AVX512_OPT | VAES_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -4,128 +4,97 @@
|
|||||||
# during development. However the information contained may provide compilation
|
# during development. However the information contained may provide compilation
|
||||||
# tips to users.
|
# tips to users.
|
||||||
|
|
||||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512-sha cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 > /dev/null
|
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 > /dev/null
|
||||||
|
|
||||||
# Icelake AVX512 SHA VAES
|
# AVX512 SHA VAES: Intel Core Icelake, Rocketlake
|
||||||
make distclean || echo clean
|
make distclean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
./autogen.sh || echo done
|
./autogen.sh || echo done
|
||||||
CFLAGS="-O3 -march=icelake-client -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=icelake-client -Wall -fno-common" ./configure --with-curl
|
||||||
|
#CFLAGS="-O3 -march=rocketlake -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-avx512-sha-vaes.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-avx512-sha-vaes
|
mv cpuminer cpuminer-avx512-sha-vaes
|
||||||
|
|
||||||
# Rocketlake AVX512 SHA AES
|
# AVX512 AES: Intel Core HEDT Skylake-X, Cascadelake
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=cascadelake -msha -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=skylake-avx512 -maes -Wall -fno-common" ./configure --with-curl
|
||||||
#CFLAGS="-O3 -march=skylake-avx512 -msha -Wall -fno-common" ./configure --with-curl
|
|
||||||
# CFLAGS="-O3 -march=rocketlake -Wall -fno-common" ./configure --with-curl
|
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-avx512-sha.exe
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-avx512-sha
|
|
||||||
|
|
||||||
# Skylake-X AVX512 AES
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-common" ./configure --with-curl
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-avx512.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-avx512
|
mv cpuminer cpuminer-avx512
|
||||||
|
|
||||||
# Haswell AVX2 AES
|
# AVX2 SHA VAES: Intel Alderlake, AMD Zen3
|
||||||
|
make clean || echo done
|
||||||
|
rm -f config.status
|
||||||
|
# vaes doesn't include aes
|
||||||
|
CFLAGS="-O3 -maes -mavx2 -msha -mvaes -Wall -fno-common" ./configure --with-curl
|
||||||
|
make -j 8
|
||||||
|
strip -s cpuminer
|
||||||
|
mv cpuminer cpuminer-avx2-sha-vaes
|
||||||
|
|
||||||
|
# AVX2 SHA AES: AMD Zen1
|
||||||
|
make clean || echo done
|
||||||
|
rm -f config.status
|
||||||
|
CFLAGS="-O3 -march=znver1 -maes -Wall -fno-common" ./configure --with-curl
|
||||||
|
#CFLAGS="-O3 -maes -mavx2 -msha -Wall -fno-common" ./configure --with-curl
|
||||||
|
make -j 8
|
||||||
|
strip -s cpuminer
|
||||||
|
mv cpuminer cpuminer-avx2-sha
|
||||||
|
|
||||||
|
# AVX2 AES: Intel Haswell..Cometlake
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
# GCC 9 doesn't include AES with core-avx2
|
# GCC 9 doesn't include AES with core-avx2
|
||||||
CFLAGS="-O3 -march=core-avx2 -maes -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=core-avx2 -maes -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-avx2.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-avx2
|
mv cpuminer cpuminer-avx2
|
||||||
|
|
||||||
# Sandybridge AVX AES
|
# AVX AES: Intel Sandybridge, Ivybridge
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=corei7-avx -maes -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=corei7-avx -maes -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-avx.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-avx
|
mv cpuminer cpuminer-avx
|
||||||
|
|
||||||
# Westmere SSE4.2 AES
|
# SSE4.2 AES: Intel Westmere
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=westmere -maes -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=westmere -maes -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-aes-sse42.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-aes-sse42
|
mv cpuminer cpuminer-aes-sse42
|
||||||
|
|
||||||
# Nehalem SSE4.2
|
# SSE4.2: Intel Nehalem
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-sse42.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-sse42
|
mv cpuminer cpuminer-sse42
|
||||||
|
|
||||||
# Core2 SSSE3
|
# SSSE3: Intel Core2
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-ssse3.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-ssse3
|
mv cpuminer cpuminer-ssse3
|
||||||
|
|
||||||
# Generic SSE2
|
# SSE2
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -msse2 -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -msse2 -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-sse2.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-sse2
|
mv cpuminer cpuminer-sse2
|
||||||
|
|
||||||
# AMD Zen1 AVX2 SHA
|
# Native to host CPU
|
||||||
make clean || echo done
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=znver1 -Wall -fno-common" ./configure --with-curl
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-zen.exe
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-zen
|
|
||||||
|
|
||||||
# AMD Zen3 AVX2 SHA VAES
|
|
||||||
make clean || echo done
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=znver2 -mvaes -Wall -fno-common" ./configure --with-curl
|
|
||||||
# CFLAGS="-O3 -march=znver3 -Wall -fno-common" ./configure --with-curl
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe cpuminer-zen3.exe
|
|
||||||
strip -s cpuminer
|
|
||||||
mv cpuminer cpuminer-zen3
|
|
||||||
|
|
||||||
# Native to current CPU
|
|
||||||
make clean || echo done
|
make clean || echo done
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
|
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
|
|
||||||
|
10
build-msys2.sh
Executable file
10
build-msys2.sh
Executable file
@@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Compile on Windows using MSYS2 and MinGW.
|
||||||
|
|
||||||
|
make distclean || echo clean
|
||||||
|
rm -f config.status
|
||||||
|
./autogen.sh || echo done
|
||||||
|
CFLAGS="-O3 --param=evrp-mode=legacy -march=native -Wall -D_WIN32_WINNT=0x0601" ./configure --with-curl
|
||||||
|
make -j 4
|
||||||
|
strip -s cpuminer
|
@@ -2,8 +2,8 @@
|
|||||||
#
|
#
|
||||||
# make clean and rm all the targeted executables.
|
# make clean and rm all the targeted executables.
|
||||||
|
|
||||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512-sha cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 cpuminer-zen3 > /dev/null
|
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes > /dev/null
|
||||||
|
|
||||||
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-zen3.exe > /dev/null
|
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe > /dev/null
|
||||||
|
|
||||||
make distclean > /dev/null
|
make distclean > /dev/null
|
||||||
|
4
compat.h
4
compat.h
@@ -3,6 +3,10 @@
|
|||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
|
|
||||||
|
#if _WIN32_WINNT==0x0601 // Windows 7
|
||||||
|
#define WINDOWS_CPU_GROUPS_ENABLED 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.1.
|
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.5.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='3.18.1'
|
PACKAGE_VERSION='3.19.5'
|
||||||
PACKAGE_STRING='cpuminer-opt 3.18.1'
|
PACKAGE_STRING='cpuminer-opt 3.19.5'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 3.18.1 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 3.19.5 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1404,7 +1404,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 3.18.1:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 3.19.5:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1509,7 +1509,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 3.18.1
|
cpuminer-opt configure 3.19.5
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 3.18.1, which was
|
It was created by cpuminer-opt $as_me 3.19.5, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2993,7 +2993,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='3.18.1'
|
VERSION='3.19.5'
|
||||||
|
|
||||||
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
cat >>confdefs.h <<_ACEOF
|
||||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 3.18.1, which was
|
This file was extended by cpuminer-opt $as_me 3.19.5, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 3.18.1
|
cpuminer-opt config.status 3.19.5
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [3.18.1])
|
AC_INIT([cpuminer-opt], [3.19.5])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
460
cpu-miner.c
460
cpu-miner.c
@@ -3,7 +3,7 @@
|
|||||||
* Copyright 2012-2014 pooler
|
* Copyright 2012-2014 pooler
|
||||||
* Copyright 2014 Lucas Jones
|
* Copyright 2014 Lucas Jones
|
||||||
* Copyright 2014-2016 Tanguy Pruvot
|
* Copyright 2014-2016 Tanguy Pruvot
|
||||||
* Copyright 2016-2020 Jay D Dee
|
* Copyright 2016-2021 Jay D Dee
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms of the GNU General Public License as published by the Free
|
* under the terms of the GNU General Public License as published by the Free
|
||||||
@@ -115,28 +115,24 @@ int opt_param_n = 0;
|
|||||||
int opt_param_r = 0;
|
int opt_param_r = 0;
|
||||||
int opt_n_threads = 0;
|
int opt_n_threads = 0;
|
||||||
bool opt_sapling = false;
|
bool opt_sapling = false;
|
||||||
|
static uint64_t opt_affinity = 0xFFFFFFFFFFFFFFFFULL; // default, use all cores
|
||||||
// Windows doesn't support 128 bit affinity mask.
|
|
||||||
// Need compile time and run time test.
|
|
||||||
#if defined(__linux) && defined(GCC_INT128)
|
|
||||||
#define AFFINITY_USES_UINT128 1
|
|
||||||
static uint128_t opt_affinity = -1;
|
|
||||||
static bool affinity_uses_uint128 = true;
|
|
||||||
#else
|
|
||||||
static uint64_t opt_affinity = -1;
|
|
||||||
static bool affinity_uses_uint128 = false;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int opt_priority = 0; // deprecated
|
int opt_priority = 0; // deprecated
|
||||||
int num_cpus = 1;
|
int num_cpus = 1;
|
||||||
int num_cpugroups = 1;
|
int num_cpugroups = 1; // For Windows
|
||||||
char *rpc_url = NULL;;
|
#define max_cpus 256 // max for affinity
|
||||||
|
char *rpc_url = NULL;
|
||||||
char *rpc_userpass = NULL;
|
char *rpc_userpass = NULL;
|
||||||
char *rpc_user, *rpc_pass;
|
char *rpc_user, *rpc_pass;
|
||||||
char *short_url = NULL;
|
char *short_url = NULL;
|
||||||
char *coinbase_address;
|
char *coinbase_address;
|
||||||
char *opt_data_file = NULL;
|
char *opt_data_file = NULL;
|
||||||
bool opt_verify = false;
|
bool opt_verify = false;
|
||||||
|
static bool opt_stratum_keepalive = false;
|
||||||
|
static struct timeval stratum_keepalive_timer;
|
||||||
|
// Stratum typically times out in 5 minutes or 300 seconds
|
||||||
|
#define stratum_keepalive_timeout 180 // 3 minutes
|
||||||
|
static struct timeval stratum_reset_time;
|
||||||
|
|
||||||
|
|
||||||
// pk_buffer_size is used as a version selector by b58 code, therefore
|
// pk_buffer_size is used as a version selector by b58 code, therefore
|
||||||
// it must be set correctly to work.
|
// it must be set correctly to work.
|
||||||
@@ -166,6 +162,7 @@ uint32_t accepted_share_count = 0;
|
|||||||
uint32_t rejected_share_count = 0;
|
uint32_t rejected_share_count = 0;
|
||||||
uint32_t stale_share_count = 0;
|
uint32_t stale_share_count = 0;
|
||||||
uint32_t solved_block_count = 0;
|
uint32_t solved_block_count = 0;
|
||||||
|
uint32_t stratum_errors = 0;
|
||||||
double *thr_hashrates;
|
double *thr_hashrates;
|
||||||
double global_hashrate = 0.;
|
double global_hashrate = 0.;
|
||||||
double total_hashes = 0.;
|
double total_hashes = 0.;
|
||||||
@@ -196,7 +193,6 @@ int default_api_listen = 4048;
|
|||||||
static struct timeval session_start;
|
static struct timeval session_start;
|
||||||
static struct timeval five_min_start;
|
static struct timeval five_min_start;
|
||||||
static uint64_t session_first_block = 0;
|
static uint64_t session_first_block = 0;
|
||||||
static double latency_sum = 0.;
|
|
||||||
static uint64_t submit_sum = 0;
|
static uint64_t submit_sum = 0;
|
||||||
static uint64_t accept_sum = 0;
|
static uint64_t accept_sum = 0;
|
||||||
static uint64_t stale_sum = 0;
|
static uint64_t stale_sum = 0;
|
||||||
@@ -227,18 +223,25 @@ char* lp_id;
|
|||||||
|
|
||||||
static void workio_cmd_free(struct workio_cmd *wc);
|
static void workio_cmd_free(struct workio_cmd *wc);
|
||||||
|
|
||||||
static void format_affinity_map( char *map_str, uint64_t map )
|
// array mapping thread to cpu
|
||||||
{
|
static uint8_t thread_affinity_map[ max_cpus ];
|
||||||
int n = num_cpus < 64 ? num_cpus : 64;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
|
// display affinity mask graphically
|
||||||
|
static void format_affinity_mask( char *mask_str, uint64_t mask )
|
||||||
|
{
|
||||||
|
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
|
||||||
|
int n = num_cpus / num_cpugroups;
|
||||||
|
#else
|
||||||
|
int n = num_cpus < 64 ? num_cpus : 64;
|
||||||
|
#endif
|
||||||
|
int i;
|
||||||
for ( i = 0; i < n; i++ )
|
for ( i = 0; i < n; i++ )
|
||||||
{
|
{
|
||||||
if ( map & 1 ) map_str[i] = '!';
|
if ( mask & 1 ) mask_str[i] = '!';
|
||||||
else map_str[i] = '.';
|
else mask_str[i] = '.';
|
||||||
map >>= 1;
|
mask >>= 1;
|
||||||
}
|
}
|
||||||
memset( &map_str[i], 0, 64 - i );
|
memset( &mask_str[i], 0, 64 - i );
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux /* Linux specific policy and affinity management */
|
#ifdef __linux /* Linux specific policy and affinity management */
|
||||||
@@ -260,93 +263,70 @@ static inline void drop_policy(void)
|
|||||||
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
|
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Linux affinity can use int128.
|
static void affine_to_cpu( struct thr_info *thr )
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
static void affine_to_cpu_mask( int id, uint128_t mask )
|
|
||||||
#else
|
|
||||||
static void affine_to_cpu_mask( int id, uint64_t mask )
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
|
int thread = thr->id;
|
||||||
cpu_set_t set;
|
cpu_set_t set;
|
||||||
CPU_ZERO( &set );
|
CPU_ZERO( &set );
|
||||||
uint8_t ncpus = (num_cpus > 256) ? 256 : num_cpus;
|
CPU_SET( thread_affinity_map[ thread ], &set );
|
||||||
|
if ( opt_debug )
|
||||||
for ( uint8_t i = 0; i < ncpus; i++ )
|
applog( LOG_INFO, "Binding thread %d to cpu %d",
|
||||||
{
|
thread, thread_affinity_map[ thread ] );
|
||||||
// cpu mask
|
pthread_setaffinity_np( thr->pth, sizeof(set), &set );
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
if( ( mask & ( (uint128_t)1 << i ) ) ) CPU_SET( i, &set );
|
|
||||||
#else
|
|
||||||
if( (ncpus > 64) || ( mask & (1 << i) ) ) CPU_SET( i, &set );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
if ( id == -1 )
|
|
||||||
{
|
|
||||||
// process affinity
|
|
||||||
sched_setaffinity(0, sizeof(&set), &set);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// thread only
|
|
||||||
pthread_setaffinity_np(thr_info[id].pth, sizeof(&set), &set);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(WIN32) /* Windows */
|
#elif defined(WIN32) /* Windows */
|
||||||
|
|
||||||
static inline void drop_policy(void) { }
|
static inline void drop_policy(void) { }
|
||||||
|
|
||||||
// Windows CPU groups to manage more than 64 CPUs.
|
// Windows CPU groups to manage more than 64 CPUs.
|
||||||
static void affine_to_cpu_mask( int id, uint64_t mask )
|
// mask arg is ignored
|
||||||
|
static void affine_to_cpu( struct thr_info *thr )
|
||||||
{
|
{
|
||||||
bool success;
|
int thread = thr->id;
|
||||||
unsigned long last_error;
|
unsigned long last_error;
|
||||||
// BOOL success;
|
bool ok;
|
||||||
// DWORD last_error;
|
|
||||||
|
|
||||||
if ( id == -1 )
|
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
|
||||||
success = SetProcessAffinityMask( GetCurrentProcess(), mask );
|
unsigned long group_size = GetActiveProcessorCount( 0 );
|
||||||
|
unsigned long group = thread / group_size;
|
||||||
// Are Windows CPU Groups supported?
|
unsigned long cpu = thread_affinity_map[ thread % group_size ];
|
||||||
#if _WIN32_WINNT==0x0601
|
|
||||||
else if ( num_cpugroups == 1 )
|
|
||||||
success = SetThreadAffinityMask( GetCurrentThread(), mask );
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Find the correct cpu group
|
|
||||||
int cpu = id % num_cpus;
|
|
||||||
int group;
|
|
||||||
for( group = 0; group < num_cpugroups; group++ )
|
|
||||||
{
|
|
||||||
int cpus = GetActiveProcessorCount( group );
|
|
||||||
if ( cpu < cpus ) break;
|
|
||||||
cpu -= cpus;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opt_debug)
|
|
||||||
applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)",
|
|
||||||
id, cpu, group, (1ULL << cpu));
|
|
||||||
|
|
||||||
GROUP_AFFINITY affinity;
|
GROUP_AFFINITY affinity;
|
||||||
affinity.Group = group;
|
affinity.Group = group;
|
||||||
affinity.Mask = 1ULL << cpu;
|
affinity.Mask = 1ULL << cpu;
|
||||||
success = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
|
|
||||||
}
|
if ( opt_debug )
|
||||||
|
applog( LOG_INFO, "Binding thread %d to cpu %d in cpu group %d",
|
||||||
|
thread, cpu, group );
|
||||||
|
|
||||||
|
ok = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
else
|
|
||||||
success = SetThreadAffinityMask( GetCurrentThread(), mask );
|
unsigned long cpu = thread_affinity_map[ thread ];
|
||||||
|
uint64_t mask = 1ULL << cpu;
|
||||||
|
|
||||||
|
if ( opt_debug )
|
||||||
|
applog( LOG_INFO, "Binding thread %d to cpu %d", thread, cpu );
|
||||||
|
|
||||||
|
ok = SetThreadAffinityMask( GetCurrentThread(), mask );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!success)
|
if ( !ok )
|
||||||
{
|
{
|
||||||
last_error = GetLastError();
|
last_error = GetLastError();
|
||||||
applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x",
|
applog( LOG_WARNING, "affine_to_cpu_mask for %u returned 0x%x",
|
||||||
id, last_error);
|
thread, last_error );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static inline void drop_policy(void) { }
|
static inline void drop_policy(void) { }
|
||||||
static void affine_to_cpu_mask(int id, unsigned long mask) { }
|
static void affine_to_cpu( struct thr_info *thr ) { }
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// not very useful, just index the array directly.
|
// not very useful, just index the array directly.
|
||||||
@@ -1067,9 +1047,17 @@ void report_summary_log( bool force )
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) )
|
if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) ) )
|
||||||
&& ( et.tv_sec < 300 ) )
|
{
|
||||||
|
if ( et.tv_sec < 300 )
|
||||||
return;
|
return;
|
||||||
|
if ( ( s_get_ptr != s_put_ptr ) && ( et.tv_sec < 360 ) )
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if ( !( force && ( submit_sum || ( et.tv_sec > 5 ) ) )
|
||||||
|
// && ( et.tv_sec < 300 ) )
|
||||||
|
// return;
|
||||||
|
|
||||||
// collect and reset periodic counters
|
// collect and reset periodic counters
|
||||||
pthread_mutex_lock( &stats_lock );
|
pthread_mutex_lock( &stats_lock );
|
||||||
@@ -1112,19 +1100,17 @@ void report_summary_log( bool force )
|
|||||||
applog( LOG_BLUE, "%s: %s", algo_names[ opt_algo ], short_url );
|
applog( LOG_BLUE, "%s: %s", algo_names[ opt_algo ], short_url );
|
||||||
applog2( LOG_NOTICE, "Periodic Report %s %s", et_str, upt_str );
|
applog2( LOG_NOTICE, "Periodic Report %s %s", et_str, upt_str );
|
||||||
applog2( LOG_INFO, "Share rate %.2f/min %.2f/min",
|
applog2( LOG_INFO, "Share rate %.2f/min %.2f/min",
|
||||||
submit_rate, (double)submitted_share_count*60. /
|
submit_rate, safe_div( (double)submitted_share_count*60.,
|
||||||
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ) );
|
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ), 0. ) );
|
||||||
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
|
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
|
||||||
shrate, shr_units, sess_hrate, sess_hr_units, ghrate, ghr_units );
|
shrate, shr_units, sess_hrate, sess_hr_units, ghrate, ghr_units );
|
||||||
|
|
||||||
if ( accepted_share_count < submitted_share_count )
|
if ( accepted_share_count < submitted_share_count )
|
||||||
{
|
{
|
||||||
double lost_ghrate = uptime.tv_sec == 0 ? 0.
|
double lost_ghrate = safe_div( target_diff
|
||||||
: target_diff
|
* (double)(submitted_share_count - accepted_share_count ),
|
||||||
* (double)(submitted_share_count - accepted_share_count )
|
(double)uptime.tv_sec, 0. );
|
||||||
/ (double)uptime.tv_sec;
|
double lost_shrate = safe_div( target_diff * (double)(submits - accepts ), share_time, 0. );
|
||||||
double lost_shrate = share_time == 0. ? 0.
|
|
||||||
: target_diff * (double)(submits - accepts ) / share_time;
|
|
||||||
char lshr_units[4] = {0};
|
char lshr_units[4] = {0};
|
||||||
char lghr_units[4] = {0};
|
char lghr_units[4] = {0};
|
||||||
scale_hash_for_display( &lost_shrate, lshr_units );
|
scale_hash_for_display( &lost_shrate, lshr_units );
|
||||||
@@ -1161,17 +1147,23 @@ void report_summary_log( bool force )
|
|||||||
applog2( prio, "Blocks Solved %7d %7d",
|
applog2( prio, "Blocks Solved %7d %7d",
|
||||||
solved, solved_block_count );
|
solved, solved_block_count );
|
||||||
}
|
}
|
||||||
|
if ( stratum_errors )
|
||||||
|
applog2( LOG_INFO, "Stratum resets %7d", stratum_errors );
|
||||||
|
|
||||||
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
|
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
|
||||||
highest_share, lowest_share );
|
highest_share, lowest_share );
|
||||||
|
|
||||||
int mismatch = submitted_share_count
|
int mismatch = submitted_share_count
|
||||||
- ( accepted_share_count + stale_share_count + rejected_share_count );
|
- ( accepted_share_count + stale_share_count + rejected_share_count );
|
||||||
|
|
||||||
if ( mismatch )
|
if ( mismatch )
|
||||||
{
|
{
|
||||||
if ( mismatch != 1 )
|
if ( stratum_errors )
|
||||||
applog2(LOG_MINR, "Count mismatch: %d, stats may be inaccurate", mismatch );
|
applog2( LOG_MINR, "Count mismatch: %d, stats may be inaccurate",
|
||||||
else
|
mismatch );
|
||||||
applog2(LOG_INFO, CL_LBL "Count mismatch, submitted share may still be pending" CL_N );
|
else if ( !opt_quiet )
|
||||||
|
applog2( LOG_INFO, CL_LBL
|
||||||
|
"Count mismatch, submitted share may still be pending" CL_N );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1287,7 +1279,6 @@ static int share_result( int result, struct work *work,
|
|||||||
else reject_sum++;
|
else reject_sum++;
|
||||||
}
|
}
|
||||||
submit_sum++;
|
submit_sum++;
|
||||||
latency_sum += latency;
|
|
||||||
|
|
||||||
pthread_mutex_unlock( &stats_lock );
|
pthread_mutex_unlock( &stats_lock );
|
||||||
|
|
||||||
@@ -1303,9 +1294,9 @@ static int share_result( int result, struct work *work,
|
|||||||
else rcol = CL_LRD;
|
else rcol = CL_LRD;
|
||||||
}
|
}
|
||||||
|
|
||||||
applog( LOG_INFO, "%d %s%s %s%s %s%s %s%s" CL_WHT ", %.3f sec (%dms)",
|
applog( LOG_INFO, "%d %s%s %s%s %s%s %s%s%s, %.3f sec (%dms)",
|
||||||
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
|
my_stats.share_count, acol, ares, scol, sres, rcol, rres, bcol,
|
||||||
bres, share_time, latency );
|
bres, CL_N, share_time, latency );
|
||||||
|
|
||||||
if ( unlikely( opt_debug || !result || solved ) )
|
if ( unlikely( opt_debug || !result || solved ) )
|
||||||
{
|
{
|
||||||
@@ -2008,6 +1999,10 @@ void set_work_data_big_endian( struct work *work )
|
|||||||
// calculate net diff from nbits.
|
// calculate net diff from nbits.
|
||||||
double std_calc_network_diff( struct work* work )
|
double std_calc_network_diff( struct work* work )
|
||||||
{
|
{
|
||||||
|
uint32_t nbits = work->data[ algo_gate.nbits_index ];
|
||||||
|
uint32_t shift = nbits & 0xff;
|
||||||
|
uint32_t bits = bswap_32( nbits ) & 0x00ffffff;
|
||||||
|
/*
|
||||||
// sample for diff 43.281 : 1c05ea29
|
// sample for diff 43.281 : 1c05ea29
|
||||||
// todo: endian reversed on longpoll could be zr5 specific...
|
// todo: endian reversed on longpoll could be zr5 specific...
|
||||||
int nbits_index = algo_gate.nbits_index;
|
int nbits_index = algo_gate.nbits_index;
|
||||||
@@ -2015,15 +2010,17 @@ double std_calc_network_diff( struct work* work )
|
|||||||
: swab32( work->data[ nbits_index ] );
|
: swab32( work->data[ nbits_index ] );
|
||||||
uint32_t bits = ( nbits & 0xffffff );
|
uint32_t bits = ( nbits & 0xffffff );
|
||||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||||
|
*/
|
||||||
|
|
||||||
int m;
|
int m;
|
||||||
double d = (double)0x0000ffff / (double)bits;
|
long double d = (long double)0x0000ffff / (long double)bits;
|
||||||
for ( m = shift; m < 29; m++ )
|
for ( m = shift; m < 29; m++ )
|
||||||
d *= 256.0;
|
d *= 256.0;
|
||||||
for ( m = 29; m < shift; m++ )
|
for ( m = 29; m < shift; m++ )
|
||||||
d /= 256.0;
|
d /= 256.0;
|
||||||
if ( opt_debug_diff )
|
if ( opt_debug_diff )
|
||||||
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
|
applog(LOG_DEBUG, "net diff: %8f -> shift %u, bits %08x", (double)d, shift, bits);
|
||||||
return d;
|
return (double)d;
|
||||||
}
|
}
|
||||||
|
|
||||||
void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||||
@@ -2117,7 +2114,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
|||||||
{
|
{
|
||||||
unsigned char *xnonce2str = bebin2hex( g_work->xnonce2,
|
unsigned char *xnonce2str = bebin2hex( g_work->xnonce2,
|
||||||
g_work->xnonce2_len );
|
g_work->xnonce2_len );
|
||||||
applog( LOG_INFO, "Extranonce2 %s, Block %d, Job %s",
|
applog( LOG_INFO, "Extranonce2 0x%s, Block %d, Job %s",
|
||||||
xnonce2str, sctx->block_height, g_work->job_id );
|
xnonce2str, sctx->block_height, g_work->job_id );
|
||||||
free( xnonce2str );
|
free( xnonce2str );
|
||||||
}
|
}
|
||||||
@@ -2162,7 +2159,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
|||||||
uint64_t net_ttf =
|
uint64_t net_ttf =
|
||||||
( last_block_height - session_first_block ) == 0 ? 0
|
( last_block_height - session_first_block ) == 0 ? 0
|
||||||
: et.tv_sec / ( last_block_height - session_first_block );
|
: et.tv_sec / ( last_block_height - session_first_block );
|
||||||
if ( net_diff && net_ttf )
|
if ( net_diff > 0. && net_ttf )
|
||||||
{
|
{
|
||||||
double net_hr = nd / net_ttf;
|
double net_hr = nd / net_ttf;
|
||||||
char net_hr_units[4] = {0};
|
char net_hr_units[4] = {0};
|
||||||
@@ -2175,7 +2172,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
|||||||
} // !quiet
|
} // !quiet
|
||||||
} // new diff/block
|
} // new diff/block
|
||||||
|
|
||||||
if ( new_job && !opt_quiet )
|
if ( new_job && !( opt_quiet || stratum_errors ) )
|
||||||
{
|
{
|
||||||
int mismatch = submitted_share_count - ( accepted_share_count
|
int mismatch = submitted_share_count - ( accepted_share_count
|
||||||
+ stale_share_count
|
+ stale_share_count
|
||||||
@@ -2243,49 +2240,9 @@ static void *miner_thread( void *userdata )
|
|||||||
if ( opt_priority == 0 )
|
if ( opt_priority == 0 )
|
||||||
drop_policy();
|
drop_policy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// CPU thread affinity
|
// CPU thread affinity
|
||||||
if ( num_cpus > 1 )
|
if ( opt_affinity && num_cpus > 1 ) affine_to_cpu( mythr );
|
||||||
{
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
// Default affinity
|
|
||||||
if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 )
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
|
|
||||||
if ( opt_debug )
|
|
||||||
applog( LOG_INFO, "Binding thread %d to cpu %d.",
|
|
||||||
thr_id, thr_id % num_cpus,
|
|
||||||
u128_hi64( (uint128_t)1 << (thr_id % num_cpus) ),
|
|
||||||
u128_lo64( (uint128_t)1 << (thr_id % num_cpus) ) );
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if ( ( opt_affinity == -1 ) && ( opt_n_threads > 1 ) )
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( thr_id, 1 << (thr_id % num_cpus) );
|
|
||||||
if (opt_debug)
|
|
||||||
applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
|
|
||||||
thr_id, thr_id % num_cpus, 1 << (thr_id % num_cpus)) ;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
else // Custom affinity
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( thr_id, opt_affinity );
|
|
||||||
if ( opt_debug )
|
|
||||||
{
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
if ( num_cpus > 64 )
|
|
||||||
applog( LOG_INFO, "Binding thread %d to mask %016llx %016llx",
|
|
||||||
thr_id, u128_hi64( opt_affinity ),
|
|
||||||
u128_lo64( opt_affinity ) );
|
|
||||||
else
|
|
||||||
applog( LOG_INFO, "Binding thread %d to mask %016llx",
|
|
||||||
thr_id, opt_affinity );
|
|
||||||
#else
|
|
||||||
applog( LOG_INFO, "Binding thread %d to mask %016llx",
|
|
||||||
thr_id, opt_affinity );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // num_cpus > 1
|
|
||||||
|
|
||||||
if ( !algo_gate.miner_thread_init( thr_id ) )
|
if ( !algo_gate.miner_thread_init( thr_id ) )
|
||||||
{
|
{
|
||||||
@@ -2495,6 +2452,8 @@ static void *miner_thread( void *userdata )
|
|||||||
timeval_subtract( &uptime, &total_hashes_time, &session_start );
|
timeval_subtract( &uptime, &total_hashes_time, &session_start );
|
||||||
double hashrate = safe_div( total_hashes, uptime.tv_sec, 0. );
|
double hashrate = safe_div( total_hashes, uptime.tv_sec, 0. );
|
||||||
|
|
||||||
|
if ( hashrate > 0. )
|
||||||
|
{
|
||||||
scale_hash_for_display( &hashrate, hr_units );
|
scale_hash_for_display( &hashrate, hr_units );
|
||||||
sprintf( hr, "%.2f", hashrate );
|
sprintf( hr, "%.2f", hashrate );
|
||||||
#if (defined(_WIN64) || defined(__WINDOWS__) || defined(_WIN32))
|
#if (defined(_WIN64) || defined(__WINDOWS__) || defined(_WIN32))
|
||||||
@@ -2508,6 +2467,7 @@ static void *miner_thread( void *userdata )
|
|||||||
hi_freq / 1e6 );
|
hi_freq / 1e6 );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} // benchmark
|
} // benchmark
|
||||||
|
|
||||||
// conditional mining
|
// conditional mining
|
||||||
@@ -2773,6 +2733,18 @@ void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
|||||||
sctx->job.final_sapling_hash );
|
sctx->job.final_sapling_hash );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loop is out of order:
|
||||||
|
//
|
||||||
|
// connect/reconnect
|
||||||
|
// handle message
|
||||||
|
// get new message
|
||||||
|
//
|
||||||
|
// change to
|
||||||
|
// connect/reconnect
|
||||||
|
// get new message
|
||||||
|
// handle message
|
||||||
|
|
||||||
|
|
||||||
static void *stratum_thread(void *userdata )
|
static void *stratum_thread(void *userdata )
|
||||||
{
|
{
|
||||||
struct thr_info *mythr = (struct thr_info *) userdata;
|
struct thr_info *mythr = (struct thr_info *) userdata;
|
||||||
@@ -2790,7 +2762,9 @@ static void *stratum_thread(void *userdata )
|
|||||||
if ( unlikely( stratum_need_reset ) )
|
if ( unlikely( stratum_need_reset ) )
|
||||||
{
|
{
|
||||||
stratum_need_reset = false;
|
stratum_need_reset = false;
|
||||||
|
gettimeofday( &stratum_reset_time, NULL );
|
||||||
stratum_down = true;
|
stratum_down = true;
|
||||||
|
stratum_errors++;
|
||||||
stratum_disconnect( &stratum );
|
stratum_disconnect( &stratum );
|
||||||
if ( strcmp( stratum.url, rpc_url ) )
|
if ( strcmp( stratum.url, rpc_url ) )
|
||||||
{
|
{
|
||||||
@@ -2799,7 +2773,7 @@ static void *stratum_thread(void *userdata )
|
|||||||
applog(LOG_BLUE, "Connection changed to %s", short_url);
|
applog(LOG_BLUE, "Connection changed to %s", short_url);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
applog(LOG_WARNING, "Stratum connection reset");
|
applog(LOG_BLUE, "Stratum connection reset");
|
||||||
// reset stats queue as well
|
// reset stats queue as well
|
||||||
restart_threads();
|
restart_threads();
|
||||||
if ( s_get_ptr != s_put_ptr ) s_get_ptr = s_put_ptr = 0;
|
if ( s_get_ptr != s_put_ptr ) s_get_ptr = s_put_ptr = 0;
|
||||||
@@ -2808,6 +2782,7 @@ static void *stratum_thread(void *userdata )
|
|||||||
while ( !stratum.curl )
|
while ( !stratum.curl )
|
||||||
{
|
{
|
||||||
stratum_down = true;
|
stratum_down = true;
|
||||||
|
restart_threads();
|
||||||
pthread_rwlock_wrlock( &g_work_lock );
|
pthread_rwlock_wrlock( &g_work_lock );
|
||||||
g_work_time = 0;
|
g_work_time = 0;
|
||||||
pthread_rwlock_unlock( &g_work_lock );
|
pthread_rwlock_unlock( &g_work_lock );
|
||||||
@@ -2829,17 +2804,17 @@ static void *stratum_thread(void *userdata )
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
stratum_down = false;
|
stratum_down = false;
|
||||||
restart_threads();
|
|
||||||
applog(LOG_BLUE,"Stratum connection established" );
|
applog(LOG_BLUE,"Stratum connection established" );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
report_summary_log( ( stratum_diff != stratum.job.diff )
|
// report_summary_log( ( stratum_diff != stratum.job.diff )
|
||||||
&& ( stratum_diff != 0. ) );
|
// && ( stratum_diff != 0. ) );
|
||||||
|
|
||||||
if ( stratum.new_job )
|
// if ( stratum.new_job )
|
||||||
stratum_gen_work( &stratum, &g_work );
|
// stratum_gen_work( &stratum, &g_work );
|
||||||
|
|
||||||
|
// Wait for new message from server
|
||||||
if ( likely( stratum_socket_full( &stratum, opt_timeout ) ) )
|
if ( likely( stratum_socket_full( &stratum, opt_timeout ) ) )
|
||||||
{
|
{
|
||||||
if ( likely( s = stratum_recv_line( &stratum ) ) )
|
if ( likely( s = stratum_recv_line( &stratum ) ) )
|
||||||
@@ -2862,6 +2837,53 @@ static void *stratum_thread(void *userdata )
|
|||||||
// stratum_disconnect( &stratum );
|
// stratum_disconnect( &stratum );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
report_summary_log( ( stratum_diff != stratum.job.diff )
|
||||||
|
&& ( stratum_diff != 0. ) );
|
||||||
|
|
||||||
|
if ( !stratum_need_reset )
|
||||||
|
{
|
||||||
|
// Is keepalive needed? Mutex would normally be required but that
|
||||||
|
// would block any attempt to submit a share. A share is more
|
||||||
|
// important even if it messes up the keepalive.
|
||||||
|
|
||||||
|
if ( opt_stratum_keepalive )
|
||||||
|
{
|
||||||
|
struct timeval now, et;
|
||||||
|
gettimeofday( &now, NULL );
|
||||||
|
// any shares submitted since last keepalive?
|
||||||
|
if ( last_submit_time.tv_sec > stratum_keepalive_timer.tv_sec )
|
||||||
|
memcpy( &stratum_keepalive_timer, &last_submit_time,
|
||||||
|
sizeof (struct timeval) );
|
||||||
|
|
||||||
|
timeval_subtract( &et, &now, &stratum_keepalive_timer );
|
||||||
|
|
||||||
|
if ( et.tv_sec > stratum_keepalive_timeout )
|
||||||
|
{
|
||||||
|
double diff = stratum.job.diff * 0.5;
|
||||||
|
stratum_keepalive_timer = now;
|
||||||
|
if ( !opt_quiet )
|
||||||
|
applog( LOG_BLUE,
|
||||||
|
"Stratum keepalive requesting lower difficulty" );
|
||||||
|
stratum_suggest_difficulty( &stratum, diff );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( last_submit_time.tv_sec > stratum_reset_time.tv_sec )
|
||||||
|
timeval_subtract( &et, &now, &last_submit_time );
|
||||||
|
else
|
||||||
|
timeval_subtract( &et, &now, &stratum_reset_time );
|
||||||
|
|
||||||
|
if ( et.tv_sec > stratum_keepalive_timeout + 60 )
|
||||||
|
{
|
||||||
|
applog( LOG_NOTICE, "No shares submitted, resetting stratum connection" );
|
||||||
|
stratum_need_reset = true;
|
||||||
|
stratum_keepalive_timer = now;
|
||||||
|
}
|
||||||
|
} // stratum_keepalive
|
||||||
|
|
||||||
|
if ( stratum.new_job && !stratum_need_reset )
|
||||||
|
stratum_gen_work( &stratum, &g_work );
|
||||||
|
|
||||||
|
} // stratum_need_reset
|
||||||
} // loop
|
} // loop
|
||||||
out:
|
out:
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -2900,13 +2922,15 @@ static bool cpu_capability( bool display_only )
|
|||||||
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
||||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||||
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
||||||
|
bool algo_has_avx = set_incl( AVX_OPT, algo_features );
|
||||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||||
bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features );
|
bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features );
|
||||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||||
bool algo_has_vaes = set_incl( VAES_OPT, algo_features );
|
bool algo_has_vaes = set_incl( VAES_OPT, algo_features );
|
||||||
bool algo_has_vaes256 = set_incl( VAES256_OPT, algo_features );
|
|
||||||
bool use_aes;
|
bool use_aes;
|
||||||
bool use_sse2;
|
bool use_sse2;
|
||||||
|
bool use_sse42;
|
||||||
|
bool use_avx;
|
||||||
bool use_avx2;
|
bool use_avx2;
|
||||||
bool use_avx512;
|
bool use_avx512;
|
||||||
bool use_sha;
|
bool use_sha;
|
||||||
@@ -2976,6 +3000,8 @@ static bool cpu_capability( bool display_only )
|
|||||||
else if ( sw_has_aes ) printf( " AES" );
|
else if ( sw_has_aes ) printf( " AES" );
|
||||||
if ( sw_has_sha ) printf( " SHA" );
|
if ( sw_has_sha ) printf( " SHA" );
|
||||||
|
|
||||||
|
if ( !display_only )
|
||||||
|
{
|
||||||
printf("\nAlgo features:");
|
printf("\nAlgo features:");
|
||||||
if ( algo_features == EMPTY_SET ) printf( " None" );
|
if ( algo_features == EMPTY_SET ) printf( " None" );
|
||||||
else
|
else
|
||||||
@@ -2984,11 +3010,11 @@ static bool cpu_capability( bool display_only )
|
|||||||
else if ( algo_has_avx2 ) printf( " AVX2 " );
|
else if ( algo_has_avx2 ) printf( " AVX2 " );
|
||||||
else if ( algo_has_sse42 ) printf( " SSE4.2" );
|
else if ( algo_has_sse42 ) printf( " SSE4.2" );
|
||||||
else if ( algo_has_sse2 ) printf( " SSE2 " );
|
else if ( algo_has_sse2 ) printf( " SSE2 " );
|
||||||
if ( algo_has_vaes ||
|
if ( algo_has_vaes ) printf( " VAES" );
|
||||||
algo_has_vaes256 ) printf( " VAES" );
|
|
||||||
else if ( algo_has_aes ) printf( " AES" );
|
else if ( algo_has_aes ) printf( " AES" );
|
||||||
if ( algo_has_sha ) printf( " SHA" );
|
if ( algo_has_sha ) printf( " SHA" );
|
||||||
}
|
}
|
||||||
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
if ( display_only ) return true;
|
if ( display_only ) return true;
|
||||||
@@ -3022,14 +3048,15 @@ static bool cpu_capability( bool display_only )
|
|||||||
|
|
||||||
// Determine mining options
|
// Determine mining options
|
||||||
use_sse2 = cpu_has_sse2 && algo_has_sse2;
|
use_sse2 = cpu_has_sse2 && algo_has_sse2;
|
||||||
|
use_sse42 = cpu_has_sse42 && sw_has_sse42 && algo_has_sse42;
|
||||||
|
use_avx = cpu_has_avx && sw_has_avx && algo_has_avx;
|
||||||
use_aes = cpu_has_aes && sw_has_aes && algo_has_aes;
|
use_aes = cpu_has_aes && sw_has_aes && algo_has_aes;
|
||||||
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
||||||
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
||||||
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
||||||
use_vaes = cpu_has_vaes && sw_has_vaes && ( algo_has_vaes
|
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes;
|
||||||
|| algo_has_vaes256 );
|
use_none = !( use_sse2 || use_sse42 || use_avx || use_aes || use_avx512
|
||||||
use_none = !( use_sse2 || use_aes || use_avx512 || use_avx2 ||
|
|| use_avx2 || use_sha || use_vaes );
|
||||||
use_sha || use_vaes );
|
|
||||||
|
|
||||||
// Display best options
|
// Display best options
|
||||||
printf( "\nStarting miner with" );
|
printf( "\nStarting miner with" );
|
||||||
@@ -3038,6 +3065,8 @@ static bool cpu_capability( bool display_only )
|
|||||||
{
|
{
|
||||||
if ( use_avx512 ) printf( " AVX512" );
|
if ( use_avx512 ) printf( " AVX512" );
|
||||||
else if ( use_avx2 ) printf( " AVX2" );
|
else if ( use_avx2 ) printf( " AVX2" );
|
||||||
|
else if ( use_avx ) printf( " AVX" );
|
||||||
|
else if ( use_sse42 ) printf( " SSE42" );
|
||||||
else if ( use_sse2 ) printf( " SSE2" );
|
else if ( use_sse2 ) printf( " SSE2" );
|
||||||
if ( use_vaes ) printf( " VAES" );
|
if ( use_vaes ) printf( " VAES" );
|
||||||
else if ( use_aes ) printf( " AES" );
|
else if ( use_aes ) printf( " AES" );
|
||||||
@@ -3126,7 +3155,7 @@ void parse_arg(int key, char *arg )
|
|||||||
{
|
{
|
||||||
char *p;
|
char *p;
|
||||||
int v, i;
|
int v, i;
|
||||||
uint64_t ul;
|
// uint64_t ul;
|
||||||
double d;
|
double d;
|
||||||
|
|
||||||
switch( key )
|
switch( key )
|
||||||
@@ -3438,22 +3467,12 @@ void parse_arg(int key, char *arg )
|
|||||||
#endif
|
#endif
|
||||||
case 1020: // cpu-affinity
|
case 1020: // cpu-affinity
|
||||||
p = strstr( arg, "0x" );
|
p = strstr( arg, "0x" );
|
||||||
if ( p )
|
opt_affinity = p ? strtoull( p, NULL, 16 )
|
||||||
ul = strtoull( p, NULL, 16 );
|
: atoll( arg );
|
||||||
else
|
|
||||||
ul = atoll( arg );
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
// replicate the low 64 bits to make a full 128 bit mask if there are more
|
|
||||||
// than 64 CPUs, otherwise zero extend the upper half.
|
|
||||||
opt_affinity = (uint128_t)ul;
|
|
||||||
if ( num_cpus > 64 )
|
|
||||||
opt_affinity |= opt_affinity << 64;
|
|
||||||
#else
|
|
||||||
opt_affinity = ul;
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case 1021: // cpu-priority
|
case 1021: // cpu-priority
|
||||||
v = atoi(arg);
|
v = atoi(arg);
|
||||||
|
applog(LOG_NOTICE,"--cpu-priority is deprecated and will be removed from a future release");
|
||||||
if (v < 0 || v > 5) /* sanity check */
|
if (v < 0 || v > 5) /* sanity check */
|
||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
opt_priority = v;
|
opt_priority = v;
|
||||||
@@ -3490,6 +3509,7 @@ void parse_arg(int key, char *arg )
|
|||||||
break;
|
break;
|
||||||
case 1024:
|
case 1024:
|
||||||
opt_randomize = true;
|
opt_randomize = true;
|
||||||
|
applog(LOG_NOTICE,"--randomize is deprecated and will be removed from a future release");
|
||||||
break;
|
break;
|
||||||
case 1027: // data-file
|
case 1027: // data-file
|
||||||
opt_data_file = strdup( arg );
|
opt_data_file = strdup( arg );
|
||||||
@@ -3497,6 +3517,9 @@ void parse_arg(int key, char *arg )
|
|||||||
case 1028: // verify
|
case 1028: // verify
|
||||||
opt_verify = true;
|
opt_verify = true;
|
||||||
break;
|
break;
|
||||||
|
case 1029: // stratum-keepalive
|
||||||
|
opt_stratum_keepalive = true;
|
||||||
|
break;
|
||||||
case 'V':
|
case 'V':
|
||||||
display_cpu_capability();
|
display_cpu_capability();
|
||||||
exit(0);
|
exit(0);
|
||||||
@@ -3558,9 +3581,7 @@ static void parse_cmdline(int argc, char *argv[])
|
|||||||
#else
|
#else
|
||||||
key = getopt(argc, argv, short_options);
|
key = getopt(argc, argv, short_options);
|
||||||
#endif
|
#endif
|
||||||
if (key < 0)
|
if ( key < 0 ) break;
|
||||||
break;
|
|
||||||
|
|
||||||
parse_arg( key, optarg );
|
parse_arg( key, optarg );
|
||||||
}
|
}
|
||||||
if ( optind < argc )
|
if ( optind < argc )
|
||||||
@@ -3631,16 +3652,10 @@ int main(int argc, char *argv[])
|
|||||||
rpc_user = strdup("");
|
rpc_user = strdup("");
|
||||||
rpc_pass = strdup("");
|
rpc_pass = strdup("");
|
||||||
|
|
||||||
parse_cmdline(argc, argv);
|
|
||||||
|
|
||||||
#if defined(WIN32)
|
#if defined(WIN32)
|
||||||
// SYSTEM_INFO sysinfo;
|
|
||||||
// GetSystemInfo(&sysinfo);
|
|
||||||
// num_cpus = sysinfo.dwNumberOfProcessors;
|
|
||||||
// What happens if GetActiveProcessorGroupCount called if groups not enabled?
|
|
||||||
|
|
||||||
// Are Windows CPU Groups supported?
|
// Are Windows CPU Groups supported?
|
||||||
#if _WIN32_WINNT==0x0601
|
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
|
||||||
num_cpus = 0;
|
num_cpus = 0;
|
||||||
num_cpugroups = GetActiveProcessorGroupCount();
|
num_cpugroups = GetActiveProcessorGroupCount();
|
||||||
for( i = 0; i < num_cpugroups; i++ )
|
for( i = 0; i < num_cpugroups; i++ )
|
||||||
@@ -3649,8 +3664,9 @@ int main(int argc, char *argv[])
|
|||||||
num_cpus += cpus;
|
num_cpus += cpus;
|
||||||
|
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
applog(LOG_DEBUG, "Found %d cpus on cpu group %d", cpus, i);
|
applog( LOG_INFO, "Found %d CPUs in CPU group %d", cpus, i );
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
SYSTEM_INFO sysinfo;
|
SYSTEM_INFO sysinfo;
|
||||||
GetSystemInfo(&sysinfo);
|
GetSystemInfo(&sysinfo);
|
||||||
@@ -3666,21 +3682,22 @@ int main(int argc, char *argv[])
|
|||||||
#else
|
#else
|
||||||
num_cpus = 1;
|
num_cpus = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( num_cpus < 1 )
|
if ( num_cpus < 1 )
|
||||||
num_cpus = 1;
|
num_cpus = 1;
|
||||||
|
|
||||||
if (!opt_n_threads)
|
|
||||||
opt_n_threads = num_cpus;
|
opt_n_threads = num_cpus;
|
||||||
|
|
||||||
|
parse_cmdline( argc, argv );
|
||||||
|
|
||||||
if ( opt_algo == ALGO_NULL )
|
if ( opt_algo == ALGO_NULL )
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s: no algo supplied\n", argv[0]);
|
fprintf( stderr, "%s: No algo parameter specified\n", argv[0] );
|
||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// need to register to get algo optimizations for cpu capabilities
|
// need to register to get algo optimizations for cpu capabilities
|
||||||
// but that causes register logs before cpu capabilities is output.
|
// but that causes registration logs before cpu capabilities is output.
|
||||||
// Would need to split register into 2 parts. First part sets algo
|
// Would need to split register function into 2 parts. First part sets algo
|
||||||
// optimizations but no logging, second part does any logging.
|
// optimizations but no logging, second part does any logging.
|
||||||
if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
|
if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
|
||||||
|
|
||||||
@@ -3724,9 +3741,6 @@ int main(int argc, char *argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// All options must be set before starting the gate
|
|
||||||
// if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
|
|
||||||
|
|
||||||
if ( coinbase_address )
|
if ( coinbase_address )
|
||||||
{
|
{
|
||||||
pk_script_size = address_to_script( pk_script, pk_buffer_size,
|
pk_script_size = address_to_script( pk_script, pk_buffer_size,
|
||||||
@@ -3738,8 +3752,6 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if ( !check_cpu_capability() ) exit(1);
|
|
||||||
|
|
||||||
pthread_mutex_init( &stats_lock, NULL );
|
pthread_mutex_init( &stats_lock, NULL );
|
||||||
pthread_rwlock_init( &g_work_lock, NULL );
|
pthread_rwlock_init( &g_work_lock, NULL );
|
||||||
pthread_mutex_init( &stratum.sock_lock, NULL );
|
pthread_mutex_init( &stratum.sock_lock, NULL );
|
||||||
@@ -3809,42 +3821,26 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// To be confirmed with more than 64 cpus
|
if ( opt_affinity && num_cpus > max_cpus )
|
||||||
if ( opt_affinity != -1 )
|
|
||||||
{
|
{
|
||||||
if ( !affinity_uses_uint128 && num_cpus > 64 )
|
applog( LOG_WARNING, "More than %d CPUs, CPU affinity is disabled",
|
||||||
{
|
max_cpus );
|
||||||
applog(LOG_WARNING,"Setting CPU affinity with more than 64 CPUs is only");
|
opt_affinity = 0ULL;
|
||||||
applog(LOG_WARNING,"available on Linux. Using default affinity.");
|
|
||||||
opt_affinity = -1;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
else
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( -1, opt_affinity );
|
|
||||||
if ( !opt_quiet )
|
|
||||||
{
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
if ( num_cpus > 64 )
|
|
||||||
applog(LOG_DEBUG, "Binding process to cpu mask %x",
|
|
||||||
u128_hi64( opt_affinity ), u128_lo64( opt_affinity ) );
|
|
||||||
else
|
|
||||||
applog(LOG_DEBUG, "Binding process to cpu mask %x",
|
|
||||||
opt_affinity );
|
|
||||||
#else
|
|
||||||
applog(LOG_DEBUG, "Binding process to cpu mask %x",
|
|
||||||
opt_affinity );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !opt_quiet && ( opt_n_threads < num_cpus ) )
|
if ( opt_affinity )
|
||||||
{
|
{
|
||||||
char affinity_map[64];
|
for ( int thr = 0, cpu = 0; thr < opt_n_threads; thr++, cpu++ )
|
||||||
format_affinity_map( affinity_map, opt_affinity );
|
{
|
||||||
applog( LOG_INFO, "CPU affinity [%s]", affinity_map );
|
while ( !( ( opt_affinity >> ( cpu&63 ) ) & 1ULL ) ) cpu++;
|
||||||
|
thread_affinity_map[ thr ] = cpu % num_cpus;
|
||||||
|
}
|
||||||
|
if ( !opt_quiet )
|
||||||
|
{
|
||||||
|
char affinity_mask[64];
|
||||||
|
format_affinity_mask( affinity_mask, opt_affinity );
|
||||||
|
applog( LOG_INFO, "CPU affinity [%s]", affinity_mask );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_SYSLOG_H
|
#ifdef HAVE_SYSLOG_H
|
||||||
@@ -3944,7 +3940,7 @@ int main(int argc, char *argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if ( !opt_quiet )
|
if ( !opt_quiet )
|
||||||
applog( LOG_INFO,"API listnening to %s:%d", opt_api_allow,
|
applog( LOG_INFO,"API listening to %s:%d", opt_api_allow,
|
||||||
opt_api_listen );
|
opt_api_listen );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3973,6 +3969,8 @@ int main(int argc, char *argv[])
|
|||||||
gettimeofday( &last_submit_time, NULL );
|
gettimeofday( &last_submit_time, NULL );
|
||||||
memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
|
memcpy( &five_min_start, &last_submit_time, sizeof (struct timeval) );
|
||||||
memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );
|
memcpy( &session_start, &last_submit_time, sizeof (struct timeval) );
|
||||||
|
memcpy( &stratum_keepalive_timer, &last_submit_time, sizeof (struct timeval) );
|
||||||
|
memcpy( &stratum_reset_time, &last_submit_time, sizeof (struct timeval) );
|
||||||
memcpy( &total_hashes_time, &last_submit_time, sizeof (struct timeval) );
|
memcpy( &total_hashes_time, &last_submit_time, sizeof (struct timeval) );
|
||||||
pthread_mutex_unlock( &stats_lock );
|
pthread_mutex_unlock( &stats_lock );
|
||||||
|
|
||||||
|
36
malloc-huge.c
Normal file
36
malloc-huge.c
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
#include "malloc-huge.h"
|
||||||
|
#include "miner.h"
|
||||||
|
|
||||||
|
#define HUGEPAGE_SIZE_2M (2 * 1024 * 1024)
|
||||||
|
|
||||||
|
void *malloc_hugepages( size_t size )
|
||||||
|
{
|
||||||
|
#if !(defined(MAP_HUGETLB) && defined(MAP_ANON))
|
||||||
|
// applog( LOG_WARNING, "Huge pages not available",size);
|
||||||
|
return NULL;
|
||||||
|
#else
|
||||||
|
|
||||||
|
if ( size < HUGEPAGE_MIN_ALLOC )
|
||||||
|
{
|
||||||
|
// applog( LOG_WARNING, "Block too small for huge pages: %lu bytes",size);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE_2M - 1;
|
||||||
|
void *p = NULL;
|
||||||
|
int flags =
|
||||||
|
#ifdef MAP_NOCORE
|
||||||
|
MAP_NOCORE |
|
||||||
|
#endif
|
||||||
|
MAP_HUGETLB | MAP_ANON | MAP_PRIVATE;
|
||||||
|
|
||||||
|
// round size up to next page boundary
|
||||||
|
size = ( size + hugepage_mask ) & (~hugepage_mask);
|
||||||
|
|
||||||
|
p = mmap( NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0 );
|
||||||
|
if ( p == MAP_FAILED )
|
||||||
|
p = NULL;
|
||||||
|
return p;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
24
malloc-huge.h
Normal file
24
malloc-huge.h
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#if !(defined(MALLOC_HUGE__))
|
||||||
|
#define MALLOC_HUGE__
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef __unix__
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(MAP_HUGETLB)
|
||||||
|
|
||||||
|
// Minimum block size 6 MiB to use huge pages
|
||||||
|
#define HUGEPAGE_MIN_ALLOC (6 * 1024 * 1024)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Attempt to allocate memory backed by 2 MiB pages, returns NULL on failure.
|
||||||
|
void *malloc_hugepages( size_t size );
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
36
miner.h
36
miner.h
@@ -466,6 +466,7 @@ void stratum_disconnect(struct stratum_ctx *sctx);
|
|||||||
bool stratum_subscribe(struct stratum_ctx *sctx);
|
bool stratum_subscribe(struct stratum_ctx *sctx);
|
||||||
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
|
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
|
||||||
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
|
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
|
||||||
|
bool stratum_suggest_difficulty( struct stratum_ctx *sctx, double diff );
|
||||||
|
|
||||||
|
|
||||||
extern bool aes_ni_supported;
|
extern bool aes_ni_supported;
|
||||||
@@ -823,6 +824,7 @@ Options:\n\
|
|||||||
qubit Qubit\n\
|
qubit Qubit\n\
|
||||||
scrypt scrypt(1024, 1, 1) (default)\n\
|
scrypt scrypt(1024, 1, 1) (default)\n\
|
||||||
scrypt:N scrypt(N, 1, 1)\n\
|
scrypt:N scrypt(N, 1, 1)\n\
|
||||||
|
scryptn2 scrypt(1048576, 1,1)\n\
|
||||||
sha256d Double SHA-256\n\
|
sha256d Double SHA-256\n\
|
||||||
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
||||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||||
@@ -868,9 +870,9 @@ Options:\n\
|
|||||||
yespowerr16 Yenten (YTN)\n\
|
yespowerr16 Yenten (YTN)\n\
|
||||||
yespower-b2b generic yespower + blake2b\n\
|
yespower-b2b generic yespower + blake2b\n\
|
||||||
zr5 Ziftr\n\
|
zr5 Ziftr\n\
|
||||||
-N, --param-n N parameter for scrypt based algos\n\
|
-N, --param-n=N N parameter for scrypt based algos\n\
|
||||||
-R, --param-r R parameter for scrypt based algos\n\
|
-R, --param-r=N R parameter for scrypt based algos\n\
|
||||||
-K, --param-key Key (pers) parameter for algos that use it\n\
|
-K, --param-key=STRING Key (pers) parameter for algos that use it\n\
|
||||||
-o, --url=URL URL of mining server\n\
|
-o, --url=URL URL of mining server\n\
|
||||||
-O, --userpass=U:P username:password pair for mining server\n\
|
-O, --userpass=U:P username:password pair for mining server\n\
|
||||||
-u, --user=USERNAME username for mining server\n\
|
-u, --user=USERNAME username for mining server\n\
|
||||||
@@ -885,19 +887,19 @@ Options:\n\
|
|||||||
-T, --timeout=N timeout for long poll and stratum (default: 300 seconds)\n\
|
-T, --timeout=N timeout for long poll and stratum (default: 300 seconds)\n\
|
||||||
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
||||||
long polling is unavailable, in seconds (default: 5)\n\
|
long polling is unavailable, in seconds (default: 5)\n\
|
||||||
--randomize Randomize scan range start to reduce duplicates\n\
|
--randomize randomize scan range (deprecated)\n\
|
||||||
-f, --diff-factor Divide req. difficulty by this factor (std is 1.0)\n\
|
-f, --diff-factor=N divide req. difficulty by this factor (std is 1.0)\n\
|
||||||
-m, --diff-multiplier Multiply difficulty by this factor (std is 1.0)\n\
|
-m, --diff-multiplier=N Multiply difficulty by this factor (std is 1.0)\n\
|
||||||
--hash-meter Display thread hash rates\n\
|
--hash-meter display thread hash rates\n\
|
||||||
--coinbase-addr=ADDR payout address for solo mining\n\
|
--coinbase-addr=ADDR payout address for solo mining\n\
|
||||||
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
|
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
|
||||||
--no-longpoll disable long polling support\n\
|
--no-longpoll disable long polling support\n\
|
||||||
--no-getwork disable getwork support\n\
|
--no-getwork disable getwork support\n\
|
||||||
--no-gbt disable getblocktemplate support\n\
|
--no-gbt disable getblocktemplate support\n\
|
||||||
--no-stratum disable X-Stratum support\n\
|
--no-stratum disable X-Stratum support\n\
|
||||||
--no-extranonce disable Stratum extranonce support\n\
|
--no-extranonce disable Stratum extranonce subscribe\n\
|
||||||
--no-redirect ignore requests to change the URL of the mining server\n\
|
--no-redirect ignore requests to change the URL of the mining server\n\
|
||||||
-q, --quiet disable per-thread hashmeter output\n\
|
-q, --quiet reduce log verbosity\n\
|
||||||
--no-color disable colored output\n\
|
--no-color disable colored output\n\
|
||||||
-D, --debug enable debug output\n\
|
-D, --debug enable debug output\n\
|
||||||
-P, --protocol-dump verbose dump of protocol-level activities\n"
|
-P, --protocol-dump verbose dump of protocol-level activities\n"
|
||||||
@@ -909,16 +911,17 @@ Options:\n\
|
|||||||
-B, --background run the miner in the background\n\
|
-B, --background run the miner in the background\n\
|
||||||
--benchmark run in offline benchmark mode\n\
|
--benchmark run in offline benchmark mode\n\
|
||||||
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
|
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
|
||||||
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)\n\
|
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest) (deprecated)\n\
|
||||||
-b, --api-bind=address[:port] IP address for the miner API, default port is 4048)\n\
|
-b, --api-bind=address[:port] IP address for the miner API, default port is 4048)\n\
|
||||||
--api-remote Allow remote control\n\
|
--api-remote allow remote control\n\
|
||||||
--max-temp=N Only mine if cpu temp is less than specified value (linux)\n\
|
--max-temp=N only mine if cpu temp is less than specified value (linux)\n\
|
||||||
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
|
--max-rate=N[KMG] only mine if net hashrate is less than specified value\n\
|
||||||
--max-diff=N Only mine if net difficulty is less than specified value\n\
|
--max-diff=N only mine if net difficulty is less than specified value\n\
|
||||||
-c, --config=FILE load a JSON-format configuration file\n\
|
-c, --config=FILE load a JSON-format configuration file\n\
|
||||||
--data-file path and name of data file\n\
|
--data-file=FILE path and name of data file\n\
|
||||||
--verify enable additional time consuming start up tests\n\
|
--verify enable additional time consuming start up tests\n\
|
||||||
-V, --version display version information and exit\n\
|
--stratum-keepalive prevent disconnects when difficulty is too high\n\
|
||||||
|
-V, --version display version and CPU information and exit\n\
|
||||||
-h, --help display this help text and exit\n\
|
-h, --help display this help text and exit\n\
|
||||||
";
|
";
|
||||||
|
|
||||||
@@ -987,6 +990,7 @@ static struct option const options[] = {
|
|||||||
{ "userpass", 1, NULL, 'O' },
|
{ "userpass", 1, NULL, 'O' },
|
||||||
{ "data-file", 1, NULL, 1027 },
|
{ "data-file", 1, NULL, 1027 },
|
||||||
{ "verify", 0, NULL, 1028 },
|
{ "verify", 0, NULL, 1028 },
|
||||||
|
{ "stratum-keepalive", 0, NULL, 1029 },
|
||||||
{ "version", 0, NULL, 'V' },
|
{ "version", 0, NULL, 'V' },
|
||||||
{ 0, 0, 0, 0 }
|
{ 0, 0, 0, 0 }
|
||||||
};
|
};
|
||||||
|
@@ -2,22 +2,21 @@
|
|||||||
#define SIMD_INT_H__ 1
|
#define SIMD_INT_H__ 1
|
||||||
|
|
||||||
// Endian byte swap
|
// Endian byte swap
|
||||||
#define bswap_64( a ) __builtin_bswap64( a )
|
#define bswap_64 __builtin_bswap64
|
||||||
#define bswap_32( a ) __builtin_bswap32( a )
|
#define bswap_32 __builtin_bswap32
|
||||||
|
|
||||||
|
// Bit rotation
|
||||||
|
#define rol64 __rolq
|
||||||
|
#define ror64 __rorq
|
||||||
|
#define rol32 __rold
|
||||||
|
#define ror32 __rord
|
||||||
|
|
||||||
// Safe division, integer or floating point. For floating point it's as
|
// Safe division, integer or floating point. For floating point it's as
|
||||||
// safe as 0. is precisely zero.
|
// safe as 0 is precisely zero.
|
||||||
// Returns safe_result if division by zero.
|
// Returns safe_result if division by zero, typically zero.
|
||||||
#define safe_div( dividend, divisor, safe_result ) \
|
#define safe_div( dividend, divisor, safe_result ) \
|
||||||
( (divisor) == 0 ? safe_result : ( (dividend) / (divisor) ) )
|
( (divisor) == 0 ? safe_result : ( (dividend) / (divisor) ) )
|
||||||
|
|
||||||
// Aliases with familiar names for built in bit rotate instructions
|
|
||||||
#define rol64( a, n ) _lrotl( a, n )
|
|
||||||
#define ror64( a, n ) _lrotr( a, n )
|
|
||||||
#define rol32( a, n ) _rotl( a, n )
|
|
||||||
#define ror32( a, n ) _rotr( a, n )
|
|
||||||
#define rol16( a, n ) _rotwl( a, n )
|
|
||||||
#define ror16( a, n ) _rotwr( a, n )
|
|
||||||
|
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
//
|
//
|
||||||
|
@@ -209,7 +209,7 @@ static inline void cpu_getname(char *outbuf, size_t maxsz)
|
|||||||
{
|
{
|
||||||
memset(outbuf, 0, maxsz);
|
memset(outbuf, 0, maxsz);
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
char brand[0xC0] = { 0 };
|
char brand[256] = { 0 };
|
||||||
int output[4] = { 0 }, ext;
|
int output[4] = { 0 }, ext;
|
||||||
cpuid(0x80000000, output);
|
cpuid(0x80000000, output);
|
||||||
ext = output[0];
|
ext = output[0];
|
||||||
|
21
util.c
21
util.c
@@ -1658,7 +1658,7 @@ static bool stratum_parse_extranonce(struct stratum_ctx *sctx, json_t *params, i
|
|||||||
pthread_mutex_unlock(&sctx->work_lock);
|
pthread_mutex_unlock(&sctx->work_lock);
|
||||||
|
|
||||||
if ( !opt_quiet ) /* pool dynamic change */
|
if ( !opt_quiet ) /* pool dynamic change */
|
||||||
applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d",
|
applog( LOG_INFO, "Stratum extranonce1 0x%s, extranonce2 size %d",
|
||||||
xnonce1, xn2_size);
|
xnonce1, xn2_size);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -1846,6 +1846,25 @@ out:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool stratum_suggest_difficulty( struct stratum_ctx *sctx, double diff )
|
||||||
|
{
|
||||||
|
char *s;
|
||||||
|
s = (char*) malloc( 80 );
|
||||||
|
bool rc = true;
|
||||||
|
|
||||||
|
// response is handled seperately, what ID?
|
||||||
|
sprintf( s, "{\"id\": 1, \"method\": \"mining.suggest_difficulty\", \"params\": [\"%f\"]}", diff );
|
||||||
|
if ( !stratum_send_line( sctx, s ) )
|
||||||
|
{
|
||||||
|
applog(LOG_WARNING,"stratum.suggest_difficulty send failed");
|
||||||
|
rc = false;
|
||||||
|
}
|
||||||
|
free ( s );
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract block height L H... here len=3, height=0x1333e8
|
* Extract block height L H... here len=3, height=0x1333e8
|
||||||
* "...0000000000ffffffff2703e83313062f503253482f043d61105408"
|
* "...0000000000ffffffff2703e83313062f503253482f043d61105408"
|
||||||
|
@@ -16,18 +16,18 @@ export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
|
|||||||
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
||||||
# used by GCC
|
# used by GCC
|
||||||
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
||||||
|
# Support for Windows 7 CPU groups, AES sometimes not included in -march
|
||||||
|
export DEFAULT_CFLAGS="-maes -O3 -Wall -D_WIN32_WINNT=0x0601"
|
||||||
|
export DEFAULT_CFLAGS_OLD="-O3 -Wall"
|
||||||
|
|
||||||
# make link to local gmp header file.
|
# make link to local gmp header file.
|
||||||
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
||||||
|
|
||||||
# edit configure to fix pthread lib name for Windows.
|
|
||||||
#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
|
|
||||||
|
|
||||||
# make release directory and copy selected DLLs.
|
# make release directory and copy selected DLLs.
|
||||||
|
|
||||||
rm -rf release > /dev/null
|
rm -rf release > /dev/null
|
||||||
|
|
||||||
mkdir release
|
mkdir release
|
||||||
|
|
||||||
cp README.txt release/
|
cp README.txt release/
|
||||||
cp README.md release/
|
cp README.md release/
|
||||||
cp RELEASE_NOTES release/
|
cp RELEASE_NOTES release/
|
||||||
@@ -41,74 +41,59 @@ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
|
|||||||
|
|
||||||
# Start building...
|
# Start building...
|
||||||
|
|
||||||
# Icelake AVX512 SHA VAES
|
# AVX512 SHA VAES: Intel Core Icelake, Rocketlake
|
||||||
./clean-all.sh || echo clean
|
./clean-all.sh || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
./autogen.sh || echo done
|
./autogen.sh || echo done
|
||||||
CFLAGS="-O3 -march=icelake-client -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="-march=icelake-client $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
||||||
|
|
||||||
# Rocketlake AVX512 SHA AES
|
# AVX512 AES: Intel Core HEDT Skylake-X, Cascadelake
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=cascadelake -msha -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="-march=skylake-avx512 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||||
#CFLAGS="-O3 -march=rocketlake -Wall" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-avx512-sha.exe
|
|
||||||
|
|
||||||
# Zen1 AVX2 AES SHA
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=znver1 -Wall" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-zen.exe
|
|
||||||
|
|
||||||
# Zen3 AVX2 SHA VAES
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=znver2 -mvaes -Wall" ./configure $CONFIGURE_ARGS
|
|
||||||
# CFLAGS="-O3 -march=znver3 -Wall" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
|
||||||
strip -s cpuminer.exe
|
|
||||||
mv cpuminer.exe release/cpuminer-zen3.exe
|
|
||||||
|
|
||||||
# Skylake-X AVX512 AES
|
|
||||||
# mingw won't compile avx512 without -fno-asynchronous-unwind-tables
|
|
||||||
make clean || echo clean
|
|
||||||
rm -f config.status
|
|
||||||
CFLAGS="-O3 -march=skylake-avx512 -Wall" ./configure $CONFIGURE_ARGS
|
|
||||||
#CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-asynchronous-unwind-tables" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx512.exe
|
mv cpuminer.exe release/cpuminer-avx512.exe
|
||||||
|
|
||||||
# Haswell AVX2 AES
|
# AVX2 SHA VAES: Intel Alderlake, AMD Zen3
|
||||||
|
make clean || echo done
|
||||||
|
rm -f config.status
|
||||||
|
CFLAGS="-mavx2 -msha -mvaes $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||||
|
make -j 8
|
||||||
|
strip -s cpuminer.exe
|
||||||
|
mv cpuminer.exe release/cpuminer-avx2-sha-vaes.exe
|
||||||
|
|
||||||
|
# AVX2 AES SHA: AMD Zen1
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
# GCC 9 doesn't include AES in -march=core-avx2
|
CFLAGS="-march=znver1 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||||
CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure $CONFIGURE_ARGS
|
make -j 8
|
||||||
|
strip -s cpuminer.exe
|
||||||
|
mv cpuminer.exe release/cpuminer-avx2-sha.exe
|
||||||
|
|
||||||
|
# AVX2 AES: Intel Core Haswell, Skylake, Kabylake, Coffeelake, Cometlake
|
||||||
|
make clean || echo clean
|
||||||
|
rm -f config.status
|
||||||
|
CFLAGS="-march=core-avx2 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx2.exe
|
mv cpuminer.exe release/cpuminer-avx2.exe
|
||||||
|
|
||||||
# Sandybridge AVX AES
|
# AVX AES: Intel Sandybridge, Ivybridge
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
# -march=corei7-avx still includes aes, but just in case
|
CFLAGS="-march=corei7-avx -maes $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||||
CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx.exe
|
mv cpuminer.exe release/cpuminer-avx.exe
|
||||||
|
|
||||||
# Westmere SSE4.2 AES
|
# SSE4.2 AES: Intel Westmere
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=westmere -maes -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="-march=westmere -maes $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||||
#CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $CONFIGURE_ARGS
|
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
||||||
@@ -116,7 +101,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
|||||||
# Nehalem SSE4.2
|
# Nehalem SSE4.2
|
||||||
#make clean || echo clean
|
#make clean || echo clean
|
||||||
#rm -f config.status
|
#rm -f config.status
|
||||||
#CFLAGS="-O3 -march=corei7 -Wall" ./configure $CONFIGURE_ARGS
|
#CFLAGS="$DEFAULT_CFLAGS_OLD -march=corei7" ./configure $CONFIGURE_ARGS
|
||||||
#make
|
#make
|
||||||
#strip -s cpuminer.exe
|
#strip -s cpuminer.exe
|
||||||
#mv cpuminer.exe release/cpuminer-sse42.exe
|
#mv cpuminer.exe release/cpuminer-sse42.exe
|
||||||
@@ -124,7 +109,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
|||||||
# Core2 SSSE3
|
# Core2 SSSE3
|
||||||
#make clean || echo clean
|
#make clean || echo clean
|
||||||
#rm -f config.status
|
#rm -f config.status
|
||||||
#CFLAGS="-O3 -march=core2 -Wall" ./configure $CONFIGURE_ARGS
|
#CFLAGS="$DEFAULT_CFLAGS_OLD -march=core2" ./configure $CONFIGURE_ARGS
|
||||||
#make
|
#make
|
||||||
#strip -s cpuminer.exe
|
#strip -s cpuminer.exe
|
||||||
#mv cpuminer.exe release/cpuminer-ssse3.exe
|
#mv cpuminer.exe release/cpuminer-ssse3.exe
|
||||||
@@ -133,9 +118,16 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
|||||||
# Generic SSE2
|
# Generic SSE2
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -msse2 -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="-msse2 $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-sse2.exe
|
mv cpuminer.exe release/cpuminer-sse2.exe
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
|
|
||||||
|
# Native with CPU groups enabled
|
||||||
|
make clean || echo clean
|
||||||
|
rm -f config.status
|
||||||
|
CFLAGS="-march=native $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||||
|
make -j 8
|
||||||
|
strip -s cpuminer.exe
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user